1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2016 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #ifndef CTS_USES_VULKANSC
89 #include "vktSpvAsmIntegerDotProductTests.hpp"
90 #endif // CTS_USES_VULKANSC
91 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
92
93 #include <cmath>
94 #include <limits>
95 #include <map>
96 #include <string>
97 #include <sstream>
98 #include <utility>
99 #include <stack>
100 #include <cassert>
101
102 namespace vkt
103 {
104 namespace SpirVAssembly
105 {
106
107 namespace
108 {
109
110 using namespace vk;
111 using std::map;
112 using std::string;
113 using std::vector;
114 using tcu::IVec3;
115 using tcu::IVec4;
116 using tcu::RGBA;
117 using tcu::TestLog;
118 using tcu::TestStatus;
119 using tcu::Vec4;
120 using de::UniquePtr;
121 using tcu::StringTemplate;
122 using tcu::Vec4;
123
124 const bool TEST_WITH_NAN = true;
125 const bool TEST_WITHOUT_NAN = false;
126
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %f16.
//
// The function bitcasts its index parameter to %u32, divides it by %c_u32_2
// to select a %u32 word inside member 0 of %${var}, loads that word, bitcasts
// it to %v2f16 and dynamically extracts the half selected by (index & 1).
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate with the buffer variable's name -- confirm at the use site.
const string loadScalarF16FromUint =
R"spv(%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param
%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2
%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld
%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low
OpReturnValue %ld_arg_${var}_ex
OpFunctionEnd
)spv";
140
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %v2f16.
//
// The function uses its parameter directly as the element index into member 0
// of %${var}, loads one %u32 word and bitcasts it to %v2f16.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadV2F16FromUint =
R"spv(%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param
%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep
%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld
OpReturnValue %ld_arg_${var}_cast
OpFunctionEnd
)spv";
150
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %v3f16.
//
// Two %u32 words (sub-indices %c_u32_0 and %c_u32_1 of the element selected by
// the parameter) are loaded and bitcast to %v2f16; components 0 1 2 of the
// concatenated pair form the result. Because a vec4 worth of values is
// allocated per element, this is almost identical to the vec4 loader.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadV3F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)spv";
166
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %v4f16.
//
// Two %u32 words (sub-indices %c_u32_0 and %c_u32_1 of the element selected by
// the parameter) are loaded, each bitcast to %v2f16, and shuffled together
// (components 0 1 2 3) into the %v4f16 result.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadV4F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3
OpReturnValue %ld_arg_${var}_shuffle
OpFunctionEnd
)spv";
180
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m2x2f16.
//
// Two %u32 words (sub-indices %c_u32_0 and %c_u32_1 of the element selected by
// the parameter) are loaded and bitcast to %v2f16; the two vectors are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM2x2F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1
OpReturnValue %ld_arg_${var}_cons
OpFunctionEnd
)spv";
194
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m2x3f16.
//
// Four %u32 words (sub-indices %c_u32_0..%c_u32_3 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2) into two %v3f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM2x3F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
216
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m2x4f16.
//
// Four %u32 words (sub-indices %c_u32_0..%c_u32_3 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2 3) into two %v4f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM2x4F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
238
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m3x2f16.
//
// Three %u32 words (sub-indices %c_u32_0..%c_u32_2 of the element selected by
// the parameter) are loaded and bitcast to %v2f16; the three vectors are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM3x2F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
255
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m3x3f16.
//
// Six %u32 words (sub-indices %c_u32_0..%c_u32_5 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2) into three %v3f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM3x3F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
284
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m3x4f16.
//
// Six %u32 words (sub-indices %c_u32_0..%c_u32_5 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2 3) into three %v4f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM3x4F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
313
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m4x2f16.
//
// Four %u32 words (sub-indices %c_u32_0..%c_u32_3 of the element selected by
// the parameter) are loaded and bitcast to %v2f16; the four vectors are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM4x2F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0
%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1
%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2
%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3
%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0
%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1
%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2
%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
333
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m4x3f16.
//
// Eight %u32 words (sub-indices %c_u32_0..%c_u32_7 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2) into four %v3f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM4x3F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2
%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2
%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2
%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2
%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
369
// SPIR-V assembly text for a function %ld_arg_${var} : (%i32) -> %m4x4f16.
//
// Eight %u32 words (sub-indices %c_u32_0..%c_u32_7 of the element selected by
// the parameter) are loaded and bitcast to %v2f16. Consecutive pairs are
// shuffled (components 0 1 2 3) into four %v4f16 vectors, which are the
// constituents passed to OpCompositeConstruct for the matrix.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string loadM4x4F16FromUints =
R"spv(%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn
%ld_arg_${var}_param = OpFunctionParameter %i32
%ld_arg_${var}_entry = OpLabel
%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0
%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1
%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2
%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3
%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4
%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5
%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6
%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7
%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00
%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01
%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10
%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11
%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20
%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21
%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30
%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31
%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00
%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01
%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10
%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11
%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20
%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21
%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30
%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31
%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3
%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3
%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3
%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3
%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3
OpReturnValue %ld_arg_${var}_mat
OpFunctionEnd
)spv";
405
// SPIR-V assembly text for a function %st_fn_${var} : (%f16 value, %i32 index) -> %void.
//
// This version is sensitive to the initial contents of the output buffer: the
// infrastructure sets every output buffer bit to one before the shader runs,
// so the value is written with an atomic AND, which produces the correct zeroes.
//
// Steps performed by the assembly:
//  - insert the value into a zeroed %v2f16 at position (index & 1);
//  - OR 16 bits of ones (%c_u32_low_ones when the position is odd, otherwise
//    %c_u32_high_ones) into the half that was not populated with the value;
//  - OpAtomicAnd the combined word into the %u32 at index / 2 inside member 0
//    of %${var}.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeScalarF16AsUint =
R"spv(%st_fn_${var} = OpFunction %void None %void_f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0
%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low
%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1
%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert
%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel
%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2
%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div
%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or
OpReturn
OpFunctionEnd
)spv";
429
// SPIR-V assembly text for a function %st_fn_${var} : (%v2f16 value, %i32 index) -> %void.
//
// The vector is bitcast to a single %u32 and stored at the element of member 0
// of %${var} selected directly by the index parameter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeV2F16AsUint =
R"spv(%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1
%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2
OpStore %st_fn_${var}_gep %st_fn_${var}_cast
OpReturn
OpFunctionEnd
)spv";
440
// SPIR-V assembly text for a function %st_fn_${var} : (%v3f16 value, %i32 index) -> %void.
//
// Because a vec4 worth of values is allocated per element, this is handled
// almost exactly like the vec4 store: the vector is split by two shuffles
// (components 0 1 and 2 3) into %v2f16 halves that are bitcast to %u32 and
// stored at sub-indices %c_u32_0 and %c_u32_1. The second half therefore
// carries some extra data that should not be compared.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeV3F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)spv";
459
// SPIR-V assembly text for a function %st_fn_${var} : (%v4f16 value, %i32 index) -> %void.
//
// The vector is split by two shuffles (components 0 1 and 2 3) into %v2f16
// halves; each half is bitcast to %u32 and stored at sub-indices %c_u32_0 and
// %c_u32_1 of the element selected by the index parameter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeV4F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %v4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1
%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)spv";
475
// SPIR-V assembly text for a function %st_fn_${var} : (%m2x2f16 value, %i32 index) -> %void.
//
// Matrix constituents 0 and 1 are extracted as %v2f16, each is bitcast to
// %u32 and stored at sub-indices %c_u32_0 and %c_u32_1 of the element selected
// by the index parameter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeM2x2F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpReturn
OpFunctionEnd
)spv";
491
// SPIR-V assembly text for a function %st_fn_${var} : (%m2x3f16 value, %i32 index) -> %void.
//
// Matrix constituents 0 and 1 are extracted as %v3f16; each is split by two
// shuffles (components 0 1 and 2 3) into %v2f16 halves, bitcast to %u32 and
// stored at sub-indices %c_u32_0..%c_u32_3 of the element selected by the
// index parameter. In the extracted halves 01 and 11 the second element
// doesn't matter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeM2x3F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)spv";
519
// SPIR-V assembly text for a function %st_fn_${var} : (%m2x4f16 value, %i32 index) -> %void.
//
// Matrix constituents 0 and 1 are extracted as %v4f16; each is split by two
// shuffles (components 0 1 and 2 3) into %v2f16 halves, bitcast to %u32 and
// stored at sub-indices %c_u32_0..%c_u32_3 of the element selected by the
// index parameter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeM2x4F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpReturn
OpFunctionEnd
)spv";
545
// SPIR-V assembly text for a function %st_fn_${var} : (%m3x2f16 value, %i32 index) -> %void.
//
// Matrix constituents 0, 1 and 2 are extracted as %v2f16, each is bitcast to
// %u32 and stored at sub-indices %c_u32_0..%c_u32_2 of the element selected by
// the index parameter.
//
// NOTE(review): ${var} is a placeholder; presumably it is specialised through
// tcu::StringTemplate -- confirm at the use site.
const string storeM3x2F16AsUints =
R"spv(%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2
OpReturn
OpFunctionEnd
)spv";
565
// SPIR-V function template (${var} is substituted by the caller) that stores a
// 3-column matrix of v3f16 columns as six u32 words, two words per column, through
// the access chain (%c_u32_0, param2, word index).
//
// Each vec3 column is split into two v2f16 halves with OpVectorShuffle. The second
// half uses shuffle indices "2 3"; index 3 lies past the end of the first vec3
// operand and therefore selects component 0 of the second operand. That lane is
// only padding, so its value does not matter.
const string storeM3x3F16AsUints =
	// Function header: (matrix, i32 index) -> void.
	"%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	// Pull out the three columns.
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
	// Split every column into two v2f16 halves.
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	// Reinterpret every half as one u32.
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	// Destination words 0..5.
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	// Write the words out.
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
601
// SPIR-V function template (${var} is substituted by the caller) that stores a
// 3-column matrix of v4f16 columns as six u32 words. Each column is split into two
// v2f16 halves, every half is bitcast to one u32 and written through the access
// chain (%c_u32_0, param2, word index).
const string storeM3x4F16AsUints =
	// Function header: (matrix, i32 index) -> void.
	"%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	// Pull out the three columns.
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
	// Split every column into (x, y) and (z, w).
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	// Reinterpret every half as one u32.
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	// Destination words 0..5.
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	// Write the words out.
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
636
// SPIR-V function template (${var} is substituted by the caller) that stores a
// 4-column matrix of v2f16 columns as four u32 words. Each column is bitcast to
// one u32 and written through the access chain (%c_u32_0, param2, column index).
const string storeM4x2F16AsUints =
	// Function header: (matrix, i32 index) -> void.
	"%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	// Pull out the four columns.
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
	// Reinterpret every 2 x f16 column as one u32.
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
	"%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
	// Destination words 0..3.
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	// Write the words out.
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
	"OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
660
// SPIR-V function template (${var} is substituted by the caller) that stores a
// 4-column matrix of v3f16 columns as eight u32 words, two words per column,
// through the access chain (%c_u32_0, param2, word index).
//
// Each vec3 column is split into two v2f16 halves with OpVectorShuffle. In the
// second half (indices "2 3") index 3 lies past the end of the first vec3 operand
// and selects component 0 of the second operand; that lane is padding and its
// value does not matter.
const string storeM4x3F16AsUints =
	// Function header: (matrix, i32 index) -> void.
	"%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	// Pull out the four columns.
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
	// Split every column into two v2f16 halves.
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
	"%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
	// Reinterpret every half as one u32.
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
	"%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
	// Destination words 0..7.
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
	"%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
	// Write the words out.
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
	"OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
705
// SPIR-V function template (${var} is substituted by the caller) that stores a
// 4-column matrix of v4f16 columns as eight u32 words. Each column is split into
// two v2f16 halves, every half is bitcast to one u32 and written through the
// access chain (%c_u32_0, param2, word index).
const string storeM4x4F16AsUints =
	// Function header: (matrix, i32 index) -> void.
	"%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	// Pull out the four columns.
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
	// Split every column into (x, y) and (z, w).
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
	"%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
	// Reinterpret every half as one u32.
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
	"%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
	// Destination words 0..7.
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
	"%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
	// Write the words out.
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
	"OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
749
750 template<typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)751 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
752 {
753 T* const typedPtr = (T*)dst;
754 for (int ndx = 0; ndx < numValues; ndx++)
755 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
756 }
757
758 // Filter is a function that returns true if a value should pass, false otherwise.
759 template<typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)760 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
761 {
762 T* const typedPtr = (T*)dst;
763 T value;
764 for (int ndx = 0; ndx < numValues; ndx++)
765 {
766 do
767 value = de::randomScalar<T>(rnd, minValue, maxValue);
768 while (!filter(value));
769
770 typedPtr[offset + ndx] = value;
771 }
772 }
773
774 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)775 deInt64 randomInt64LogDistributed (de::Random& rnd)
776 {
777 deInt64 val = rnd.getUint64();
778 val &= (1ull << rnd.getInt(1, 63)) - 1;
779 if (rnd.getBool())
780 val = -val;
781 return val;
782 }
783
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues)784 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
785 {
786 for (int ndx = 0; ndx < numValues; ndx++)
787 dst[ndx] = randomInt64LogDistributed(rnd);
788 }
789
790 template<typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues,FilterT filter)791 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
792 {
793 for (int ndx = 0; ndx < numValues; ndx++)
794 {
795 deInt64 value;
796 do {
797 value = randomInt64LogDistributed(rnd);
798 } while (!filter(value));
799 dst[ndx] = value;
800 }
801 }
802
// Filter predicate: accepts zero and positive values, rejects negative ones.
inline bool filterNonNegative (const deInt64 value)
{
	return !(value < 0);
}
807
// Filter predicate: accepts strictly positive values only.
inline bool filterPositive (const deInt64 value)
{
	return 0 < value;
}
812
// Filter predicate: accepts every value except zero.
inline bool filterNotZero (const deInt64 value)
{
	return !(value == 0);
}
817
floorAll(vector<float> & values)818 static void floorAll (vector<float>& values)
819 {
820 for (size_t i = 0; i < values.size(); i++)
821 values[i] = deFloatFloor(values[i]);
822 }
823
floorAll(vector<Vec4> & values)824 static void floorAll (vector<Vec4>& values)
825 {
826 for (size_t i = 0; i < values.size(); i++)
827 values[i] = floor(values[i]);
828 }
829
// Pairs a test-case name with the parameter string that belongs to that case.
struct CaseParameter
{
	const char*	name;	// Stored as the pointer that was passed in; the characters are not copied.
	string		param;	// Own copy of the constructor argument.

	CaseParameter (const char* caseName, const string& caseParam)
		: name	(caseName)
		, param	(caseParam)
	{
	}
};
837
838 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
839 //
840 // #version 430
841 //
842 // layout(std140, set = 0, binding = 0) readonly buffer Input {
843 // float elements[];
844 // } input_data;
845 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
846 // float elements[];
847 // } output_data;
848 //
849 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
850 //
851 // void main() {
852 // uint x = gl_GlobalInvocationID.x;
853 // output_data.elements[x] = -input_data.elements[x];
854 // }
855
getAsmForLocalSizeTest(bool useLiteralLocalSize,bool useLiteralLocalSizeId,bool useSpecConstantWorkgroupSize,IVec3 workGroupSize,deUint32 ndx)856 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
857 {
858 std::ostringstream out;
859 out << "OpCapability Shader\n"
860 "OpMemoryModel Logical GLSL450\n";
861
862 if (useLiteralLocalSizeId)
863 {
864 out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
865 "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
866 }
867 else
868 {
869 out << "OpEntryPoint GLCompute %main \"main\" %id\n";
870
871 if (useLiteralLocalSize)
872 {
873 out << "OpExecutionMode %main LocalSize "
874 << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
875 }
876 }
877
878 out << "OpSource GLSL 430\n"
879 "OpName %main \"main\"\n"
880 "OpName %id \"gl_GlobalInvocationID\"\n"
881 "OpDecorate %id BuiltIn GlobalInvocationId\n";
882
883 if (useSpecConstantWorkgroupSize)
884 {
885 out << "OpDecorate %spec_0 SpecId 100\n"
886 "OpDecorate %spec_1 SpecId 101\n"
887 "OpDecorate %spec_2 SpecId 102\n"
888 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
889 }
890
891 if (useLiteralLocalSizeId)
892 {
893 out << getComputeAsmInputOutputBufferTraits("Block")
894 << getComputeAsmCommonTypes("StorageBuffer")
895 << getComputeAsmInputOutputBuffer("StorageBuffer")
896 << "%const_0 = OpConstant %u32 " << workGroupSize.x() << "\n"
897 "%const_1 = OpConstant %u32 " << workGroupSize.y() << "\n"
898 "%const_2 = OpConstant %u32 " << workGroupSize.z() << "\n";
899 }
900 else
901 {
902 out << getComputeAsmInputOutputBufferTraits()
903 << getComputeAsmCommonTypes()
904 << getComputeAsmInputOutputBuffer();
905 }
906
907 out << "%id = OpVariable %uvec3ptr Input\n"
908 "%zero = OpConstant %i32 0 \n";
909
910 if (useSpecConstantWorkgroupSize)
911 {
912 out << "%spec_0 = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
913 "%spec_1 = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
914 "%spec_2 = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
915 "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
916 }
917
918 out << "%main = OpFunction %void None %voidf\n"
919 "%label = OpLabel\n"
920 "%idval = OpLoad %uvec3 %id\n"
921 "%ndx = OpCompositeExtract %u32 %idval " << ndx << "\n"
922
923 "%inloc = OpAccessChain %f32ptr %indata %zero %ndx\n"
924 "%inval = OpLoad %f32 %inloc\n"
925 "%neg = OpFNegate %f32 %inval\n"
926 "%outloc = OpAccessChain %f32ptr %outdata %zero %ndx\n"
927 " OpStore %outloc %neg\n"
928 " OpReturn\n"
929 " OpFunctionEnd\n";
930
931 return out.str();
932 }
933
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)934 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
935 {
936 const char* groupName[]{ "localsize", "localsize_id" };
937
938 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
939 ComputeShaderSpec spec;
940 de::Random rnd (deStringHash(group->getName()));
941 const deUint32 numElements = 64u;
942 vector<float> positiveFloats (numElements, 0);
943 vector<float> negativeFloats (numElements, 0);
944
945 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
946
947 for (size_t ndx = 0; ndx < numElements; ++ndx)
948 negativeFloats[ndx] = -positiveFloats[ndx];
949
950 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
951 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
952
953 if (useLocalSizeId)
954 {
955 spec.spirvVersion = SPIRV_VERSION_1_5;
956 spec.extensions.push_back("VK_KHR_maintenance4");
957 }
958
959 spec.numWorkGroups = IVec3(numElements, 1, 1);
960
961 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
962 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", spec));
963
964 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
965 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", spec));
966
967 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
968 {
969 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
970 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", spec));
971 }
972
973 spec.numWorkGroups = IVec3(1, 1, 1);
974
975 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
976 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", spec));
977
978 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
979 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", spec));
980
981 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
982 {
983 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
984 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", spec));
985 }
986
987 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
988 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", spec));
989
990 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
991 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", spec));
992
993 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
994 {
995 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
996 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", spec));
997 }
998
999 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
1000 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", spec));
1001
1002 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1003 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", spec));
1004
1005 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
1006 {
1007 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1008 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", spec));
1009 }
1010
1011 return group.release();
1012 }
1013
createOpNopGroup(tcu::TestContext & testCtx)1014 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1015 {
1016 // Test the OpNop instruction
1017 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnop"));
1018 ComputeShaderSpec spec;
1019 de::Random rnd (deStringHash(group->getName()));
1020 const int numElements = 100;
1021 vector<float> positiveFloats (numElements, 0);
1022 vector<float> negativeFloats (numElements, 0);
1023
1024 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1025
1026 for (size_t ndx = 0; ndx < numElements; ++ndx)
1027 negativeFloats[ndx] = -positiveFloats[ndx];
1028
1029 spec.assembly =
1030 string(getComputeAsmShaderPreamble()) +
1031
1032 "OpSource GLSL 430\n"
1033 "OpName %main \"main\"\n"
1034 "OpName %id \"gl_GlobalInvocationID\"\n"
1035
1036 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1037
1038 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1039
1040 + string(getComputeAsmInputOutputBuffer()) +
1041
1042 "%id = OpVariable %uvec3ptr Input\n"
1043 "%zero = OpConstant %i32 0\n"
1044
1045 "%main = OpFunction %void None %voidf\n"
1046 "%label = OpLabel\n"
1047 "%idval = OpLoad %uvec3 %id\n"
1048 "%x = OpCompositeExtract %u32 %idval 0\n"
1049
1050 " OpNop\n" // Inside a function body
1051
1052 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1053 "%inval = OpLoad %f32 %inloc\n"
1054 "%neg = OpFNegate %f32 %inval\n"
1055 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1056 " OpStore %outloc %neg\n"
1057 " OpReturn\n"
1058 " OpFunctionEnd\n";
1059 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1060 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1061 spec.numWorkGroups = IVec3(numElements, 1, 1);
1062
1063 // OpNop appearing at different places
1064 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1065
1066 return group.release();
1067 }
1068
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1069 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1070 {
1071 // Compute shaders with unused variables
1072 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "unused_variables"));
1073 de::Random rnd (deStringHash(group->getName()));
1074 const int numElements = 100;
1075 vector<float> positiveFloats (numElements, 0);
1076 vector<float> negativeFloats (numElements, 0);
1077
1078 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1079
1080 for (size_t ndx = 0; ndx < numElements; ++ndx)
1081 negativeFloats[ndx] = -positiveFloats[ndx];
1082
1083 const VariableLocation testLocations[] =
1084 {
1085 // Set Binding
1086 { 0, 5 },
1087 { 5, 5 },
1088 };
1089
1090 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1091 {
1092 const VariableLocation& location = testLocations[locationNdx];
1093
1094 // Unused variable.
1095 {
1096 ComputeShaderSpec spec;
1097
1098 spec.assembly =
1099 string(getComputeAsmShaderPreamble()) +
1100
1101 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1102
1103 + getUnusedDecorations(location)
1104
1105 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1106
1107 + getUnusedTypesAndConstants()
1108
1109 + string(getComputeAsmInputOutputBuffer())
1110
1111 + getUnusedBuffer() +
1112
1113 "%id = OpVariable %uvec3ptr Input\n"
1114 "%zero = OpConstant %i32 0\n"
1115
1116 "%main = OpFunction %void None %voidf\n"
1117 "%label = OpLabel\n"
1118 "%idval = OpLoad %uvec3 %id\n"
1119 "%x = OpCompositeExtract %u32 %idval 0\n"
1120
1121 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1122 "%inval = OpLoad %f32 %inloc\n"
1123 "%neg = OpFNegate %f32 %inval\n"
1124 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1125 " OpStore %outloc %neg\n"
1126 " OpReturn\n"
1127 " OpFunctionEnd\n";
1128 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1129 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1130 spec.numWorkGroups = IVec3(numElements, 1, 1);
1131
1132 std::string testName = "variable_" + location.toString();
1133
1134 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
1135 }
1136
1137 // Unused function.
1138 {
1139 ComputeShaderSpec spec;
1140
1141 spec.assembly =
1142 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1143
1144 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1145
1146 + getUnusedDecorations(location)
1147
1148 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1149
1150 + getUnusedTypesAndConstants() +
1151
1152 "%c_i32_0 = OpConstant %i32 0\n"
1153 "%c_i32_1 = OpConstant %i32 1\n"
1154
1155 + string(getComputeAsmInputOutputBuffer())
1156
1157 + getUnusedBuffer() +
1158
1159 "%id = OpVariable %uvec3ptr Input\n"
1160 "%zero = OpConstant %i32 0\n"
1161
1162 "%main = OpFunction %void None %voidf\n"
1163 "%label = OpLabel\n"
1164 "%idval = OpLoad %uvec3 %id\n"
1165 "%x = OpCompositeExtract %u32 %idval 0\n"
1166
1167 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1168 "%inval = OpLoad %f32 %inloc\n"
1169 "%neg = OpFNegate %f32 %inval\n"
1170 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1171 " OpStore %outloc %neg\n"
1172 " OpReturn\n"
1173 " OpFunctionEnd\n"
1174
1175 + getUnusedFunctionBody();
1176
1177 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1178 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1179 spec.numWorkGroups = IVec3(numElements, 1, 1);
1180
1181 std::string testName = "function_" + location.toString();
1182
1183 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(),spec));
1184 }
1185 }
1186
1187 return group.release();
1188 }
1189
1190 template<bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1191 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1192 {
1193 if (outputAllocs.size() != 1)
1194 return false;
1195
1196 vector<deUint8> input1Bytes;
1197 vector<deUint8> input2Bytes;
1198 vector<deUint8> expectedBytes;
1199
1200 inputs[0].getBytes(input1Bytes);
1201 inputs[1].getBytes(input2Bytes);
1202 expectedOutputs[0].getBytes(expectedBytes);
1203
1204 const deInt32* const expectedOutputAsInt = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1205 const deInt32* const outputAsInt = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1206 const float* const input1AsFloat = reinterpret_cast<const float*>(&input1Bytes.front());
1207 const float* const input2AsFloat = reinterpret_cast<const float*>(&input2Bytes.front());
1208 bool returnValue = true;
1209
1210 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1211 {
1212 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1213 continue;
1214
1215 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1216 {
1217 log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1218 returnValue = false;
1219 }
1220 }
1221 return returnValue;
1222 }
1223
1224 typedef VkBool32 (*compareFuncType) (float, float);
1225
1226 struct OpFUnordCase
1227 {
1228 const char* name;
1229 const char* opCode;
1230 compareFuncType compareFunc;
1231
OpFUnordCasevkt::SpirVAssembly::__anon437b2d460111::OpFUnordCase1232 OpFUnordCase (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1233 : name (_name)
1234 , opCode (_opCode)
1235 , compareFunc (_compareFunc) {}
1236 };
1237
// Appends an OpFUnordCase to a container named `cases`, which must be in scope
// where the macro is expanded. NAME becomes both the case name (stringified) and
// part of a local helper struct's name, OPCODE is passed through as the opcode
// string, and OPERATOR is the C++ comparison operator the generated callback
// applies to its two float arguments, returning VK_TRUE or VK_FALSE.
// The do/while wrapper makes the expansion behave as a single statement.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
	do { \
		struct compare_##NAME \
		{ \
			static VkBool32 compare (float x, float y) \
			{ \
				if (x OPERATOR y) \
					return VK_TRUE; \
				return VK_FALSE; \
			} \
		}; \
		cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
	} while (deGetFalse())
1243
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1244 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1245 {
1246 const string nan = testWithNan ? "_nan" : "";
1247 const string groupName = "opfunord" + nan;
1248 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1249 de::Random rnd (deStringHash(group->getName()));
1250 const int numElements = 100;
1251 vector<OpFUnordCase> cases;
1252 string extensions = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1253 string capabilities = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1254 string exeModes = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1255 const StringTemplate shaderTemplate (
1256 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1257 "OpSource GLSL 430\n"
1258 "OpName %main \"main\"\n"
1259 "OpName %id \"gl_GlobalInvocationID\"\n"
1260
1261 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1262
1263 "OpDecorate %buf BufferBlock\n"
1264 "OpDecorate %buf2 BufferBlock\n"
1265 "OpDecorate %indata1 DescriptorSet 0\n"
1266 "OpDecorate %indata1 Binding 0\n"
1267 "OpDecorate %indata2 DescriptorSet 0\n"
1268 "OpDecorate %indata2 Binding 1\n"
1269 "OpDecorate %outdata DescriptorSet 0\n"
1270 "OpDecorate %outdata Binding 2\n"
1271 "OpDecorate %f32arr ArrayStride 4\n"
1272 "OpDecorate %i32arr ArrayStride 4\n"
1273 "OpMemberDecorate %buf 0 Offset 0\n"
1274 "OpMemberDecorate %buf2 0 Offset 0\n"
1275
1276 + string(getComputeAsmCommonTypes()) +
1277
1278 "%buf = OpTypeStruct %f32arr\n"
1279 "%bufptr = OpTypePointer Uniform %buf\n"
1280 "%indata1 = OpVariable %bufptr Uniform\n"
1281 "%indata2 = OpVariable %bufptr Uniform\n"
1282
1283 "%buf2 = OpTypeStruct %i32arr\n"
1284 "%buf2ptr = OpTypePointer Uniform %buf2\n"
1285 "%outdata = OpVariable %buf2ptr Uniform\n"
1286
1287 "%id = OpVariable %uvec3ptr Input\n"
1288 "%zero = OpConstant %i32 0\n"
1289 "%consti1 = OpConstant %i32 1\n"
1290 "%constf1 = OpConstant %f32 1.0\n"
1291
1292 "%main = OpFunction %void None %voidf\n"
1293 "%label = OpLabel\n"
1294 "%idval = OpLoad %uvec3 %id\n"
1295 "%x = OpCompositeExtract %u32 %idval 0\n"
1296
1297 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1298 "%inval1 = OpLoad %f32 %inloc1\n"
1299 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1300 "%inval2 = OpLoad %f32 %inloc2\n"
1301 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
1302
1303 "%result = ${OPCODE} %bool %inval1 %inval2\n"
1304 "%int_res = OpSelect %i32 %result %consti1 %zero\n"
1305 " OpStore %outloc %int_res\n"
1306
1307 " OpReturn\n"
1308 " OpFunctionEnd\n");
1309
1310 ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1311 ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1312 ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1313 ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1314 ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1315 ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1316
1317 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1318 {
1319 map<string, string> specializations;
1320 ComputeShaderSpec spec;
1321 const float NaN = std::numeric_limits<float>::quiet_NaN();
1322 vector<float> inputFloats1 (numElements, 0);
1323 vector<float> inputFloats2 (numElements, 0);
1324 vector<deInt32> expectedInts (numElements, 0);
1325
1326 specializations["OPCODE"] = cases[caseNdx].opCode;
1327 spec.assembly = shaderTemplate.specialize(specializations);
1328
1329 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1330 for (size_t ndx = 0; ndx < numElements; ++ndx)
1331 {
1332 switch (ndx % 6)
1333 {
1334 case 0: inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1335 case 1: inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1336 case 2: inputFloats2[ndx] = inputFloats1[ndx]; break;
1337 case 3: inputFloats2[ndx] = NaN; break;
1338 case 4: inputFloats2[ndx] = inputFloats1[ndx]; inputFloats1[ndx] = NaN; break;
1339 case 5: inputFloats2[ndx] = NaN; inputFloats1[ndx] = NaN; break;
1340 }
1341 expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1342 }
1343
1344 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1345 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1346 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1347 spec.numWorkGroups = IVec3(numElements, 1, 1);
1348 spec.verifyIO = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1349
1350 if (testWithNan)
1351 {
1352 spec.extensions.push_back("VK_KHR_shader_float_controls");
1353 spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1354 }
1355
1356 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1357 }
1358
1359 return group.release();
1360 }
1361
1362 struct OpAtomicCase
1363 {
1364 const char* name;
1365 const char* assembly;
1366 const char* retValAssembly;
1367 OpAtomicType opAtomic;
1368 deInt32 numOutputElements;
1369
OpAtomicCasevkt::SpirVAssembly::__anon437b2d460111::OpAtomicCase1370 OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1371 : name (_name)
1372 , assembly (_assembly)
1373 , retValAssembly (_retValAssembly)
1374 , opAtomic (_opAtomic)
1375 , numOutputElements (_numOutputElements) {}
1376 };
1377
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1378 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1379 {
1380 std::string groupName ("opatomic");
1381 if (useStorageBuffer)
1382 groupName += "_storage_buffer";
1383 if (verifyReturnValues)
1384 groupName += "_return_values";
1385 if (volatileAtomic)
1386 groupName += "_volatile";
1387 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1388 vector<OpAtomicCase> cases;
1389
1390 const StringTemplate shaderTemplate (
1391
1392 string("OpCapability Shader\n") +
1393 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1394 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1395 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1396 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1397 "OpEntryPoint GLCompute %main \"main\" %id\n"
1398 "OpExecutionMode %main LocalSize 1 1 1\n" +
1399
1400 "OpSource GLSL 430\n"
1401 "OpName %main \"main\"\n"
1402 "OpName %id \"gl_GlobalInvocationID\"\n"
1403
1404 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1405
1406 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1407 "OpDecorate %indata DescriptorSet 0\n"
1408 "OpDecorate %indata Binding 0\n"
1409 "OpDecorate %i32arr ArrayStride 4\n"
1410 "OpMemberDecorate %buf 0 Offset 0\n"
1411
1412 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1413 "OpDecorate %sum DescriptorSet 0\n"
1414 "OpDecorate %sum Binding 1\n"
1415 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1416
1417 "${RETVAL_BUF_DECORATE}"
1418
1419 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1420
1421 "%buf = OpTypeStruct %i32arr\n"
1422 "%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1423 "%indata = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1424
1425 "%sumbuf = OpTypeStruct %i32arr\n"
1426 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1427 "%sum = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1428
1429 "${RETVAL_BUF_DECL}"
1430
1431 "%id = OpVariable %uvec3ptr Input\n"
1432 "%minusone = OpConstant %i32 -1\n"
1433 "%zero = OpConstant %i32 0\n"
1434 "%one = OpConstant %u32 1\n"
1435 "%two = OpConstant %i32 2\n"
1436 "%five = OpConstant %i32 5\n"
1437 "%volbit = OpConstant %i32 32768\n"
1438
1439 "%main = OpFunction %void None %voidf\n"
1440 "%label = OpLabel\n"
1441 "%idval = OpLoad %uvec3 %id\n"
1442 "%x = OpCompositeExtract %u32 %idval 0\n"
1443
1444 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
1445 "%inval = OpLoad %i32 %inloc\n"
1446
1447 "%outloc = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1448 "${INSTRUCTION}"
1449 "${RETVAL_ASSEMBLY}"
1450
1451 " OpReturn\n"
1452 " OpFunctionEnd\n");
1453
1454 #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1455 do { \
1456 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1457 } while (deGetFalse())
1458 #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1459 #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1460
1461 ADD_OPATOMIC_CASE_1(iadd, "%retv = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1462 " OpStore %retloc %retv\n", OPATOMIC_IADD );
1463 ADD_OPATOMIC_CASE_1(isub, "%retv = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1464 " OpStore %retloc %retv\n", OPATOMIC_ISUB );
1465 ADD_OPATOMIC_CASE_1(iinc, "%retv = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1466 " OpStore %retloc %retv\n", OPATOMIC_IINC );
1467 ADD_OPATOMIC_CASE_1(idec, "%retv = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1468 " OpStore %retloc %retv\n", OPATOMIC_IDEC );
1469 if (!verifyReturnValues)
1470 {
1471 ADD_OPATOMIC_CASE_N(load, "%inval2 = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1472 " OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1473 ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1474 }
1475
1476 ADD_OPATOMIC_CASE_N(compex, "%even = OpSMod %i32 %inval %two\n"
1477 " OpStore %outloc %even\n"
1478 "%retv = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1479 " OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1480
1481
1482 #undef ADD_OPATOMIC_CASE
1483 #undef ADD_OPATOMIC_CASE_1
1484 #undef ADD_OPATOMIC_CASE_N
1485
1486 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1487 {
1488 map<string, string> specializations;
1489 ComputeShaderSpec spec;
1490 vector<deInt32> inputInts (numElements, 0);
1491 vector<deInt32> expected (cases[caseNdx].numOutputElements, -1);
1492
1493 if (volatileAtomic)
1494 {
1495 spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1496 spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1497
1498 // volatile, queuefamily scope
1499 specializations["SEMANTICS"] = "%volbit";
1500 specializations["SCOPE"] = "%five";
1501 }
1502 else
1503 {
1504 // non-volatile, device scope
1505 specializations["SEMANTICS"] = "%zero";
1506 specializations["SCOPE"] = "%one";
1507 }
1508 specializations["INDEX"] = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1509 specializations["INSTRUCTION"] = cases[caseNdx].assembly;
1510 specializations["BLOCK_DECORATION"] = useStorageBuffer ? "Block" : "BufferBlock";
1511 specializations["BLOCK_POINTER_TYPE"] = useStorageBuffer ? "StorageBuffer" : "Uniform";
1512
1513 if (verifyReturnValues)
1514 {
1515 const StringTemplate blockDecoration (
1516 "\n"
1517 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1518 "OpDecorate %ret DescriptorSet 0\n"
1519 "OpDecorate %ret Binding 2\n"
1520 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1521
1522 const StringTemplate blockDeclaration (
1523 "\n"
1524 "%retbuf = OpTypeStruct %i32arr\n"
1525 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1526 "%ret = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1527
1528 specializations["RETVAL_ASSEMBLY"] =
1529 "%retloc = OpAccessChain %i32ptr %ret %zero %x\n"
1530 + std::string(cases[caseNdx].retValAssembly);
1531
1532 specializations["RETVAL_BUF_DECORATE"] = blockDecoration.specialize(specializations);
1533 specializations["RETVAL_BUF_DECL"] = blockDeclaration.specialize(specializations);
1534 }
1535 else
1536 {
1537 specializations["RETVAL_ASSEMBLY"] = "";
1538 specializations["RETVAL_BUF_DECORATE"] = "";
1539 specializations["RETVAL_BUF_DECL"] = "";
1540 }
1541
1542 spec.assembly = shaderTemplate.specialize(specializations);
1543
1544 // Specialize one more time, to catch things that were in a template parameter
1545 const StringTemplate assemblyTemplate(spec.assembly);
1546 spec.assembly = assemblyTemplate.specialize(specializations);
1547
1548 if (useStorageBuffer)
1549 spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1550
1551 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1552 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1553 if (verifyReturnValues)
1554 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1555 spec.numWorkGroups = IVec3(numElements, 1, 1);
1556
1557 if (verifyReturnValues)
1558 {
1559 switch (cases[caseNdx].opAtomic)
1560 {
1561 case OPATOMIC_IADD:
1562 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1563 break;
1564 case OPATOMIC_ISUB:
1565 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1566 break;
1567 case OPATOMIC_IINC:
1568 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1569 break;
1570 case OPATOMIC_IDEC:
1571 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1572 break;
1573 case OPATOMIC_COMPEX:
1574 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1575 break;
1576 default:
1577 DE_FATAL("Unsupported OpAtomic type for return value verification");
1578 }
1579 }
1580 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1581 }
1582
1583 return group.release();
1584 }
1585
createOpLineGroup(tcu::TestContext & testCtx)1586 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1587 {
1588 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1589 ComputeShaderSpec spec;
1590 de::Random rnd (deStringHash(group->getName()));
1591 const int numElements = 100;
1592 vector<float> positiveFloats (numElements, 0);
1593 vector<float> negativeFloats (numElements, 0);
1594
1595 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1596
1597 for (size_t ndx = 0; ndx < numElements; ++ndx)
1598 negativeFloats[ndx] = -positiveFloats[ndx];
1599
1600 spec.assembly =
1601 string(getComputeAsmShaderPreamble()) +
1602
1603 "%fname1 = OpString \"negateInputs.comp\"\n"
1604 "%fname2 = OpString \"negateInputs\"\n"
1605
1606 "OpSource GLSL 430\n"
1607 "OpName %main \"main\"\n"
1608 "OpName %id \"gl_GlobalInvocationID\"\n"
1609
1610 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1611
1612 + string(getComputeAsmInputOutputBufferTraits()) +
1613
1614 "OpLine %fname1 0 0\n" // At the earliest possible position
1615
1616 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1617
1618 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1619 "OpLine %fname2 1 0\n" // Different filenames
1620 "OpLine %fname1 1000 100000\n"
1621
1622 "%id = OpVariable %uvec3ptr Input\n"
1623 "%zero = OpConstant %i32 0\n"
1624
1625 "OpLine %fname1 1 1\n" // Before a function
1626
1627 "%main = OpFunction %void None %voidf\n"
1628 "%label = OpLabel\n"
1629
1630 "OpLine %fname1 1 1\n" // In a function
1631
1632 "%idval = OpLoad %uvec3 %id\n"
1633 "%x = OpCompositeExtract %u32 %idval 0\n"
1634 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1635 "%inval = OpLoad %f32 %inloc\n"
1636 "%neg = OpFNegate %f32 %inval\n"
1637 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1638 " OpStore %outloc %neg\n"
1639 " OpReturn\n"
1640 " OpFunctionEnd\n";
1641 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1642 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1643 spec.numWorkGroups = IVec3(numElements, 1, 1);
1644
1645 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1646
1647 return group.release();
1648 }
1649
veryfiBinaryShader(const ProgramBinary & binary)1650 bool veryfiBinaryShader (const ProgramBinary& binary)
1651 {
1652 const size_t paternCount = 3u;
1653 bool paternsCheck[paternCount] =
1654 {
1655 false, false, false
1656 };
1657 const string patersns[paternCount] =
1658 {
1659 "VULKAN CTS",
1660 "Negative values",
1661 "Date: 2017/09/21"
1662 };
1663 size_t paternNdx = 0u;
1664
1665 for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1666 {
1667 if (false == paternsCheck[paternNdx] &&
1668 patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1669 deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1670 {
1671 paternsCheck[paternNdx]= true;
1672 paternNdx++;
1673 if (paternNdx == paternCount)
1674 break;
1675 }
1676 }
1677
1678 for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1679 {
1680 if (!paternsCheck[ndx])
1681 return false;
1682 }
1683
1684 return true;
1685 }
1686
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1687 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1688 {
1689 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1690 ComputeShaderSpec spec;
1691 de::Random rnd (deStringHash(group->getName()));
1692 const int numElements = 10;
1693 vector<float> positiveFloats (numElements, 0);
1694 vector<float> negativeFloats (numElements, 0);
1695
1696 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1697
1698 for (size_t ndx = 0; ndx < numElements; ++ndx)
1699 negativeFloats[ndx] = -positiveFloats[ndx];
1700
1701 spec.assembly =
1702 string(getComputeAsmShaderPreamble()) +
1703 "%fname = OpString \"negateInputs.comp\"\n"
1704
1705 "OpSource GLSL 430\n"
1706 "OpName %main \"main\"\n"
1707 "OpName %id \"gl_GlobalInvocationID\"\n"
1708 "OpModuleProcessed \"VULKAN CTS\"\n" //OpModuleProcessed;
1709 "OpModuleProcessed \"Negative values\"\n"
1710 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1711 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1712
1713 + string(getComputeAsmInputOutputBufferTraits())
1714
1715 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1716
1717 "OpLine %fname 0 1\n"
1718
1719 "OpLine %fname 1000 1\n"
1720
1721 "%id = OpVariable %uvec3ptr Input\n"
1722 "%zero = OpConstant %i32 0\n"
1723 "%main = OpFunction %void None %voidf\n"
1724
1725 "%label = OpLabel\n"
1726 "%idval = OpLoad %uvec3 %id\n"
1727 "%x = OpCompositeExtract %u32 %idval 0\n"
1728
1729 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1730 "%inval = OpLoad %f32 %inloc\n"
1731 "%neg = OpFNegate %f32 %inval\n"
1732 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1733 " OpStore %outloc %neg\n"
1734 " OpReturn\n"
1735 " OpFunctionEnd\n";
1736 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1737 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1738 spec.numWorkGroups = IVec3(numElements, 1, 1);
1739 spec.verifyBinary = veryfiBinaryShader;
1740 spec.spirvVersion = SPIRV_VERSION_1_3;
1741
1742 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1743
1744 return group.release();
1745 }
1746
createOpNoLineGroup(tcu::TestContext & testCtx)1747 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1748 {
1749 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1750 ComputeShaderSpec spec;
1751 de::Random rnd (deStringHash(group->getName()));
1752 const int numElements = 100;
1753 vector<float> positiveFloats (numElements, 0);
1754 vector<float> negativeFloats (numElements, 0);
1755
1756 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1757
1758 for (size_t ndx = 0; ndx < numElements; ++ndx)
1759 negativeFloats[ndx] = -positiveFloats[ndx];
1760
1761 spec.assembly =
1762 string(getComputeAsmShaderPreamble()) +
1763
1764 "%fname = OpString \"negateInputs.comp\"\n"
1765
1766 "OpSource GLSL 430\n"
1767 "OpName %main \"main\"\n"
1768 "OpName %id \"gl_GlobalInvocationID\"\n"
1769
1770 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1771
1772 + string(getComputeAsmInputOutputBufferTraits()) +
1773
1774 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1775
1776 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1777
1778 "OpLine %fname 0 1\n"
1779 "OpNoLine\n" // Immediately following a preceding OpLine
1780
1781 "OpLine %fname 1000 1\n"
1782
1783 "%id = OpVariable %uvec3ptr Input\n"
1784 "%zero = OpConstant %i32 0\n"
1785
1786 "OpNoLine\n" // Contents after the previous OpLine
1787
1788 "%main = OpFunction %void None %voidf\n"
1789 "%label = OpLabel\n"
1790 "%idval = OpLoad %uvec3 %id\n"
1791 "%x = OpCompositeExtract %u32 %idval 0\n"
1792
1793 "OpNoLine\n" // Multiple OpNoLine
1794 "OpNoLine\n"
1795 "OpNoLine\n"
1796
1797 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1798 "%inval = OpLoad %f32 %inloc\n"
1799 "%neg = OpFNegate %f32 %inval\n"
1800 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1801 " OpStore %outloc %neg\n"
1802 " OpReturn\n"
1803 " OpFunctionEnd\n";
1804 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1805 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1806 spec.numWorkGroups = IVec3(numElements, 1, 1);
1807
1808 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
1809
1810 return group.release();
1811 }
1812
1813 // Compare instruction for the contraction compute case.
1814 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1815 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1816 {
1817 if (outputAllocs.size() != 1)
1818 return false;
1819
1820 // Only size is needed because we are not comparing the exact values.
1821 size_t byteSize = expectedOutputs[0].getByteSize();
1822
1823 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1824
1825 for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1826 if (outputAsFloat[i] != 0.f &&
1827 outputAsFloat[i] != -ldexp(1, -24)) {
1828 return false;
1829 }
1830 }
1831
1832 return true;
1833 }
1834
createNoContractionGroup(tcu::TestContext & testCtx)1835 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1836 {
1837 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1838 vector<CaseParameter> cases;
1839 const int numElements = 100;
1840 vector<float> inputFloats1 (numElements, 0);
1841 vector<float> inputFloats2 (numElements, 0);
1842 vector<float> outputFloats (numElements, 0);
1843 const StringTemplate shaderTemplate (
1844 string(getComputeAsmShaderPreamble()) +
1845
1846 "OpName %main \"main\"\n"
1847 "OpName %id \"gl_GlobalInvocationID\"\n"
1848
1849 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1850
1851 "${DECORATION}\n"
1852
1853 "OpDecorate %buf BufferBlock\n"
1854 "OpDecorate %indata1 DescriptorSet 0\n"
1855 "OpDecorate %indata1 Binding 0\n"
1856 "OpDecorate %indata2 DescriptorSet 0\n"
1857 "OpDecorate %indata2 Binding 1\n"
1858 "OpDecorate %outdata DescriptorSet 0\n"
1859 "OpDecorate %outdata Binding 2\n"
1860 "OpDecorate %f32arr ArrayStride 4\n"
1861 "OpMemberDecorate %buf 0 Offset 0\n"
1862
1863 + string(getComputeAsmCommonTypes()) +
1864
1865 "%buf = OpTypeStruct %f32arr\n"
1866 "%bufptr = OpTypePointer Uniform %buf\n"
1867 "%indata1 = OpVariable %bufptr Uniform\n"
1868 "%indata2 = OpVariable %bufptr Uniform\n"
1869 "%outdata = OpVariable %bufptr Uniform\n"
1870
1871 "%id = OpVariable %uvec3ptr Input\n"
1872 "%zero = OpConstant %i32 0\n"
1873 "%c_f_m1 = OpConstant %f32 -1.\n"
1874
1875 "%main = OpFunction %void None %voidf\n"
1876 "%label = OpLabel\n"
1877 "%idval = OpLoad %uvec3 %id\n"
1878 "%x = OpCompositeExtract %u32 %idval 0\n"
1879 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1880 "%inval1 = OpLoad %f32 %inloc1\n"
1881 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1882 "%inval2 = OpLoad %f32 %inloc2\n"
1883 "%mul = OpFMul %f32 %inval1 %inval2\n"
1884 "%add = OpFAdd %f32 %mul %c_f_m1\n"
1885 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1886 " OpStore %outloc %add\n"
1887 " OpReturn\n"
1888 " OpFunctionEnd\n");
1889
1890 cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1891 cases.push_back(CaseParameter("addition", "OpDecorate %add NoContraction"));
1892 cases.push_back(CaseParameter("both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1893
1894 for (size_t ndx = 0; ndx < numElements; ++ndx)
1895 {
1896 inputFloats1[ndx] = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1897 inputFloats2[ndx] = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1898 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1899 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1900 // So the final result will be 0.f or 0x1p-24.
1901 // If the operation is combined into a precise fused multiply-add, then the result would be
1902 // 2^-46 (0xa8800000).
1903 outputFloats[ndx] = 0.f;
1904 }
1905
1906 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1907 {
1908 map<string, string> specializations;
1909 ComputeShaderSpec spec;
1910
1911 specializations["DECORATION"] = cases[caseNdx].param;
1912 spec.assembly = shaderTemplate.specialize(specializations);
1913 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1914 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1915 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1916 spec.numWorkGroups = IVec3(numElements, 1, 1);
1917 // Check against the two possible answers based on rounding mode.
1918 spec.verifyIO = &compareNoContractCase;
1919
1920 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
1921 }
1922 return group.release();
1923 }
1924
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1925 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1926 {
1927 if (outputAllocs.size() != 1)
1928 return false;
1929
1930 vector<deUint8> expectedBytes;
1931 expectedOutputs[0].getBytes(expectedBytes);
1932
1933 const float* expectedOutputAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
1934 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1935
1936 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1937 {
1938 const float f0 = expectedOutputAsFloat[idx];
1939 const float f1 = outputAsFloat[idx];
1940 // \todo relative error needs to be fairly high because FRem may be implemented as
1941 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1942 if (deFloatAbs((f1 - f0) / f0) > 0.02)
1943 return false;
1944 }
1945
1946 return true;
1947 }
1948
createOpFRemGroup(tcu::TestContext & testCtx)1949 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1950 {
1951 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1952 ComputeShaderSpec spec;
1953 de::Random rnd (deStringHash(group->getName()));
1954 const int numElements = 200;
1955 vector<float> inputFloats1 (numElements, 0);
1956 vector<float> inputFloats2 (numElements, 0);
1957 vector<float> outputFloats (numElements, 0);
1958
1959 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1960 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1961
1962 for (size_t ndx = 0; ndx < numElements; ++ndx)
1963 {
1964 // Guard against divisors near zero.
1965 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1966 inputFloats2[ndx] = 8.f;
1967
1968 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1969 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1970 }
1971
1972 spec.assembly =
1973 string(getComputeAsmShaderPreamble()) +
1974
1975 "OpName %main \"main\"\n"
1976 "OpName %id \"gl_GlobalInvocationID\"\n"
1977
1978 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1979
1980 "OpDecorate %buf BufferBlock\n"
1981 "OpDecorate %indata1 DescriptorSet 0\n"
1982 "OpDecorate %indata1 Binding 0\n"
1983 "OpDecorate %indata2 DescriptorSet 0\n"
1984 "OpDecorate %indata2 Binding 1\n"
1985 "OpDecorate %outdata DescriptorSet 0\n"
1986 "OpDecorate %outdata Binding 2\n"
1987 "OpDecorate %f32arr ArrayStride 4\n"
1988 "OpMemberDecorate %buf 0 Offset 0\n"
1989
1990 + string(getComputeAsmCommonTypes()) +
1991
1992 "%buf = OpTypeStruct %f32arr\n"
1993 "%bufptr = OpTypePointer Uniform %buf\n"
1994 "%indata1 = OpVariable %bufptr Uniform\n"
1995 "%indata2 = OpVariable %bufptr Uniform\n"
1996 "%outdata = OpVariable %bufptr Uniform\n"
1997
1998 "%id = OpVariable %uvec3ptr Input\n"
1999 "%zero = OpConstant %i32 0\n"
2000
2001 "%main = OpFunction %void None %voidf\n"
2002 "%label = OpLabel\n"
2003 "%idval = OpLoad %uvec3 %id\n"
2004 "%x = OpCompositeExtract %u32 %idval 0\n"
2005 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2006 "%inval1 = OpLoad %f32 %inloc1\n"
2007 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2008 "%inval2 = OpLoad %f32 %inloc2\n"
2009 "%rem = OpFRem %f32 %inval1 %inval2\n"
2010 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2011 " OpStore %outloc %rem\n"
2012 " OpReturn\n"
2013 " OpFunctionEnd\n";
2014
2015 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2016 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2017 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2018 spec.numWorkGroups = IVec3(numElements, 1, 1);
2019 spec.verifyIO = &compareFRem;
2020
2021 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2022
2023 return group.release();
2024 }
2025
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2026 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2027 {
2028 if (outputAllocs.size() != 1)
2029 return false;
2030
2031 const BufferSp& expectedOutput (expectedOutputs[0].getBuffer());
2032 std::vector<deUint8> data;
2033 expectedOutput->getBytes(data);
2034
2035 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2036 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2037
2038 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2039 {
2040 const float f0 = expectedOutputAsFloat[idx];
2041 const float f1 = outputAsFloat[idx];
2042
2043 // For NMin, we accept NaN as output if both inputs were NaN.
2044 // Otherwise the NaN is the wrong choise, as on architectures that
2045 // do not handle NaN, those are huge values.
2046 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2047 return false;
2048 }
2049
2050 return true;
2051 }
2052
createOpNMinGroup(tcu::TestContext & testCtx)2053 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2054 {
2055 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2056 ComputeShaderSpec spec;
2057 de::Random rnd (deStringHash(group->getName()));
2058 const int numElements = 200;
2059 vector<float> inputFloats1 (numElements, 0);
2060 vector<float> inputFloats2 (numElements, 0);
2061 vector<float> outputFloats (numElements, 0);
2062
2063 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2064 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2065
2066 // Make the first case a full-NAN case.
2067 inputFloats1[0] = TCU_NAN;
2068 inputFloats2[0] = TCU_NAN;
2069
2070 for (size_t ndx = 0; ndx < numElements; ++ndx)
2071 {
2072 // By default, pick the smallest
2073 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2074
2075 // Make half of the cases NaN cases
2076 if ((ndx & 1) == 0)
2077 {
2078 // Alternate between the NaN operand
2079 if ((ndx & 2) == 0)
2080 {
2081 outputFloats[ndx] = inputFloats2[ndx];
2082 inputFloats1[ndx] = TCU_NAN;
2083 }
2084 else
2085 {
2086 outputFloats[ndx] = inputFloats1[ndx];
2087 inputFloats2[ndx] = TCU_NAN;
2088 }
2089 }
2090 }
2091
2092 spec.assembly =
2093 "OpCapability Shader\n"
2094 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2095 "OpMemoryModel Logical GLSL450\n"
2096 "OpEntryPoint GLCompute %main \"main\" %id\n"
2097 "OpExecutionMode %main LocalSize 1 1 1\n"
2098
2099 "OpName %main \"main\"\n"
2100 "OpName %id \"gl_GlobalInvocationID\"\n"
2101
2102 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2103
2104 "OpDecorate %buf BufferBlock\n"
2105 "OpDecorate %indata1 DescriptorSet 0\n"
2106 "OpDecorate %indata1 Binding 0\n"
2107 "OpDecorate %indata2 DescriptorSet 0\n"
2108 "OpDecorate %indata2 Binding 1\n"
2109 "OpDecorate %outdata DescriptorSet 0\n"
2110 "OpDecorate %outdata Binding 2\n"
2111 "OpDecorate %f32arr ArrayStride 4\n"
2112 "OpMemberDecorate %buf 0 Offset 0\n"
2113
2114 + string(getComputeAsmCommonTypes()) +
2115
2116 "%buf = OpTypeStruct %f32arr\n"
2117 "%bufptr = OpTypePointer Uniform %buf\n"
2118 "%indata1 = OpVariable %bufptr Uniform\n"
2119 "%indata2 = OpVariable %bufptr Uniform\n"
2120 "%outdata = OpVariable %bufptr Uniform\n"
2121
2122 "%id = OpVariable %uvec3ptr Input\n"
2123 "%zero = OpConstant %i32 0\n"
2124
2125 "%main = OpFunction %void None %voidf\n"
2126 "%label = OpLabel\n"
2127 "%idval = OpLoad %uvec3 %id\n"
2128 "%x = OpCompositeExtract %u32 %idval 0\n"
2129 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2130 "%inval1 = OpLoad %f32 %inloc1\n"
2131 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2132 "%inval2 = OpLoad %f32 %inloc2\n"
2133 "%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2134 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2135 " OpStore %outloc %rem\n"
2136 " OpReturn\n"
2137 " OpFunctionEnd\n";
2138
2139 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2140 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2141 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2142 spec.numWorkGroups = IVec3(numElements, 1, 1);
2143 spec.verifyIO = &compareNMin;
2144
2145 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2146
2147 return group.release();
2148 }
2149
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2150 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2151 {
2152 if (outputAllocs.size() != 1)
2153 return false;
2154
2155 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2156 std::vector<deUint8> data;
2157 expectedOutput->getBytes(data);
2158
2159 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2160 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2161
2162 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2163 {
2164 const float f0 = expectedOutputAsFloat[idx];
2165 const float f1 = outputAsFloat[idx];
2166
2167 // For NMax, NaN is considered acceptable result, since in
2168 // architectures that do not handle NaNs, those are huge values.
2169 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2170 return false;
2171 }
2172
2173 return true;
2174 }
2175
createOpNMaxGroup(tcu::TestContext & testCtx)2176 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2177 {
2178 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2179 ComputeShaderSpec spec;
2180 de::Random rnd (deStringHash(group->getName()));
2181 const int numElements = 200;
2182 vector<float> inputFloats1 (numElements, 0);
2183 vector<float> inputFloats2 (numElements, 0);
2184 vector<float> outputFloats (numElements, 0);
2185
2186 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2187 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2188
2189 // Make the first case a full-NAN case.
2190 inputFloats1[0] = TCU_NAN;
2191 inputFloats2[0] = TCU_NAN;
2192
2193 for (size_t ndx = 0; ndx < numElements; ++ndx)
2194 {
2195 // By default, pick the biggest
2196 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2197
2198 // Make half of the cases NaN cases
2199 if ((ndx & 1) == 0)
2200 {
2201 // Alternate between the NaN operand
2202 if ((ndx & 2) == 0)
2203 {
2204 outputFloats[ndx] = inputFloats2[ndx];
2205 inputFloats1[ndx] = TCU_NAN;
2206 }
2207 else
2208 {
2209 outputFloats[ndx] = inputFloats1[ndx];
2210 inputFloats2[ndx] = TCU_NAN;
2211 }
2212 }
2213 }
2214
2215 spec.assembly =
2216 "OpCapability Shader\n"
2217 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2218 "OpMemoryModel Logical GLSL450\n"
2219 "OpEntryPoint GLCompute %main \"main\" %id\n"
2220 "OpExecutionMode %main LocalSize 1 1 1\n"
2221
2222 "OpName %main \"main\"\n"
2223 "OpName %id \"gl_GlobalInvocationID\"\n"
2224
2225 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2226
2227 "OpDecorate %buf BufferBlock\n"
2228 "OpDecorate %indata1 DescriptorSet 0\n"
2229 "OpDecorate %indata1 Binding 0\n"
2230 "OpDecorate %indata2 DescriptorSet 0\n"
2231 "OpDecorate %indata2 Binding 1\n"
2232 "OpDecorate %outdata DescriptorSet 0\n"
2233 "OpDecorate %outdata Binding 2\n"
2234 "OpDecorate %f32arr ArrayStride 4\n"
2235 "OpMemberDecorate %buf 0 Offset 0\n"
2236
2237 + string(getComputeAsmCommonTypes()) +
2238
2239 "%buf = OpTypeStruct %f32arr\n"
2240 "%bufptr = OpTypePointer Uniform %buf\n"
2241 "%indata1 = OpVariable %bufptr Uniform\n"
2242 "%indata2 = OpVariable %bufptr Uniform\n"
2243 "%outdata = OpVariable %bufptr Uniform\n"
2244
2245 "%id = OpVariable %uvec3ptr Input\n"
2246 "%zero = OpConstant %i32 0\n"
2247
2248 "%main = OpFunction %void None %voidf\n"
2249 "%label = OpLabel\n"
2250 "%idval = OpLoad %uvec3 %id\n"
2251 "%x = OpCompositeExtract %u32 %idval 0\n"
2252 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2253 "%inval1 = OpLoad %f32 %inloc1\n"
2254 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2255 "%inval2 = OpLoad %f32 %inloc2\n"
2256 "%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2257 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2258 " OpStore %outloc %rem\n"
2259 " OpReturn\n"
2260 " OpFunctionEnd\n";
2261
2262 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2263 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2264 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2265 spec.numWorkGroups = IVec3(numElements, 1, 1);
2266 spec.verifyIO = &compareNMax;
2267
2268 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2269
2270 return group.release();
2271 }
2272
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2273 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2274 {
2275 if (outputAllocs.size() != 1)
2276 return false;
2277
2278 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2279 std::vector<deUint8> data;
2280 expectedOutput->getBytes(data);
2281
2282 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2283 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2284
2285 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2286 {
2287 const float e0 = expectedOutputAsFloat[idx * 2];
2288 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2289 const float res = outputAsFloat[idx];
2290
2291 // For NClamp, we have two possible outcomes based on
2292 // whether NaNs are handled or not.
2293 // If either min or max value is NaN, the result is undefined,
2294 // so this test doesn't stress those. If the clamped value is
2295 // NaN, and NaNs are handled, the result is min; if NaNs are not
2296 // handled, they are big values that result in max.
2297 // If all three parameters are NaN, the result should be NaN.
2298 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2299 (deFloatAbs(e0 - res) < 0.00001f) ||
2300 (deFloatAbs(e1 - res) < 0.00001f)))
2301 return false;
2302 }
2303
2304 return true;
2305 }
2306
createOpNClampGroup(tcu::TestContext & testCtx)2307 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2308 {
2309 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2310 ComputeShaderSpec spec;
2311 de::Random rnd (deStringHash(group->getName()));
2312 const int numElements = 200;
2313 vector<float> inputFloats1 (numElements, 0);
2314 vector<float> inputFloats2 (numElements, 0);
2315 vector<float> inputFloats3 (numElements, 0);
2316 vector<float> outputFloats (numElements * 2, 0);
2317
2318 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2319 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2320 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2321
2322 for (size_t ndx = 0; ndx < numElements; ++ndx)
2323 {
2324 // Results are only defined if max value is bigger than min value.
2325 if (inputFloats2[ndx] > inputFloats3[ndx])
2326 {
2327 float t = inputFloats2[ndx];
2328 inputFloats2[ndx] = inputFloats3[ndx];
2329 inputFloats3[ndx] = t;
2330 }
2331
2332 // By default, do the clamp, setting both possible answers
2333 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2334
2335 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2336 float maxResB = maxResA;
2337
2338 // Alternate between the NaN cases
2339 if (ndx & 1)
2340 {
2341 inputFloats1[ndx] = TCU_NAN;
2342 // If NaN is handled, the result should be same as the clamp minimum.
2343 // If NaN is not handled, the result should clamp to the clamp maximum.
2344 maxResA = inputFloats2[ndx];
2345 maxResB = inputFloats3[ndx];
2346 }
2347 else
2348 {
2349 // Not a NaN case - only one legal result.
2350 maxResA = defaultRes;
2351 maxResB = defaultRes;
2352 }
2353
2354 outputFloats[ndx * 2] = maxResA;
2355 outputFloats[ndx * 2 + 1] = maxResB;
2356 }
2357
2358 // Make the first case a full-NAN case.
2359 inputFloats1[0] = TCU_NAN;
2360 inputFloats2[0] = TCU_NAN;
2361 inputFloats3[0] = TCU_NAN;
2362 outputFloats[0] = TCU_NAN;
2363 outputFloats[1] = TCU_NAN;
2364
2365 spec.assembly =
2366 "OpCapability Shader\n"
2367 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2368 "OpMemoryModel Logical GLSL450\n"
2369 "OpEntryPoint GLCompute %main \"main\" %id\n"
2370 "OpExecutionMode %main LocalSize 1 1 1\n"
2371
2372 "OpName %main \"main\"\n"
2373 "OpName %id \"gl_GlobalInvocationID\"\n"
2374
2375 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2376
2377 "OpDecorate %buf BufferBlock\n"
2378 "OpDecorate %indata1 DescriptorSet 0\n"
2379 "OpDecorate %indata1 Binding 0\n"
2380 "OpDecorate %indata2 DescriptorSet 0\n"
2381 "OpDecorate %indata2 Binding 1\n"
2382 "OpDecorate %indata3 DescriptorSet 0\n"
2383 "OpDecorate %indata3 Binding 2\n"
2384 "OpDecorate %outdata DescriptorSet 0\n"
2385 "OpDecorate %outdata Binding 3\n"
2386 "OpDecorate %f32arr ArrayStride 4\n"
2387 "OpMemberDecorate %buf 0 Offset 0\n"
2388
2389 + string(getComputeAsmCommonTypes()) +
2390
2391 "%buf = OpTypeStruct %f32arr\n"
2392 "%bufptr = OpTypePointer Uniform %buf\n"
2393 "%indata1 = OpVariable %bufptr Uniform\n"
2394 "%indata2 = OpVariable %bufptr Uniform\n"
2395 "%indata3 = OpVariable %bufptr Uniform\n"
2396 "%outdata = OpVariable %bufptr Uniform\n"
2397
2398 "%id = OpVariable %uvec3ptr Input\n"
2399 "%zero = OpConstant %i32 0\n"
2400
2401 "%main = OpFunction %void None %voidf\n"
2402 "%label = OpLabel\n"
2403 "%idval = OpLoad %uvec3 %id\n"
2404 "%x = OpCompositeExtract %u32 %idval 0\n"
2405 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2406 "%inval1 = OpLoad %f32 %inloc1\n"
2407 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2408 "%inval2 = OpLoad %f32 %inloc2\n"
2409 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
2410 "%inval3 = OpLoad %f32 %inloc3\n"
2411 "%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2412 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2413 " OpStore %outloc %rem\n"
2414 " OpReturn\n"
2415 " OpFunctionEnd\n";
2416
2417 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2418 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2419 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2420 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2421 spec.numWorkGroups = IVec3(numElements, 1, 1);
2422 spec.verifyIO = &compareNClamp;
2423
2424 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
2425
2426 return group.release();
2427 }
2428
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2429 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2430 {
2431 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2432 de::Random rnd (deStringHash(group->getName()));
2433 const int numElements = 200;
2434
2435 const struct CaseParams
2436 {
2437 const char* name;
2438 const char* failMessage; // customized status message
2439 qpTestResult failResult; // override status on failure
2440 int op1Min, op1Max; // operand ranges
2441 int op2Min, op2Max;
2442 } cases[] =
2443 {
2444 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2445 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2446 };
2447 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2448
2449 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2450 {
2451 const CaseParams& params = cases[caseNdx];
2452 ComputeShaderSpec spec;
2453 vector<deInt32> inputInts1 (numElements, 0);
2454 vector<deInt32> inputInts2 (numElements, 0);
2455 vector<deInt32> outputInts (numElements, 0);
2456
2457 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2458 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2459
2460 for (int ndx = 0; ndx < numElements; ++ndx)
2461 {
2462 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2463 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2464 }
2465
2466 spec.assembly =
2467 string(getComputeAsmShaderPreamble()) +
2468
2469 "OpName %main \"main\"\n"
2470 "OpName %id \"gl_GlobalInvocationID\"\n"
2471
2472 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2473
2474 "OpDecorate %buf BufferBlock\n"
2475 "OpDecorate %indata1 DescriptorSet 0\n"
2476 "OpDecorate %indata1 Binding 0\n"
2477 "OpDecorate %indata2 DescriptorSet 0\n"
2478 "OpDecorate %indata2 Binding 1\n"
2479 "OpDecorate %outdata DescriptorSet 0\n"
2480 "OpDecorate %outdata Binding 2\n"
2481 "OpDecorate %i32arr ArrayStride 4\n"
2482 "OpMemberDecorate %buf 0 Offset 0\n"
2483
2484 + string(getComputeAsmCommonTypes()) +
2485
2486 "%buf = OpTypeStruct %i32arr\n"
2487 "%bufptr = OpTypePointer Uniform %buf\n"
2488 "%indata1 = OpVariable %bufptr Uniform\n"
2489 "%indata2 = OpVariable %bufptr Uniform\n"
2490 "%outdata = OpVariable %bufptr Uniform\n"
2491
2492 "%id = OpVariable %uvec3ptr Input\n"
2493 "%zero = OpConstant %i32 0\n"
2494
2495 "%main = OpFunction %void None %voidf\n"
2496 "%label = OpLabel\n"
2497 "%idval = OpLoad %uvec3 %id\n"
2498 "%x = OpCompositeExtract %u32 %idval 0\n"
2499 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2500 "%inval1 = OpLoad %i32 %inloc1\n"
2501 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2502 "%inval2 = OpLoad %i32 %inloc2\n"
2503 "%rem = OpSRem %i32 %inval1 %inval2\n"
2504 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2505 " OpStore %outloc %rem\n"
2506 " OpReturn\n"
2507 " OpFunctionEnd\n";
2508
2509 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2510 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2511 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
2512 spec.numWorkGroups = IVec3(numElements, 1, 1);
2513 spec.failResult = params.failResult;
2514 spec.failMessage = params.failMessage;
2515
2516 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2517 }
2518
2519 return group.release();
2520 }
2521
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2522 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2523 {
2524 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2525 de::Random rnd (deStringHash(group->getName()));
2526 const int numElements = 200;
2527
2528 const struct CaseParams
2529 {
2530 const char* name;
2531 const char* failMessage; // customized status message
2532 qpTestResult failResult; // override status on failure
2533 bool positive;
2534 } cases[] =
2535 {
2536 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2537 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2538 };
2539 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2540
2541 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2542 {
2543 const CaseParams& params = cases[caseNdx];
2544 ComputeShaderSpec spec;
2545 vector<deInt64> inputInts1 (numElements, 0);
2546 vector<deInt64> inputInts2 (numElements, 0);
2547 vector<deInt64> outputInts (numElements, 0);
2548
2549 if (params.positive)
2550 {
2551 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2552 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2553 }
2554 else
2555 {
2556 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2557 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2558 }
2559
2560 for (int ndx = 0; ndx < numElements; ++ndx)
2561 {
2562 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2563 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2564 }
2565
2566 spec.assembly =
2567 "OpCapability Int64\n"
2568
2569 + string(getComputeAsmShaderPreamble()) +
2570
2571 "OpName %main \"main\"\n"
2572 "OpName %id \"gl_GlobalInvocationID\"\n"
2573
2574 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2575
2576 "OpDecorate %buf BufferBlock\n"
2577 "OpDecorate %indata1 DescriptorSet 0\n"
2578 "OpDecorate %indata1 Binding 0\n"
2579 "OpDecorate %indata2 DescriptorSet 0\n"
2580 "OpDecorate %indata2 Binding 1\n"
2581 "OpDecorate %outdata DescriptorSet 0\n"
2582 "OpDecorate %outdata Binding 2\n"
2583 "OpDecorate %i64arr ArrayStride 8\n"
2584 "OpMemberDecorate %buf 0 Offset 0\n"
2585
2586 + string(getComputeAsmCommonTypes())
2587 + string(getComputeAsmCommonInt64Types()) +
2588
2589 "%buf = OpTypeStruct %i64arr\n"
2590 "%bufptr = OpTypePointer Uniform %buf\n"
2591 "%indata1 = OpVariable %bufptr Uniform\n"
2592 "%indata2 = OpVariable %bufptr Uniform\n"
2593 "%outdata = OpVariable %bufptr Uniform\n"
2594
2595 "%id = OpVariable %uvec3ptr Input\n"
2596 "%zero = OpConstant %i64 0\n"
2597
2598 "%main = OpFunction %void None %voidf\n"
2599 "%label = OpLabel\n"
2600 "%idval = OpLoad %uvec3 %id\n"
2601 "%x = OpCompositeExtract %u32 %idval 0\n"
2602 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2603 "%inval1 = OpLoad %i64 %inloc1\n"
2604 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2605 "%inval2 = OpLoad %i64 %inloc2\n"
2606 "%rem = OpSRem %i64 %inval1 %inval2\n"
2607 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2608 " OpStore %outloc %rem\n"
2609 " OpReturn\n"
2610 " OpFunctionEnd\n";
2611
2612 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2613 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2614 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
2615 spec.numWorkGroups = IVec3(numElements, 1, 1);
2616 spec.failResult = params.failResult;
2617 spec.failMessage = params.failMessage;
2618
2619 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2620
2621 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2622 }
2623
2624 return group.release();
2625 }
2626
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2627 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2628 {
2629 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2630 de::Random rnd (deStringHash(group->getName()));
2631 const int numElements = 200;
2632
2633 const struct CaseParams
2634 {
2635 const char* name;
2636 const char* failMessage; // customized status message
2637 qpTestResult failResult; // override status on failure
2638 int op1Min, op1Max; // operand ranges
2639 int op2Min, op2Max;
2640 } cases[] =
2641 {
2642 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2643 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2644 };
2645 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2646
2647 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2648 {
2649 const CaseParams& params = cases[caseNdx];
2650
2651 ComputeShaderSpec spec;
2652 vector<deInt32> inputInts1 (numElements, 0);
2653 vector<deInt32> inputInts2 (numElements, 0);
2654 vector<deInt32> outputInts (numElements, 0);
2655
2656 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2657 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2658
2659 for (int ndx = 0; ndx < numElements; ++ndx)
2660 {
2661 deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2662 if (rem == 0)
2663 {
2664 outputInts[ndx] = 0;
2665 }
2666 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2667 {
2668 // They have the same sign
2669 outputInts[ndx] = rem;
2670 }
2671 else
2672 {
2673 // They have opposite sign. The remainder operation takes the
2674 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2675 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2676 // the result has the correct sign and that it is still
2677 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2678 //
2679 // See also http://mathforum.org/library/drmath/view/52343.html
2680 outputInts[ndx] = rem + inputInts2[ndx];
2681 }
2682 }
2683
2684 spec.assembly =
2685 string(getComputeAsmShaderPreamble()) +
2686
2687 "OpName %main \"main\"\n"
2688 "OpName %id \"gl_GlobalInvocationID\"\n"
2689
2690 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2691
2692 "OpDecorate %buf BufferBlock\n"
2693 "OpDecorate %indata1 DescriptorSet 0\n"
2694 "OpDecorate %indata1 Binding 0\n"
2695 "OpDecorate %indata2 DescriptorSet 0\n"
2696 "OpDecorate %indata2 Binding 1\n"
2697 "OpDecorate %outdata DescriptorSet 0\n"
2698 "OpDecorate %outdata Binding 2\n"
2699 "OpDecorate %i32arr ArrayStride 4\n"
2700 "OpMemberDecorate %buf 0 Offset 0\n"
2701
2702 + string(getComputeAsmCommonTypes()) +
2703
2704 "%buf = OpTypeStruct %i32arr\n"
2705 "%bufptr = OpTypePointer Uniform %buf\n"
2706 "%indata1 = OpVariable %bufptr Uniform\n"
2707 "%indata2 = OpVariable %bufptr Uniform\n"
2708 "%outdata = OpVariable %bufptr Uniform\n"
2709
2710 "%id = OpVariable %uvec3ptr Input\n"
2711 "%zero = OpConstant %i32 0\n"
2712
2713 "%main = OpFunction %void None %voidf\n"
2714 "%label = OpLabel\n"
2715 "%idval = OpLoad %uvec3 %id\n"
2716 "%x = OpCompositeExtract %u32 %idval 0\n"
2717 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2718 "%inval1 = OpLoad %i32 %inloc1\n"
2719 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2720 "%inval2 = OpLoad %i32 %inloc2\n"
2721 "%rem = OpSMod %i32 %inval1 %inval2\n"
2722 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2723 " OpStore %outloc %rem\n"
2724 " OpReturn\n"
2725 " OpFunctionEnd\n";
2726
2727 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2728 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2729 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
2730 spec.numWorkGroups = IVec3(numElements, 1, 1);
2731 spec.failResult = params.failResult;
2732 spec.failMessage = params.failMessage;
2733
2734 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2735 }
2736
2737 return group.release();
2738 }
2739
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2740 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2741 {
2742 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2743 de::Random rnd (deStringHash(group->getName()));
2744 const int numElements = 200;
2745
2746 const struct CaseParams
2747 {
2748 const char* name;
2749 const char* failMessage; // customized status message
2750 qpTestResult failResult; // override status on failure
2751 bool positive;
2752 } cases[] =
2753 {
2754 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2755 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2756 };
2757 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2758
2759 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2760 {
2761 const CaseParams& params = cases[caseNdx];
2762
2763 ComputeShaderSpec spec;
2764 vector<deInt64> inputInts1 (numElements, 0);
2765 vector<deInt64> inputInts2 (numElements, 0);
2766 vector<deInt64> outputInts (numElements, 0);
2767
2768
2769 if (params.positive)
2770 {
2771 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2772 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2773 }
2774 else
2775 {
2776 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2777 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2778 }
2779
2780 for (int ndx = 0; ndx < numElements; ++ndx)
2781 {
2782 deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2783 if (rem == 0)
2784 {
2785 outputInts[ndx] = 0;
2786 }
2787 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2788 {
2789 // They have the same sign
2790 outputInts[ndx] = rem;
2791 }
2792 else
2793 {
2794 // They have opposite sign. The remainder operation takes the
2795 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2796 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2797 // the result has the correct sign and that it is still
2798 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2799 //
2800 // See also http://mathforum.org/library/drmath/view/52343.html
2801 outputInts[ndx] = rem + inputInts2[ndx];
2802 }
2803 }
2804
2805 spec.assembly =
2806 "OpCapability Int64\n"
2807
2808 + string(getComputeAsmShaderPreamble()) +
2809
2810 "OpName %main \"main\"\n"
2811 "OpName %id \"gl_GlobalInvocationID\"\n"
2812
2813 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2814
2815 "OpDecorate %buf BufferBlock\n"
2816 "OpDecorate %indata1 DescriptorSet 0\n"
2817 "OpDecorate %indata1 Binding 0\n"
2818 "OpDecorate %indata2 DescriptorSet 0\n"
2819 "OpDecorate %indata2 Binding 1\n"
2820 "OpDecorate %outdata DescriptorSet 0\n"
2821 "OpDecorate %outdata Binding 2\n"
2822 "OpDecorate %i64arr ArrayStride 8\n"
2823 "OpMemberDecorate %buf 0 Offset 0\n"
2824
2825 + string(getComputeAsmCommonTypes())
2826 + string(getComputeAsmCommonInt64Types()) +
2827
2828 "%buf = OpTypeStruct %i64arr\n"
2829 "%bufptr = OpTypePointer Uniform %buf\n"
2830 "%indata1 = OpVariable %bufptr Uniform\n"
2831 "%indata2 = OpVariable %bufptr Uniform\n"
2832 "%outdata = OpVariable %bufptr Uniform\n"
2833
2834 "%id = OpVariable %uvec3ptr Input\n"
2835 "%zero = OpConstant %i64 0\n"
2836
2837 "%main = OpFunction %void None %voidf\n"
2838 "%label = OpLabel\n"
2839 "%idval = OpLoad %uvec3 %id\n"
2840 "%x = OpCompositeExtract %u32 %idval 0\n"
2841 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2842 "%inval1 = OpLoad %i64 %inloc1\n"
2843 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2844 "%inval2 = OpLoad %i64 %inloc2\n"
2845 "%rem = OpSMod %i64 %inval1 %inval2\n"
2846 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2847 " OpStore %outloc %rem\n"
2848 " OpReturn\n"
2849 " OpFunctionEnd\n";
2850
2851 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2852 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2853 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
2854 spec.numWorkGroups = IVec3(numElements, 1, 1);
2855 spec.failResult = params.failResult;
2856 spec.failMessage = params.failMessage;
2857
2858 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2859
2860 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, spec));
2861 }
2862
2863 return group.release();
2864 }
2865
2866 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2867 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2868 {
2869 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2870 de::Random rnd (deStringHash(group->getName()));
2871 const int numElements = 100;
2872
2873 // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2874 ComputeShaderSpec spec1;
2875 vector<Vec4> inputFloats1 (numElements);
2876 vector<Vec4> outputFloats1 (numElements);
2877
2878 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2879
2880 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2881 floorAll(inputFloats1);
2882
2883 for (size_t ndx = 0; ndx < numElements; ++ndx)
2884 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2885
2886 spec1.assembly =
2887 string(getComputeAsmShaderPreamble()) +
2888
2889 "OpName %main \"main\"\n"
2890 "OpName %id \"gl_GlobalInvocationID\"\n"
2891
2892 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2893 "OpDecorate %vec4arr ArrayStride 16\n"
2894
2895 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2896
2897 "%vec4 = OpTypeVector %f32 4\n"
2898 "%vec4ptr_u = OpTypePointer Uniform %vec4\n"
2899 "%vec4ptr_f = OpTypePointer Function %vec4\n"
2900 "%vec4arr = OpTypeRuntimeArray %vec4\n"
2901 "%buf = OpTypeStruct %vec4arr\n"
2902 "%bufptr = OpTypePointer Uniform %buf\n"
2903 "%indata = OpVariable %bufptr Uniform\n"
2904 "%outdata = OpVariable %bufptr Uniform\n"
2905
2906 "%id = OpVariable %uvec3ptr Input\n"
2907 "%zero = OpConstant %i32 0\n"
2908 "%c_f_0 = OpConstant %f32 0.\n"
2909 "%c_f_0_5 = OpConstant %f32 0.5\n"
2910 "%c_f_1_5 = OpConstant %f32 1.5\n"
2911 "%c_f_2_5 = OpConstant %f32 2.5\n"
2912 "%c_vec4 = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2913
2914 "%main = OpFunction %void None %voidf\n"
2915 "%label = OpLabel\n"
2916 "%v_vec4 = OpVariable %vec4ptr_f Function\n"
2917 "%idval = OpLoad %uvec3 %id\n"
2918 "%x = OpCompositeExtract %u32 %idval 0\n"
2919 "%inloc = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2920 "%outloc = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2921 " OpCopyMemory %v_vec4 %inloc\n"
2922 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2923 "%add = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2924 " OpStore %outloc %add\n"
2925 " OpReturn\n"
2926 " OpFunctionEnd\n";
2927
2928 spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2929 spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2930 spec1.numWorkGroups = IVec3(numElements, 1, 1);
2931
2932 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", spec1));
2933
2934 // The following case copies a float[100] variable from the input buffer to the output buffer.
2935 ComputeShaderSpec spec2;
2936 vector<float> inputFloats2 (numElements);
2937 vector<float> outputFloats2 (numElements);
2938
2939 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2940
2941 for (size_t ndx = 0; ndx < numElements; ++ndx)
2942 outputFloats2[ndx] = inputFloats2[ndx];
2943
2944 spec2.assembly =
2945 string(getComputeAsmShaderPreamble()) +
2946
2947 "OpName %main \"main\"\n"
2948 "OpName %id \"gl_GlobalInvocationID\"\n"
2949
2950 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2951 "OpDecorate %f32arr100 ArrayStride 4\n"
2952
2953 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2954
2955 "%hundred = OpConstant %u32 100\n"
2956 "%f32arr100 = OpTypeArray %f32 %hundred\n"
2957 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2958 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2959 "%buf = OpTypeStruct %f32arr100\n"
2960 "%bufptr = OpTypePointer Uniform %buf\n"
2961 "%indata = OpVariable %bufptr Uniform\n"
2962 "%outdata = OpVariable %bufptr Uniform\n"
2963
2964 "%id = OpVariable %uvec3ptr Input\n"
2965 "%zero = OpConstant %i32 0\n"
2966
2967 "%main = OpFunction %void None %voidf\n"
2968 "%label = OpLabel\n"
2969 "%var = OpVariable %f32arr100ptr_f Function\n"
2970 "%inarr = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2971 "%outarr = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2972 " OpCopyMemory %var %inarr\n"
2973 " OpCopyMemory %outarr %var\n"
2974 " OpReturn\n"
2975 " OpFunctionEnd\n";
2976
2977 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2978 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2979 spec2.numWorkGroups = IVec3(1, 1, 1);
2980
2981 group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", spec2));
2982
2983 // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2984 ComputeShaderSpec spec3;
2985 vector<float> inputFloats3 (16);
2986 vector<float> outputFloats3 (16);
2987
2988 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2989
2990 for (size_t ndx = 0; ndx < 16; ++ndx)
2991 outputFloats3[ndx] = inputFloats3[ndx];
2992
2993 spec3.assembly =
2994 string(getComputeAsmShaderPreamble()) +
2995
2996 "OpName %main \"main\"\n"
2997 "OpName %id \"gl_GlobalInvocationID\"\n"
2998
2999 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3000 //"OpMemberDecorate %buf 0 Offset 0\n" - exists in getComputeAsmInputOutputBufferTraits
3001 "OpMemberDecorate %buf 1 Offset 16\n"
3002 "OpMemberDecorate %buf 2 Offset 32\n"
3003 "OpMemberDecorate %buf 3 Offset 48\n"
3004
3005 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3006
3007 "%vec4 = OpTypeVector %f32 4\n"
3008 "%buf = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3009 "%bufptr = OpTypePointer Uniform %buf\n"
3010 "%indata = OpVariable %bufptr Uniform\n"
3011 "%outdata = OpVariable %bufptr Uniform\n"
3012 "%vec4stptr = OpTypePointer Function %buf\n"
3013
3014 "%id = OpVariable %uvec3ptr Input\n"
3015 "%zero = OpConstant %i32 0\n"
3016
3017 "%main = OpFunction %void None %voidf\n"
3018 "%label = OpLabel\n"
3019 "%var = OpVariable %vec4stptr Function\n"
3020 " OpCopyMemory %var %indata\n"
3021 " OpCopyMemory %outdata %var\n"
3022 " OpReturn\n"
3023 " OpFunctionEnd\n";
3024
3025 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3026 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3027 spec3.numWorkGroups = IVec3(1, 1, 1);
3028
3029 group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", spec3));
3030
3031 // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3032 ComputeShaderSpec spec4;
3033 vector<float> inputFloats4 (numElements);
3034 vector<float> outputFloats4 (numElements);
3035
3036 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3037
3038 for (size_t ndx = 0; ndx < numElements; ++ndx)
3039 outputFloats4[ndx] = -inputFloats4[ndx];
3040
3041 spec4.assembly =
3042 string(getComputeAsmShaderPreamble()) +
3043
3044 "OpName %main \"main\"\n"
3045 "OpName %id \"gl_GlobalInvocationID\"\n"
3046
3047 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3048
3049 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3050
3051 "%f32ptr_f = OpTypePointer Function %f32\n"
3052 "%id = OpVariable %uvec3ptr Input\n"
3053 "%zero = OpConstant %i32 0\n"
3054
3055 "%main = OpFunction %void None %voidf\n"
3056 "%label = OpLabel\n"
3057 "%var = OpVariable %f32ptr_f Function\n"
3058 "%idval = OpLoad %uvec3 %id\n"
3059 "%x = OpCompositeExtract %u32 %idval 0\n"
3060 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3061 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3062 " OpCopyMemory %var %inloc\n"
3063 "%val = OpLoad %f32 %var\n"
3064 "%neg = OpFNegate %f32 %val\n"
3065 " OpStore %outloc %neg\n"
3066 " OpReturn\n"
3067 " OpFunctionEnd\n";
3068
3069 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3070 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3071 spec4.numWorkGroups = IVec3(numElements, 1, 1);
3072
3073 group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", spec4));
3074
3075 return group.release();
3076 }
3077
createOpCopyObjectGroup(tcu::TestContext & testCtx)3078 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3079 {
3080 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3081 ComputeShaderSpec spec;
3082 de::Random rnd (deStringHash(group->getName()));
3083 const int numElements = 100;
3084 vector<float> inputFloats (numElements, 0);
3085 vector<float> outputFloats (numElements, 0);
3086
3087 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3088
3089 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3090 floorAll(inputFloats);
3091
3092 for (size_t ndx = 0; ndx < numElements; ++ndx)
3093 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3094
3095 spec.assembly =
3096 string(getComputeAsmShaderPreamble()) +
3097
3098 "OpName %main \"main\"\n"
3099 "OpName %id \"gl_GlobalInvocationID\"\n"
3100
3101 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3102
3103 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3104
3105 "%fmat = OpTypeMatrix %fvec3 3\n"
3106 "%three = OpConstant %u32 3\n"
3107 "%farr = OpTypeArray %f32 %three\n"
3108 "%fst = OpTypeStruct %f32 %f32\n"
3109
3110 + string(getComputeAsmInputOutputBuffer()) +
3111
3112 "%id = OpVariable %uvec3ptr Input\n"
3113 "%zero = OpConstant %i32 0\n"
3114 "%c_f = OpConstant %f32 1.5\n"
3115 "%c_fvec3 = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3116 "%c_fmat = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3117 "%c_farr = OpConstantComposite %farr %c_f %c_f %c_f\n"
3118 "%c_fst = OpConstantComposite %fst %c_f %c_f\n"
3119
3120 "%main = OpFunction %void None %voidf\n"
3121 "%label = OpLabel\n"
3122 "%c_f_copy = OpCopyObject %f32 %c_f\n"
3123 "%c_fvec3_copy = OpCopyObject %fvec3 %c_fvec3\n"
3124 "%c_fmat_copy = OpCopyObject %fmat %c_fmat\n"
3125 "%c_farr_copy = OpCopyObject %farr %c_farr\n"
3126 "%c_fst_copy = OpCopyObject %fst %c_fst\n"
3127 "%fvec3_elem = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3128 "%fmat_elem = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3129 "%farr_elem = OpCompositeExtract %f32 %c_farr_copy 2\n"
3130 "%fst_elem = OpCompositeExtract %f32 %c_fst_copy 1\n"
3131 // Add up. 1.5 * 5 = 7.5.
3132 "%add1 = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3133 "%add2 = OpFAdd %f32 %add1 %fmat_elem\n"
3134 "%add3 = OpFAdd %f32 %add2 %farr_elem\n"
3135 "%add4 = OpFAdd %f32 %add3 %fst_elem\n"
3136
3137 "%idval = OpLoad %uvec3 %id\n"
3138 "%x = OpCompositeExtract %u32 %idval 0\n"
3139 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3140 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3141 "%inval = OpLoad %f32 %inloc\n"
3142 "%add = OpFAdd %f32 %add4 %inval\n"
3143 " OpStore %outloc %add\n"
3144 " OpReturn\n"
3145 " OpFunctionEnd\n";
3146 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3147 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3148 spec.numWorkGroups = IVec3(numElements, 1, 1);
3149
3150 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
3151
3152 return group.release();
3153 }
3154 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3155 //
3156 // #version 430
3157 //
3158 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3159 // float elements[];
3160 // } input_data;
3161 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3162 // float elements[];
3163 // } output_data;
3164 //
3165 // void not_called_func() {
3166 // // place OpUnreachable here
3167 // }
3168 //
3169 // uint modulo4(uint val) {
3170 // switch (val % uint(4)) {
3171 // case 0: return 3;
3172 // case 1: return 2;
3173 // case 2: return 1;
3174 // case 3: return 0;
3175 // default: return 100; // place OpUnreachable here
3176 // }
3177 // }
3178 //
3179 // uint const5() {
3180 // return 5;
3181 // // place OpUnreachable here
3182 // }
3183 //
3184 // void main() {
3185 // uint x = gl_GlobalInvocationID.x;
3186 // if (const5() > modulo4(1000)) {
3187 // output_data.elements[x] = -input_data.elements[x];
3188 // } else {
3189 // // place OpUnreachable here
3190 // output_data.elements[x] = input_data.elements[x];
3191 // }
3192 // }
3193
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3194 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3195 {
3196 #ifndef CTS_USES_VULKANSC
3197 static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3198
3199 struct Case
3200 {
3201 string name;
3202 string desc;
3203 };
3204
3205 static const Case cases[] =
3206 {
3207 { "unreachable-switch-merge-in-loop", "Test containing an unreachable switch merge block inside an infinite loop" },
3208 };
3209
3210 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3211 {
3212 const string fileName = cases[i].name + ".amber";
3213 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3214 }
3215 #else
3216 DE_UNREF(group);
3217 DE_UNREF(testCtx);
3218 #endif
3219 }
3220
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3221 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3222 {
3223 #ifndef CTS_USES_VULKANSC
3224 static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3225
3226 struct Case
3227 {
3228 string name;
3229 string desc;
3230 };
3231
3232 static const Case cases[] =
3233 {
3234 { "switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block" },
3235 };
3236
3237 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3238 {
3239 const string fileName = cases[i].name + ".amber";
3240 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3241 }
3242 #else
3243 DE_UNREF(group);
3244 DE_UNREF(testCtx);
3245 #endif
3246 }
3247
3248 #ifndef CTS_USES_VULKANSC
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3249 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3250 {
3251 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
3252 static const char dataDir[] = "spirv_assembly/instruction/compute/arraylength";
3253
3254 struct Case
3255 {
3256 string name;
3257 string desc;
3258 };
3259
3260 static const Case cases[] =
3261 {
3262 { "array-stride-larger-than-element-size", "Test using an unsized array with stride larger than the element size" }
3263 };
3264
3265 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3266 {
3267 const string fileName = cases[i].name + ".amber";
3268 group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3269 }
3270
3271 return group.release();
3272 }
3273 #endif
3274
createOpUnreachableGroup(tcu::TestContext & testCtx)3275 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3276 {
3277 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3278 ComputeShaderSpec spec;
3279 de::Random rnd (deStringHash(group->getName()));
3280 const int numElements = 100;
3281 vector<float> positiveFloats (numElements, 0);
3282 vector<float> negativeFloats (numElements, 0);
3283
3284 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3285
3286 for (size_t ndx = 0; ndx < numElements; ++ndx)
3287 negativeFloats[ndx] = -positiveFloats[ndx];
3288
3289 spec.assembly =
3290 string(getComputeAsmShaderPreamble()) +
3291
3292 "OpSource GLSL 430\n"
3293 "OpName %main \"main\"\n"
3294 "OpName %func_not_called_func \"not_called_func(\"\n"
3295 "OpName %func_modulo4 \"modulo4(u1;\"\n"
3296 "OpName %func_const5 \"const5(\"\n"
3297 "OpName %id \"gl_GlobalInvocationID\"\n"
3298
3299 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3300
3301 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3302
3303 "%u32ptr = OpTypePointer Function %u32\n"
3304 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3305 "%unitf = OpTypeFunction %u32\n"
3306
3307 "%id = OpVariable %uvec3ptr Input\n"
3308 "%zero = OpConstant %u32 0\n"
3309 "%one = OpConstant %u32 1\n"
3310 "%two = OpConstant %u32 2\n"
3311 "%three = OpConstant %u32 3\n"
3312 "%four = OpConstant %u32 4\n"
3313 "%five = OpConstant %u32 5\n"
3314 "%hundred = OpConstant %u32 100\n"
3315 "%thousand = OpConstant %u32 1000\n"
3316
3317 + string(getComputeAsmInputOutputBuffer()) +
3318
3319 // Main()
3320 "%main = OpFunction %void None %voidf\n"
3321 "%main_entry = OpLabel\n"
3322 "%v_thousand = OpVariable %u32ptr Function %thousand\n"
3323 "%idval = OpLoad %uvec3 %id\n"
3324 "%x = OpCompositeExtract %u32 %idval 0\n"
3325 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3326 "%inval = OpLoad %f32 %inloc\n"
3327 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3328 "%ret_const5 = OpFunctionCall %u32 %func_const5\n"
3329 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3330 "%cmp_gt = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3331 " OpSelectionMerge %if_end None\n"
3332 " OpBranchConditional %cmp_gt %if_true %if_false\n"
3333 "%if_true = OpLabel\n"
3334 "%negate = OpFNegate %f32 %inval\n"
3335 " OpStore %outloc %negate\n"
3336 " OpBranch %if_end\n"
3337 "%if_false = OpLabel\n"
3338 " OpUnreachable\n" // Unreachable else branch for if statement
3339 "%if_end = OpLabel\n"
3340 " OpReturn\n"
3341 " OpFunctionEnd\n"
3342
3343 // not_called_function()
3344 "%func_not_called_func = OpFunction %void None %voidf\n"
3345 "%not_called_func_entry = OpLabel\n"
3346 " OpUnreachable\n" // Unreachable entry block in not called static function
3347 " OpFunctionEnd\n"
3348
3349 // modulo4()
3350 "%func_modulo4 = OpFunction %u32 None %uintfuint\n"
3351 "%valptr = OpFunctionParameter %u32ptr\n"
3352 "%modulo4_entry = OpLabel\n"
3353 "%val = OpLoad %u32 %valptr\n"
3354 "%modulo = OpUMod %u32 %val %four\n"
3355 " OpSelectionMerge %switch_merge None\n"
3356 " OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3357 "%case0 = OpLabel\n"
3358 " OpReturnValue %three\n"
3359 "%case1 = OpLabel\n"
3360 " OpReturnValue %two\n"
3361 "%case2 = OpLabel\n"
3362 " OpReturnValue %one\n"
3363 "%case3 = OpLabel\n"
3364 " OpReturnValue %zero\n"
3365 "%default = OpLabel\n"
3366 " OpUnreachable\n" // Unreachable default case for switch statement
3367 "%switch_merge = OpLabel\n"
3368 " OpUnreachable\n" // Unreachable merge block for switch statement
3369 " OpFunctionEnd\n"
3370
3371 // const5()
3372 "%func_const5 = OpFunction %u32 None %unitf\n"
3373 "%const5_entry = OpLabel\n"
3374 " OpReturnValue %five\n"
3375 "%unreachable = OpLabel\n"
3376 " OpUnreachable\n" // Unreachable block in function
3377 " OpFunctionEnd\n";
3378 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3379 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3380 spec.numWorkGroups = IVec3(numElements, 1, 1);
3381
3382 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3383
3384 addOpUnreachableAmberTests(*group, testCtx);
3385
3386 return group.release();
3387 }
3388
3389 // Assembly code used for testing decoration group is based on GLSL source code:
3390 //
3391 // #version 430
3392 //
3393 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3394 // float elements[];
3395 // } input_data0;
3396 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3397 // float elements[];
3398 // } input_data1;
3399 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3400 // float elements[];
3401 // } input_data2;
3402 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3403 // float elements[];
3404 // } input_data3;
3405 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3406 // float elements[];
3407 // } input_data4;
3408 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3409 // float elements[];
3410 // } output_data;
3411 //
3412 // void main() {
3413 // uint x = gl_GlobalInvocationID.x;
3414 // output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3415 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3416 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3417 {
3418 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3419 ComputeShaderSpec spec;
3420 de::Random rnd (deStringHash(group->getName()));
3421 const int numElements = 100;
3422 vector<float> inputFloats0 (numElements, 0);
3423 vector<float> inputFloats1 (numElements, 0);
3424 vector<float> inputFloats2 (numElements, 0);
3425 vector<float> inputFloats3 (numElements, 0);
3426 vector<float> inputFloats4 (numElements, 0);
3427 vector<float> outputFloats (numElements, 0);
3428
3429 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3430 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3431 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3432 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3433 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3434
3435 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3436 floorAll(inputFloats0);
3437 floorAll(inputFloats1);
3438 floorAll(inputFloats2);
3439 floorAll(inputFloats3);
3440 floorAll(inputFloats4);
3441
3442 for (size_t ndx = 0; ndx < numElements; ++ndx)
3443 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3444
3445 spec.assembly =
3446 string(getComputeAsmShaderPreamble()) +
3447
3448 "OpSource GLSL 430\n"
3449 "OpName %main \"main\"\n"
3450 "OpName %id \"gl_GlobalInvocationID\"\n"
3451
3452 // Not using group decoration on variable.
3453 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3454 // Not using group decoration on type.
3455 "OpDecorate %f32arr ArrayStride 4\n"
3456
3457 "OpDecorate %groups BufferBlock\n"
3458 "OpDecorate %groupm Offset 0\n"
3459 "%groups = OpDecorationGroup\n"
3460 "%groupm = OpDecorationGroup\n"
3461
3462 // Group decoration on multiple structs.
3463 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3464 // Group decoration on multiple struct members.
3465 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3466
3467 "OpDecorate %group1 DescriptorSet 0\n"
3468 "OpDecorate %group3 DescriptorSet 0\n"
3469 "OpDecorate %group3 NonWritable\n"
3470 "OpDecorate %group3 Restrict\n"
3471 "%group0 = OpDecorationGroup\n"
3472 "%group1 = OpDecorationGroup\n"
3473 "%group3 = OpDecorationGroup\n"
3474
3475 // Applying the same decoration group multiple times.
3476 "OpGroupDecorate %group1 %outdata\n"
3477 "OpGroupDecorate %group1 %outdata\n"
3478 "OpGroupDecorate %group1 %outdata\n"
3479 "OpDecorate %outdata DescriptorSet 0\n"
3480 "OpDecorate %outdata Binding 5\n"
3481 // Applying decoration group containing nothing.
3482 "OpGroupDecorate %group0 %indata0\n"
3483 "OpDecorate %indata0 DescriptorSet 0\n"
3484 "OpDecorate %indata0 Binding 0\n"
3485 // Applying decoration group containing one decoration.
3486 "OpGroupDecorate %group1 %indata1\n"
3487 "OpDecorate %indata1 Binding 1\n"
3488 // Applying decoration group containing multiple decorations.
3489 "OpGroupDecorate %group3 %indata2 %indata3\n"
3490 "OpDecorate %indata2 Binding 2\n"
3491 "OpDecorate %indata3 Binding 3\n"
3492 // Applying multiple decoration groups (with overlapping).
3493 "OpGroupDecorate %group0 %indata4\n"
3494 "OpGroupDecorate %group1 %indata4\n"
3495 "OpGroupDecorate %group3 %indata4\n"
3496 "OpDecorate %indata4 Binding 4\n"
3497
3498 + string(getComputeAsmCommonTypes()) +
3499
3500 "%id = OpVariable %uvec3ptr Input\n"
3501 "%zero = OpConstant %i32 0\n"
3502
3503 "%outbuf = OpTypeStruct %f32arr\n"
3504 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3505 "%outdata = OpVariable %outbufptr Uniform\n"
3506 "%inbuf0 = OpTypeStruct %f32arr\n"
3507 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3508 "%indata0 = OpVariable %inbuf0ptr Uniform\n"
3509 "%inbuf1 = OpTypeStruct %f32arr\n"
3510 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3511 "%indata1 = OpVariable %inbuf1ptr Uniform\n"
3512 "%inbuf2 = OpTypeStruct %f32arr\n"
3513 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3514 "%indata2 = OpVariable %inbuf2ptr Uniform\n"
3515 "%inbuf3 = OpTypeStruct %f32arr\n"
3516 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3517 "%indata3 = OpVariable %inbuf3ptr Uniform\n"
3518 "%inbuf4 = OpTypeStruct %f32arr\n"
3519 "%inbufptr = OpTypePointer Uniform %inbuf4\n"
3520 "%indata4 = OpVariable %inbufptr Uniform\n"
3521
3522 "%main = OpFunction %void None %voidf\n"
3523 "%label = OpLabel\n"
3524 "%idval = OpLoad %uvec3 %id\n"
3525 "%x = OpCompositeExtract %u32 %idval 0\n"
3526 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3527 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3528 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3529 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3530 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3531 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3532 "%inval0 = OpLoad %f32 %inloc0\n"
3533 "%inval1 = OpLoad %f32 %inloc1\n"
3534 "%inval2 = OpLoad %f32 %inloc2\n"
3535 "%inval3 = OpLoad %f32 %inloc3\n"
3536 "%inval4 = OpLoad %f32 %inloc4\n"
3537 "%add0 = OpFAdd %f32 %inval0 %inval1\n"
3538 "%add1 = OpFAdd %f32 %add0 %inval2\n"
3539 "%add2 = OpFAdd %f32 %add1 %inval3\n"
3540 "%add = OpFAdd %f32 %add2 %inval4\n"
3541 " OpStore %outloc %add\n"
3542 " OpReturn\n"
3543 " OpFunctionEnd\n";
3544 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3545 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3546 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3547 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3548 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3549 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3550 spec.numWorkGroups = IVec3(numElements, 1, 1);
3551
3552 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));
3553
3554 return group.release();
3555 }
3556
// Tag identifying which member of SpecConstantValue::ValueUnion holds the stored value.
enum SpecConstantType
{
	SC_INT8 = 0,	// deInt8
	SC_UINT8,		// deUint8
	SC_INT16,		// deInt16
	SC_UINT16,		// deUint16
	SC_INT32,		// deInt32
	SC_UINT32,		// deUint32
	SC_INT64,		// deInt64
	SC_UINT64,		// deUint64
	SC_FLOAT16,		// tcu::Float16
	SC_FLOAT32,		// tcu::Float32
	SC_FLOAT64,		// tcu::Float64
};
3571
3572 struct SpecConstantValue
3573 {
3574 SpecConstantType type;
3575 union ValueUnion {
3576 deInt8 i8;
3577 deUint8 u8;
3578 deInt16 i16;
3579 deUint16 u16;
3580 deInt32 i32;
3581 deUint32 u32;
3582 deInt64 i64;
3583 deUint64 u64;
3584 tcu::Float16 f16;
3585 tcu::Float32 f32;
3586 tcu::Float64 f64;
3587
ValueUnion(deInt8 v)3588 ValueUnion (deInt8 v) : i8(v) {}
ValueUnion(deUint8 v)3589 ValueUnion (deUint8 v) : u8(v) {}
ValueUnion(deInt16 v)3590 ValueUnion (deInt16 v) : i16(v) {}
ValueUnion(deUint16 v)3591 ValueUnion (deUint16 v) : u16(v) {}
ValueUnion(deInt32 v)3592 ValueUnion (deInt32 v) : i32(v) {}
ValueUnion(deUint32 v)3593 ValueUnion (deUint32 v) : u32(v) {}
ValueUnion(deInt64 v)3594 ValueUnion (deInt64 v) : i64(v) {}
ValueUnion(deUint64 v)3595 ValueUnion (deUint64 v) : u64(v) {}
ValueUnion(tcu::Float16 v)3596 ValueUnion (tcu::Float16 v) : f16(v) {}
ValueUnion(tcu::Float32 v)3597 ValueUnion (tcu::Float32 v) : f32(v) {}
ValueUnion(tcu::Float64 v)3598 ValueUnion (tcu::Float64 v) : f64(v) {}
3599 } value;
3600
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3601 SpecConstantValue (deInt8 v) : type(SC_INT8) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3602 SpecConstantValue (deUint8 v) : type(SC_UINT8) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3603 SpecConstantValue (deInt16 v) : type(SC_INT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3604 SpecConstantValue (deUint16 v) : type(SC_UINT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3605 SpecConstantValue (deInt32 v) : type(SC_INT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3606 SpecConstantValue (deUint32 v) : type(SC_UINT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3607 SpecConstantValue (deInt64 v) : type(SC_INT64) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3608 SpecConstantValue (deUint64 v) : type(SC_UINT64) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3609 SpecConstantValue (tcu::Float16 v) : type(SC_FLOAT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3610 SpecConstantValue (tcu::Float32 v) : type(SC_FLOAT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3611 SpecConstantValue (tcu::Float64 v) : type(SC_FLOAT64) , value(v) {}
3612
appendTovkt::SpirVAssembly::__anon437b2d460111::SpecConstantValue3613 void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3614 {
3615 switch (type)
3616 {
3617 case SC_INT8: specConstants.append(value.i8); break;
3618 case SC_UINT8: specConstants.append(value.u8); break;
3619 case SC_INT16: specConstants.append(value.i16); break;
3620 case SC_UINT16: specConstants.append(value.u16); break;
3621 case SC_INT32: specConstants.append(value.i32); break;
3622 case SC_UINT32: specConstants.append(value.u32); break;
3623 case SC_INT64: specConstants.append(value.i64); break;
3624 case SC_UINT64: specConstants.append(value.u64); break;
3625 case SC_FLOAT16: specConstants.append(value.f16); break;
3626 case SC_FLOAT32: specConstants.append(value.f32); break;
3627 case SC_FLOAT64: specConstants.append(value.f64); break;
3628 default:
3629 DE_ASSERT(false);
3630 }
3631 }
3632 };
3633
3634 enum CaseFlagBits
3635 {
3636 FLAG_NONE = 0,
3637 FLAG_CONVERT = 1,
3638 FLAG_I8 = (1<<1),
3639 FLAG_I16 = (1<<2),
3640 FLAG_I64 = (1<<3),
3641 FLAG_F16 = (1<<4),
3642 FLAG_F64 = (1<<5),
3643 };
3644 using CaseFlags = deUint32;
3645
3646 struct SpecConstantTwoValCase
3647 {
3648 const std::string caseName;
3649 const std::string scDefinition0;
3650 const std::string scDefinition1;
3651 const std::string scResultType;
3652 const std::string scOperation;
3653 SpecConstantValue scActualValue0;
3654 SpecConstantValue scActualValue1;
3655 const std::string resultOperation;
3656 vector<deInt32> expectedOutput;
3657 CaseFlags caseFlags;
3658
SpecConstantTwoValCasevkt::SpirVAssembly::__anon437b2d460111::SpecConstantTwoValCase3659 SpecConstantTwoValCase (const std::string& name,
3660 const std::string& definition0,
3661 const std::string& definition1,
3662 const std::string& resultType,
3663 const std::string& operation,
3664 SpecConstantValue value0,
3665 SpecConstantValue value1,
3666 const std::string& resultOp,
3667 const vector<deInt32>& output,
3668 CaseFlags flags = FLAG_NONE)
3669 : caseName (name)
3670 , scDefinition0 (definition0)
3671 , scDefinition1 (definition1)
3672 , scResultType (resultType)
3673 , scOperation (operation)
3674 , scActualValue0 (value0)
3675 , scActualValue1 (value1)
3676 , resultOperation (resultOp)
3677 , expectedOutput (output)
3678 , caseFlags (flags)
3679 {}
3680 };
3681
// Returns the SPIR-V declarations shared by the struct based OpSpecConstantOp case: the i32
// constants 0..3, a 3-element i32 array type, a 3x3 array-of-arrays type and a struct wrapping it.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	static const char* const declarations[] =
	{
		"%zero = OpConstant %i32 0\n",
		"%one = OpConstant %i32 1\n",
		"%two = OpConstant %i32 2\n",
		"%three = OpConstant %i32 3\n",
		"%iarr3 = OpTypeArray %i32 %three\n",
		"%imat3 = OpTypeArray %iarr3 %three\n",
		"%struct = OpTypeStruct %imat3\n",
	};

	std::string result;

	for (const char* declaration : declarations)
		result += declaration;

	return result;
}
3694
// Returns the SPIR-V constant composites (array, array-of-arrays and struct) built only from
// %zero; they are the starting values for the CompositeInsert chain of the struct based case.
std::string getSpecConstantOpStructComposites ()
{
	std::string composites;

	composites += "%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero\n";
	composites += "%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n";
	composites += "%struct_0 = OpConstantComposite %struct %imat3_0\n";

	return composites;
}
3703
// Returns the OpSpecConstantOp block of the struct based case. It composes three rotations of
// (%sc_0, %sc_1, %sc_2) with CompositeInsert, stores them as the rows of a 3x3 array inside a
// struct, extracts component pairs again with CompositeExtract, compares them with IEqual and
// finally turns the comparison results into %mustbe_0, %mustbe_1 and %mustbe_2 with Select.
std::string getSpecConstantOpStructConstBlock ()
{
	std::string block;

	// Compose (sc_0, sc_1, sc_2)
	block +=
		"%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0\n"
		"%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1\n"
		"%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2\n";

	// Compose (sc_1, sc_2, sc_0)
	block +=
		"%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0\n"
		"%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1\n"
		"%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2\n";

	// Compose (sc_2, sc_0, sc_1)
	block +=
		"%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0\n"
		"%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1\n"
		"%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2\n";

	// Matrix with the 3 previous arrays.
	block +=
		"%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0\n"
		"%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1\n"
		"%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2\n";

	// Save it in the struct.
	block +=
		"%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0\n";

	// Extract some component pairs to compare them.
	block +=
		"%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0\n"
		"%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0\n"

		"%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1\n"
		"%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2\n"

		"%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0\n"
		"%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1\n";

	// Comparison results, in order: must be false, must be true, must be true.
	block +=
		"%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0\n"
		"%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2\n"
		"%cmpres_2 = OpSpecConstantOp %bool IEqual %comp_2_0 %comp_1_1\n";

	// Selections, in order: must select 0, must select 1, must select 2.
	block +=
		"%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero\n"
		"%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero\n"
		"%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one\n";

	return block;
}
3743
// Returns the function-body SPIR-V that folds the %mustbe_* constants into the final result:
//
//   factor   = (1 - mustbe_0) * mustbe_1 * (mustbe_2 - 1)
//   sc_final = factor * sc_factor
//
// When %mustbe_0/1/2 hold 0/1/2 the factor is 1, so %sc_final equals %sc_factor unchanged.
// %one, %mustbe_* and %sc_factor must be declared by the surrounding shader.
std::string getSpecConstantOpStructInstructions ()
{
	std::string instructions;

	instructions += "%subf_a = OpISub %i32 %one %mustbe_0\n";		// 1 - mustbe_0
	instructions += "%subf_b = OpIMul %i32 %subf_a %mustbe_1\n";	// ... * mustbe_1
	instructions += "%subf_c = OpISub %i32 %mustbe_2 %one\n";		// mustbe_2 - 1
	instructions += "%factor = OpIMul %i32 %subf_b %subf_c\n";
	instructions += "%sc_final = OpIMul %i32 %factor %sc_factor\n";

	return instructions;
}
3756
createSpecConstantGroup(tcu::TestContext & testCtx)3757 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3758 {
3759 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3760 vector<SpecConstantTwoValCase> cases;
3761 de::Random rnd (deStringHash(group->getName()));
3762 const int numElements = 100;
3763 vector<deInt32> inputInts (numElements, 0);
3764 vector<deInt32> outputInts1 (numElements, 0);
3765 vector<deInt32> outputInts2 (numElements, 0);
3766 vector<deInt32> outputInts3 (numElements, 0);
3767 vector<deInt32> outputInts4 (numElements, 0);
3768 vector<deInt32> outputInts5 (numElements, 0);
3769 const StringTemplate shaderTemplate (
3770 "${CAPABILITIES:opt}"
3771 + string(getComputeAsmShaderPreamble()) +
3772
3773 "OpName %main \"main\"\n"
3774 "OpName %id \"gl_GlobalInvocationID\"\n"
3775
3776 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3777 "OpDecorate %sc_0 SpecId 0\n"
3778 "OpDecorate %sc_1 SpecId 1\n"
3779 "OpDecorate %i32arr ArrayStride 4\n"
3780
3781 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3782
3783 "${OPTYPE_DEFINITIONS:opt}"
3784 "%buf = OpTypeStruct %i32arr\n"
3785 "%bufptr = OpTypePointer Uniform %buf\n"
3786 "%indata = OpVariable %bufptr Uniform\n"
3787 "%outdata = OpVariable %bufptr Uniform\n"
3788
3789 "%id = OpVariable %uvec3ptr Input\n"
3790 "%zero = OpConstant %i32 0\n"
3791
3792 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
3793 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
3794 "%sc_final = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3795
3796 "%main = OpFunction %void None %voidf\n"
3797 "%label = OpLabel\n"
3798 "${TYPE_CONVERT:opt}"
3799 "%idval = OpLoad %uvec3 %id\n"
3800 "%x = OpCompositeExtract %u32 %idval 0\n"
3801 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
3802 "%inval = OpLoad %i32 %inloc\n"
3803 "%final = ${GEN_RESULT}\n"
3804 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
3805 " OpStore %outloc %final\n"
3806 " OpReturn\n"
3807 " OpFunctionEnd\n");
3808
3809 fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3810
3811 for (size_t ndx = 0; ndx < numElements; ++ndx)
3812 {
3813 outputInts1[ndx] = inputInts[ndx] + 42;
3814 outputInts2[ndx] = inputInts[ndx];
3815 outputInts3[ndx] = inputInts[ndx] - 11200;
3816 outputInts4[ndx] = inputInts[ndx] + 1;
3817 outputInts5[ndx] = inputInts[ndx] - 42;
3818 }
3819
3820 const char addScToInput[] = "OpIAdd %i32 %inval %sc_final";
3821 const char addSc32ToInput[] = "OpIAdd %i32 %inval %sc_final32";
3822 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_final %inval %zero";
3823 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3824
3825 cases.push_back(SpecConstantTwoValCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 62, -20, addScToInput, outputInts1));
3826 cases.push_back(SpecConstantTwoValCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 100, 58, addScToInput, outputInts1));
3827 cases.push_back(SpecConstantTwoValCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -2, -21, addScToInput, outputInts1));
3828 cases.push_back(SpecConstantTwoValCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, -3, addScToInput, outputInts1));
3829 cases.push_back(SpecConstantTwoValCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 3, addScToInput, outputInts1));
3830 cases.push_back(SpecConstantTwoValCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3831 cases.push_back(SpecConstantTwoValCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3832 cases.push_back(SpecConstantTwoValCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 342, 50, addScToInput, outputInts1));
3833 cases.push_back(SpecConstantTwoValCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 42, 63, addScToInput, outputInts1));
3834 cases.push_back(SpecConstantTwoValCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 34, 8, addScToInput, outputInts1));
3835 cases.push_back(SpecConstantTwoValCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 18, 56, addScToInput, outputInts1));
3836 cases.push_back(SpecConstantTwoValCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, 2, addScToInput, outputInts1));
3837 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, 2, addScToInput, outputInts5));
3838 cases.push_back(SpecConstantTwoValCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, 1, addScToInput, outputInts1));
3839
3840 // Shifts for other integer sizes.
3841 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{168}, deInt64{2}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3842 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-168}, deInt64{2}, addSc32ToInput, outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3843 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{21}, deInt64{1}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3844 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{168}, deInt16{2}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3845 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-168}, deInt16{2}, addSc32ToInput, outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3846 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{21}, deInt16{1}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3847 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{84}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3848 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-84}, deInt8{1}, addSc32ToInput, outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3849 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{21}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3850
3851 // Shifts for other integer sizes but only in the shift amount.
3852 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt64{2}, addScToInput, outputInts1, (FLAG_I64)));
3853 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt64{2}, addScToInput, outputInts5, (FLAG_I64)));
3854 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt64{1}, addScToInput, outputInts1, (FLAG_I64)));
3855 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt16{2}, addScToInput, outputInts1, (FLAG_I16)));
3856 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt16{2}, addScToInput, outputInts5, (FLAG_I16)));
3857 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt16{1}, addScToInput, outputInts1, (FLAG_I16)));
3858 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 84, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3859 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -84, deInt8{1}, addScToInput, outputInts5, (FLAG_I8)));
3860 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3861
3862 cases.push_back(SpecConstantTwoValCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputInts2));
3863 cases.push_back(SpecConstantTwoValCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputInts2));
3864 cases.push_back(SpecConstantTwoValCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3865 cases.push_back(SpecConstantTwoValCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputInts2));
3866 cases.push_back(SpecConstantTwoValCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputInts2));
3867 cases.push_back(SpecConstantTwoValCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputInts2));
3868 cases.push_back(SpecConstantTwoValCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3869 cases.push_back(SpecConstantTwoValCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputInts2));
3870 cases.push_back(SpecConstantTwoValCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputInts2));
3871 cases.push_back(SpecConstantTwoValCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputInts2));
3872 cases.push_back(SpecConstantTwoValCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3873 cases.push_back(SpecConstantTwoValCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3874 cases.push_back(SpecConstantTwoValCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3875 cases.push_back(SpecConstantTwoValCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3876 cases.push_back(SpecConstantTwoValCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -42, 0, addScToInput, outputInts1));
3877 cases.push_back(SpecConstantTwoValCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -43, 0, addScToInput, outputInts1));
3878 cases.push_back(SpecConstantTwoValCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
3879 cases.push_back(SpecConstantTwoValCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
3880 cases.push_back(SpecConstantTwoValCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -11200, 0, addSc32ToInput, outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3881 cases.push_back(SpecConstantTwoValCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32{-11200.0}, tcu::Float32{0.0}, addSc32ToInput, outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3882 cases.push_back(SpecConstantTwoValCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16{1.0}, tcu::Float16{0.0}, addSc32ToInput, outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3883
3884 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3885 {
3886 map<string, string> specializations;
3887 ComputeShaderSpec spec;
3888
3889 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
3890 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
3891 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
3892 specializations["SC_OP"] = cases[caseNdx].scOperation;
3893 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
3894
3895 // Special SPIR-V code when using 16-bit integers.
3896 if (cases[caseNdx].caseFlags & FLAG_I16)
3897 {
3898 spec.requestedVulkanFeatures.coreFeatures.shaderInt16 = VK_TRUE;
3899 specializations["CAPABILITIES"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
3900 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
3901 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3902 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
3903 }
3904
3905 // Special SPIR-V code when using 64-bit integers.
3906 if (cases[caseNdx].caseFlags & FLAG_I64)
3907 {
3908 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
3909 specializations["CAPABILITIES"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
3910 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
3911 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3912 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 64-bit integer to 32-bit integer
3913 }
3914
3915 // Special SPIR-V code when using 64-bit floats.
3916 if (cases[caseNdx].caseFlags & FLAG_F64)
3917 {
3918 spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3919 specializations["CAPABILITIES"] += "OpCapability Float64\n"; // Adds 64-bit float capability
3920 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
3921 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3922 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
3923 }
3924
3925 // Extension needed for float16 and int8.
3926 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3927 spec.extensions.push_back("VK_KHR_shader_float16_int8");
3928
3929 // Special SPIR-V code when using 16-bit floats.
3930 if (cases[caseNdx].caseFlags & FLAG_F16)
3931 {
3932 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3933 specializations["CAPABILITIES"] += "OpCapability Float16\n"; // Adds 16-bit float capability
3934 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
3935 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3936 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 16-bit float to 32-bit integer
3937 }
3938
3939 // Special SPIR-V code when using 8-bit integers.
3940 if (cases[caseNdx].caseFlags & FLAG_I8)
3941 {
3942 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3943 specializations["CAPABILITIES"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
3944 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
3945 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3946 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
3947 }
3948
3949 spec.assembly = shaderTemplate.specialize(specializations);
3950 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3951 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3952 spec.numWorkGroups = IVec3(numElements, 1, 1);
3953 cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3954 cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3955
3956 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), spec));
3957 }
3958
3959 ComputeShaderSpec spec;
3960
3961 spec.assembly =
3962 string(getComputeAsmShaderPreamble()) +
3963
3964 "OpName %main \"main\"\n"
3965 "OpName %id \"gl_GlobalInvocationID\"\n"
3966
3967 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3968 "OpDecorate %sc_0 SpecId 0\n"
3969 "OpDecorate %sc_1 SpecId 1\n"
3970 "OpDecorate %sc_2 SpecId 2\n"
3971 "OpDecorate %i32arr ArrayStride 4\n"
3972
3973 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3974
3975 "%ivec3 = OpTypeVector %i32 3\n"
3976
3977 + getSpecConstantOpStructConstantsAndTypes() +
3978
3979 "%buf = OpTypeStruct %i32arr\n"
3980 "%bufptr = OpTypePointer Uniform %buf\n"
3981 "%indata = OpVariable %bufptr Uniform\n"
3982 "%outdata = OpVariable %bufptr Uniform\n"
3983
3984 "%id = OpVariable %uvec3ptr Input\n"
3985 "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
3986 "%vec3_undef = OpUndef %ivec3\n"
3987
3988 + getSpecConstantOpStructComposites () +
3989
3990 "%sc_0 = OpSpecConstant %i32 0\n"
3991 "%sc_1 = OpSpecConstant %i32 0\n"
3992 "%sc_2 = OpSpecConstant %i32 0\n"
3993
3994 + getSpecConstantOpStructConstBlock () +
3995
3996 "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
3997 "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
3998 "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
3999 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
4000 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
4001 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
4002 "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
4003 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
4004 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
4005 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
4006 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
4007 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
4008 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
4009
4010 "%main = OpFunction %void None %voidf\n"
4011 "%label = OpLabel\n"
4012
4013 + getSpecConstantOpStructInstructions() +
4014
4015 "%idval = OpLoad %uvec3 %id\n"
4016 "%x = OpCompositeExtract %u32 %idval 0\n"
4017 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
4018 "%inval = OpLoad %i32 %inloc\n"
4019 "%final = OpIAdd %i32 %inval %sc_final\n"
4020 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
4021 " OpStore %outloc %final\n"
4022 " OpReturn\n"
4023 " OpFunctionEnd\n";
4024 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4025 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4026 spec.numWorkGroups = IVec3(numElements, 1, 1);
4027 spec.specConstants.append<deInt32>(123);
4028 spec.specConstants.append<deInt32>(56);
4029 spec.specConstants.append<deInt32>(-77);
4030
4031 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", spec));
4032
4033 return group.release();
4034 }
4035
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4036 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
4037 {
4038 ComputeShaderSpec specInt;
4039 ComputeShaderSpec specFloat;
4040 ComputeShaderSpec specFloat16;
4041 ComputeShaderSpec specVec3;
4042 ComputeShaderSpec specMat4;
4043 ComputeShaderSpec specArray;
4044 ComputeShaderSpec specStruct;
4045 de::Random rnd (deStringHash(group->getName()));
4046 const int numElements = 100;
4047 vector<float> inputFloats (numElements, 0);
4048 vector<float> outputFloats (numElements, 0);
4049 vector<deUint32> inputUints (numElements, 0);
4050 vector<deUint32> outputUints (numElements, 0);
4051
4052 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4053
4054 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4055 floorAll(inputFloats);
4056
4057 for (size_t ndx = 0; ndx < numElements; ++ndx)
4058 {
4059 // Just check if the value is positive or not
4060 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4061 }
4062
4063 for (size_t ndx = 0; ndx < numElements; ++ndx)
4064 {
4065 inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4066 outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4067 }
4068
4069 // All of the tests are of the form:
4070 //
4071 // testtype r
4072 //
4073 // if (inputdata > 0)
4074 // r = 1
4075 // else
4076 // r = -1
4077 //
4078 // return (float)r
4079
4080 specFloat.assembly =
4081 string(getComputeAsmShaderPreamble()) +
4082
4083 "OpSource GLSL 430\n"
4084 "OpName %main \"main\"\n"
4085 "OpName %id \"gl_GlobalInvocationID\"\n"
4086
4087 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4088
4089 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4090
4091 "%id = OpVariable %uvec3ptr Input\n"
4092 "%zero = OpConstant %i32 0\n"
4093 "%float_0 = OpConstant %f32 0.0\n"
4094 "%float_1 = OpConstant %f32 1.0\n"
4095 "%float_n1 = OpConstant %f32 -1.0\n"
4096
4097 "%main = OpFunction %void None %voidf\n"
4098 "%entry = OpLabel\n"
4099 "%idval = OpLoad %uvec3 %id\n"
4100 "%x = OpCompositeExtract %u32 %idval 0\n"
4101 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4102 "%inval = OpLoad %f32 %inloc\n"
4103
4104 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4105 " OpSelectionMerge %cm None\n"
4106 " OpBranchConditional %comp %tb %fb\n"
4107 "%tb = OpLabel\n"
4108 " OpBranch %cm\n"
4109 "%fb = OpLabel\n"
4110 " OpBranch %cm\n"
4111 "%cm = OpLabel\n"
4112 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4113
4114 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4115 " OpStore %outloc %res\n"
4116 " OpReturn\n"
4117
4118 " OpFunctionEnd\n";
4119 specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4120 specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4121 specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4122
4123 specFloat16.assembly =
4124 "OpCapability Shader\n"
4125 "OpCapability Float16\n"
4126 "OpMemoryModel Logical GLSL450\n"
4127 "OpEntryPoint GLCompute %main \"main\" %id\n"
4128 "OpExecutionMode %main LocalSize 1 1 1\n"
4129
4130 "OpSource GLSL 430\n"
4131 "OpName %main \"main\"\n"
4132 "OpName %id \"gl_GlobalInvocationID\"\n"
4133
4134 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4135
4136 "OpDecorate %buf BufferBlock\n"
4137 "OpDecorate %indata DescriptorSet 0\n"
4138 "OpDecorate %indata Binding 0\n"
4139 "OpDecorate %outdata DescriptorSet 0\n"
4140 "OpDecorate %outdata Binding 1\n"
4141 "OpDecorate %u32arr ArrayStride 4\n"
4142 "OpMemberDecorate %buf 0 Offset 0\n"
4143
4144 + string(getComputeAsmCommonTypes()) +
4145
4146 "%f16 = OpTypeFloat 16\n"
4147 "%f16vec2 = OpTypeVector %f16 2\n"
4148 "%fvec2 = OpTypeVector %f32 2\n"
4149 "%u32ptr = OpTypePointer Uniform %u32\n"
4150 "%u32arr = OpTypeRuntimeArray %u32\n"
4151 "%f16_0 = OpConstant %f16 0.0\n"
4152
4153
4154 "%buf = OpTypeStruct %u32arr\n"
4155 "%bufptr = OpTypePointer Uniform %buf\n"
4156 "%indata = OpVariable %bufptr Uniform\n"
4157 "%outdata = OpVariable %bufptr Uniform\n"
4158
4159 "%id = OpVariable %uvec3ptr Input\n"
4160 "%zero = OpConstant %i32 0\n"
4161 "%float_0 = OpConstant %f32 0.0\n"
4162 "%float_1 = OpConstant %f32 1.0\n"
4163 "%float_n1 = OpConstant %f32 -1.0\n"
4164
4165 "%main = OpFunction %void None %voidf\n"
4166 "%entry = OpLabel\n"
4167 "%idval = OpLoad %uvec3 %id\n"
4168 "%x = OpCompositeExtract %u32 %idval 0\n"
4169 "%inloc = OpAccessChain %u32ptr %indata %zero %x\n"
4170 "%inval = OpLoad %u32 %inloc\n"
4171 "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4172 "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4173 "%f32_inval = OpFConvert %f32 %f16_inval\n"
4174
4175 "%comp = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4176 " OpSelectionMerge %cm None\n"
4177 " OpBranchConditional %comp %tb %fb\n"
4178 "%tb = OpLabel\n"
4179 " OpBranch %cm\n"
4180 "%fb = OpLabel\n"
4181 " OpBranch %cm\n"
4182 "%cm = OpLabel\n"
4183 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4184 "%f16_res = OpFConvert %f16 %res\n"
4185
4186 "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4187 "%u32_res = OpBitcast %u32 %f16vec2_res\n"
4188
4189 "%outloc = OpAccessChain %u32ptr %outdata %zero %x\n"
4190 " OpStore %outloc %u32_res\n"
4191 " OpReturn\n"
4192
4193 " OpFunctionEnd\n";
4194
4195 specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4196 specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4197 specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4198 specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4199
4200 specMat4.assembly =
4201 string(getComputeAsmShaderPreamble()) +
4202
4203 "OpSource GLSL 430\n"
4204 "OpName %main \"main\"\n"
4205 "OpName %id \"gl_GlobalInvocationID\"\n"
4206
4207 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4208
4209 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4210
4211 "%id = OpVariable %uvec3ptr Input\n"
4212 "%v4f32 = OpTypeVector %f32 4\n"
4213 "%mat4v4f32 = OpTypeMatrix %v4f32 4\n"
4214 "%zero = OpConstant %i32 0\n"
4215 "%float_0 = OpConstant %f32 0.0\n"
4216 "%float_1 = OpConstant %f32 1.0\n"
4217 "%float_n1 = OpConstant %f32 -1.0\n"
4218 "%m11 = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4219 "%m12 = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4220 "%m13 = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4221 "%m14 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4222 "%m1 = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4223 "%m21 = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4224 "%m22 = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4225 "%m23 = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4226 "%m24 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4227 "%m2 = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4228
4229 "%main = OpFunction %void None %voidf\n"
4230 "%entry = OpLabel\n"
4231 "%idval = OpLoad %uvec3 %id\n"
4232 "%x = OpCompositeExtract %u32 %idval 0\n"
4233 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4234 "%inval = OpLoad %f32 %inloc\n"
4235
4236 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4237 " OpSelectionMerge %cm None\n"
4238 " OpBranchConditional %comp %tb %fb\n"
4239 "%tb = OpLabel\n"
4240 " OpBranch %cm\n"
4241 "%fb = OpLabel\n"
4242 " OpBranch %cm\n"
4243 "%cm = OpLabel\n"
4244 "%mres = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4245 "%res = OpCompositeExtract %f32 %mres 2 2\n"
4246
4247 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4248 " OpStore %outloc %res\n"
4249 " OpReturn\n"
4250
4251 " OpFunctionEnd\n";
4252 specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4253 specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4254 specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4255
4256 specVec3.assembly =
4257 string(getComputeAsmShaderPreamble()) +
4258
4259 "OpSource GLSL 430\n"
4260 "OpName %main \"main\"\n"
4261 "OpName %id \"gl_GlobalInvocationID\"\n"
4262
4263 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4264
4265 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4266
4267 "%id = OpVariable %uvec3ptr Input\n"
4268 "%zero = OpConstant %i32 0\n"
4269 "%float_0 = OpConstant %f32 0.0\n"
4270 "%float_1 = OpConstant %f32 1.0\n"
4271 "%float_n1 = OpConstant %f32 -1.0\n"
4272 "%v1 = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4273 "%v2 = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4274
4275 "%main = OpFunction %void None %voidf\n"
4276 "%entry = OpLabel\n"
4277 "%idval = OpLoad %uvec3 %id\n"
4278 "%x = OpCompositeExtract %u32 %idval 0\n"
4279 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4280 "%inval = OpLoad %f32 %inloc\n"
4281
4282 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4283 " OpSelectionMerge %cm None\n"
4284 " OpBranchConditional %comp %tb %fb\n"
4285 "%tb = OpLabel\n"
4286 " OpBranch %cm\n"
4287 "%fb = OpLabel\n"
4288 " OpBranch %cm\n"
4289 "%cm = OpLabel\n"
4290 "%vres = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4291 "%res = OpCompositeExtract %f32 %vres 2\n"
4292
4293 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4294 " OpStore %outloc %res\n"
4295 " OpReturn\n"
4296
4297 " OpFunctionEnd\n";
4298 specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4299 specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4300 specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4301
4302 specInt.assembly =
4303 string(getComputeAsmShaderPreamble()) +
4304
4305 "OpSource GLSL 430\n"
4306 "OpName %main \"main\"\n"
4307 "OpName %id \"gl_GlobalInvocationID\"\n"
4308
4309 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4310
4311 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4312
4313 "%id = OpVariable %uvec3ptr Input\n"
4314 "%zero = OpConstant %i32 0\n"
4315 "%float_0 = OpConstant %f32 0.0\n"
4316 "%i1 = OpConstant %i32 1\n"
4317 "%i2 = OpConstant %i32 -1\n"
4318
4319 "%main = OpFunction %void None %voidf\n"
4320 "%entry = OpLabel\n"
4321 "%idval = OpLoad %uvec3 %id\n"
4322 "%x = OpCompositeExtract %u32 %idval 0\n"
4323 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4324 "%inval = OpLoad %f32 %inloc\n"
4325
4326 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4327 " OpSelectionMerge %cm None\n"
4328 " OpBranchConditional %comp %tb %fb\n"
4329 "%tb = OpLabel\n"
4330 " OpBranch %cm\n"
4331 "%fb = OpLabel\n"
4332 " OpBranch %cm\n"
4333 "%cm = OpLabel\n"
4334 "%ires = OpPhi %i32 %i1 %tb %i2 %fb\n"
4335 "%res = OpConvertSToF %f32 %ires\n"
4336
4337 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4338 " OpStore %outloc %res\n"
4339 " OpReturn\n"
4340
4341 " OpFunctionEnd\n";
4342 specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4343 specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4344 specInt.numWorkGroups = IVec3(numElements, 1, 1);
4345
4346 specArray.assembly =
4347 string(getComputeAsmShaderPreamble()) +
4348
4349 "OpSource GLSL 430\n"
4350 "OpName %main \"main\"\n"
4351 "OpName %id \"gl_GlobalInvocationID\"\n"
4352
4353 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4354
4355 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4356
4357 "%id = OpVariable %uvec3ptr Input\n"
4358 "%zero = OpConstant %i32 0\n"
4359 "%u7 = OpConstant %u32 7\n"
4360 "%float_0 = OpConstant %f32 0.0\n"
4361 "%float_1 = OpConstant %f32 1.0\n"
4362 "%float_n1 = OpConstant %f32 -1.0\n"
4363 "%f32a7 = OpTypeArray %f32 %u7\n"
4364 "%a1 = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4365 "%a2 = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4366 "%main = OpFunction %void None %voidf\n"
4367 "%entry = OpLabel\n"
4368 "%idval = OpLoad %uvec3 %id\n"
4369 "%x = OpCompositeExtract %u32 %idval 0\n"
4370 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4371 "%inval = OpLoad %f32 %inloc\n"
4372
4373 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4374 " OpSelectionMerge %cm None\n"
4375 " OpBranchConditional %comp %tb %fb\n"
4376 "%tb = OpLabel\n"
4377 " OpBranch %cm\n"
4378 "%fb = OpLabel\n"
4379 " OpBranch %cm\n"
4380 "%cm = OpLabel\n"
4381 "%ares = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4382 "%res = OpCompositeExtract %f32 %ares 5\n"
4383
4384 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4385 " OpStore %outloc %res\n"
4386 " OpReturn\n"
4387
4388 " OpFunctionEnd\n";
4389 specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4390 specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4391 specArray.numWorkGroups = IVec3(numElements, 1, 1);
4392
4393 specStruct.assembly =
4394 string(getComputeAsmShaderPreamble()) +
4395
4396 "OpSource GLSL 430\n"
4397 "OpName %main \"main\"\n"
4398 "OpName %id \"gl_GlobalInvocationID\"\n"
4399
4400 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4401
4402 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4403
4404 "%id = OpVariable %uvec3ptr Input\n"
4405 "%zero = OpConstant %i32 0\n"
4406 "%float_0 = OpConstant %f32 0.0\n"
4407 "%float_1 = OpConstant %f32 1.0\n"
4408 "%float_n1 = OpConstant %f32 -1.0\n"
4409
4410 "%v2f32 = OpTypeVector %f32 2\n"
4411 "%Data2 = OpTypeStruct %f32 %v2f32\n"
4412 "%Data = OpTypeStruct %Data2 %f32\n"
4413
4414 "%in1a = OpConstantComposite %v2f32 %float_1 %float_1\n"
4415 "%in1b = OpConstantComposite %Data2 %float_1 %in1a\n"
4416 "%s1 = OpConstantComposite %Data %in1b %float_1\n"
4417 "%in2a = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4418 "%in2b = OpConstantComposite %Data2 %float_n1 %in2a\n"
4419 "%s2 = OpConstantComposite %Data %in2b %float_n1\n"
4420
4421 "%main = OpFunction %void None %voidf\n"
4422 "%entry = OpLabel\n"
4423 "%idval = OpLoad %uvec3 %id\n"
4424 "%x = OpCompositeExtract %u32 %idval 0\n"
4425 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4426 "%inval = OpLoad %f32 %inloc\n"
4427
4428 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4429 " OpSelectionMerge %cm None\n"
4430 " OpBranchConditional %comp %tb %fb\n"
4431 "%tb = OpLabel\n"
4432 " OpBranch %cm\n"
4433 "%fb = OpLabel\n"
4434 " OpBranch %cm\n"
4435 "%cm = OpLabel\n"
4436 "%sres = OpPhi %Data %s1 %tb %s2 %fb\n"
4437 "%res = OpCompositeExtract %f32 %sres 0 0\n"
4438
4439 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4440 " OpStore %outloc %res\n"
4441 " OpReturn\n"
4442
4443 " OpFunctionEnd\n";
4444 specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4445 specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4446 specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4447
4448 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", specInt));
4449 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", specFloat));
4450 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", specFloat16));
4451 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", specVec3));
4452 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", specMat4));
4453 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", specArray));
4454 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", specStruct));
4455 }
4456
// Emits `count` SPIR-V float constant declarations, one per line, named
// %cf5, %cf15, %cf25, ... Each constant's value is the number in its name
// (written with a ".0" suffix). The returned text ends with an empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream	defs;
	int					ndx		= 0;

	while (ndx < count)
	{
		const int constValue = ndx * 10 + 5;

		defs << "%cf" << constValue << " = OpConstant %f32 " << constValue << ".0\n";
		++ndx;
	}

	defs << "\n";
	return defs.str();
}
4465
// Builds the "<literal> <label>" operand pairs of an OpSwitch for the selector
// values 0 .. count-1: " 0 %case0 1 %case1 ...". The list is terminated by a
// newline so it can close the OpSwitch line it is appended to.
string generateSwitchCases (int count)
{
	std::ostringstream	operands;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		operands << " " << caseNdx << " %case" << caseNdx;
		++caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4474
// Emits one target block per switch case: a "%case<i>" label followed by an
// unconditional branch to %phi, for i in 0 .. count-1. The returned text ends
// with an empty line.
string generateSwitchTargets (int count)
{
	std::ostringstream	blocks;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		blocks << "%case" << caseNdx << " = OpLabel\n OpBranch %phi\n";
		++caseNdx;
	}

	blocks << "\n";
	return blocks.str();
}
4483
// Builds the "<value> <parent block>" operand pairs of an OpPhi that merges
// `count` switch cases: case i contributes the constant %cf<i*10+5> coming
// from block %case<i>. The list is terminated by a newline.
string generateOpPhiParams (int count)
{
	std::ostringstream	operands;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		const int constName = caseNdx * 10 + 5;

		operands << " %cf" << constName << " %case" << caseNdx;
		++caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4492
// Returns the decimal text form of `value`, for splicing an integer into
// shader assembly.
string generateIntWidth (int value)
{
	std::ostringstream decimalText;

	decimalText << value;

	const string text = decimalText.str();
	return text;
}
4499
// Returns a copy of `s` in which the marker "ABC" has been inserted after some
// of the characters. For every character copied, `acc` grows by `add`; each
// time it then exceeds `treshold`, the marker is emitted and `treshold` is
// subtracted from `acc` again. Skipping insertions this way keeps the result
// less uniform (and much shorter) than injecting after every character.
// `acc` is an in/out parameter, so the pattern carries over between calls.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	std::ostringstream expanded;

	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor)
	{
		expanded << *cursor;
		acc += add;

		if (acc <= treshold)
			continue;

		acc -= treshold;
		expanded << "ABC";
	}

	return expanded.str();
}
4522
// Calculate the expected result of the "nested" OpPhi case on the CPU by
// interpreting the code string `s` for the input value `val`.
//
// The code string is made of three letters: 'A' opens an if-block whose
// then-branch increments the result, 'B' switches to the (empty) else-branch
// and 'C' closes the block. The condition of a block at nesting depth d is
// "fmod(|val|, 2 << d) >= 2^d - 0.5", i.e. the same predicate the shader
// computes for %b<d> with OpFMod and OpFOrdGreaterThanEqual.
//
// The string is expected to be well-formed (every 'A' matched by a 'B' and a
// 'C') and to nest at most 8 levels deep; the depth indexes an 8-entry table.
//
// Returns the number of executed then-branches, as a float.
float calcOpPhiCase5 (float val, const string& s)
{
	const int	maxDepth		= 8;
	const float	tv[maxDepth]	= { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
	const float	v				= std::fabs(val);
	bool		b[maxDepth];
	float		res				= 0.0f;
	int			depth			= -1;	// Nesting depth of the innermost open block, -1 outside any block.
	int			skip			= 0;	// Non-zero while inside a branch that is not executed.

	// Use >= so the reference agrees with the shader even when the remainder
	// equals the threshold exactly (only possible for non-integer inputs).
	for (int i = 0; i < maxDepth; ++i)
		b[i] = std::fmod(v, static_cast<float>(2 << i)) >= tv[i];

	for (const char* p = s.c_str(); *p; ++p)
	{
		switch (*p)
		{
			case 'A':	// if (bit set) { result++;
				depth++;
				if (skip == 0 && b[depth])
					res += 1.0f;
				else
					skip++;		// Then-branch not taken, or an enclosing branch is already skipped.
				break;

			case 'B':	// } else {
				if (skip)
					skip--;		// Leave the then-branch that was being skipped.
				if (b[depth] || skip)
					skip++;		// Else-branch is dead if the then-branch ran or an outer branch is skipped.
				break;

			case 'C':	// }
				depth--;
				if (skip)
					skip--;
				break;

			default:
				break;
		}
	}

	return res;
}
4569
// Translates a code string into the SPIR-V body of nested if/else blocks.
//
// Meaning of the letters in the code string:
//
//   A   if (certain bit is set) { result++;
//   B   } else {
//   C   }
//
// Examples:
//   AABCBC  ->  if(){r++;if(){r++;}else{}}else{}
//   ABABCC  ->  if(){r++;}else{if(){r++;}else{}}
//   ABCABC  ->  if(){r++;}else{}if(){r++;}else{}
//
// Every 'A' is given an id equal to its position in the string; that id names
// the then-block (%t<id>), the else-block (%f<id>), the merge block (%m<id>)
// and the incremented value (%rt<id>). The condition used is %b<depth>.
//
// Else-branches never produce a new value, so the generator has to remember
// which value (and which block it arrives from) was current before the
// matching 'A'; that is what the value stacks below are for. The OpPhi emitted
// for the final 'C' is named %res; all other merges are named %rm<id>.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>			openIds;			// Ids of the open if-blocks; bottom entry -1 marks "no block".
	std::stack<std::string>	liveValue;			// Id holding the running result at each open level.
	std::stack<std::string>	liveValueAndBlock;	// Same value paired with the block it flows out of.
	std::stack<std::string>	thenIncoming;		// OpPhi operand pair contributed by each open then-branch.
	std::ostringstream		code;
	int						nesting		= -1;
	int						activeId	= 0;
	int						position	= 0;

	openIds.push(-1);
	liveValue.push("%f32_0");
	liveValueAndBlock.push("%f32_0 %entry");

	for (const char* cursor = s.c_str(); *cursor; ++cursor, ++position)
	{
		switch (*cursor)
		{
			case 'A':
			{
				++nesting;
				activeId = position;
				openIds.push(activeId);

				code << "\tOpSelectionMerge %m" << activeId << " None\n";
				code << "\tOpBranchConditional %b" << nesting << " %t" << activeId << " %f" << activeId << "\n";
				code << "%t" << activeId << " = OpLabel\n";
				code << "%rt" << activeId << " = OpFAdd %f32 " << liveValue.top() << " %f32_1\n";

				// The incremented value becomes current inside the then-branch.
				std::ostringstream name;
				name << "%rt" << activeId;
				liveValue.push(name.str());
				name << " %t" << activeId;
				liveValueAndBlock.push(name.str());
				break;
			}

			case 'B':
			{
				// Whatever is current at the end of the then-branch is its OpPhi contribution.
				thenIncoming.push(liveValueAndBlock.top());
				liveValue.pop();
				liveValueAndBlock.pop();

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%f" << activeId << " = OpLabel\n";

				// The else-branch sees the value from before the 'A', now arriving from %f<id>.
				std::ostringstream pairText;
				pairText << liveValue.top() << " %f" << activeId;
				liveValueAndBlock.top() = pairText.str();
				break;
			}

			case 'C':
			{
				const std::string	elseIncoming	= liveValueAndBlock.top();
				const bool			isLastLetter	= (*(cursor + 1) == 0);

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%m" << activeId << " = OpLabel\n";

				if (isLastLetter)
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << activeId;
				code << " = OpPhi %f32 " << thenIncoming.top() << " " << elseIncoming << "\n";

				// The merged value replaces the current one at the enclosing level.
				std::ostringstream name;
				name << "%rm" << activeId;
				liveValue.top() = name.str();
				name << " %m" << activeId;
				liveValueAndBlock.top() = name.str();

				thenIncoming.pop();
				--nesting;
				openIds.pop();
				activeId = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4668
// Creates the "opphi" test group.
//
// Five compute shader cases are built over the same 100 random whole-number inputs:
//   block     - OpPhi merging three switch cases whose blocks surround the phi block
//   induction - OpPhi used for a loop counter and an accumulator
//   swap      - two OpPhi instructions in a loop header that read each other's result
//   wide      - OpPhi with test4Width operands fed by an equally wide OpSwitch
//   nested    - OpPhi chain generated from a code string of nested if/else blocks
// The variable-type cases are then appended through createOpPhiVartypeTests().
//
// The returned group is released from its MovePtr, so the caller receives ownership.
tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
	ComputeShaderSpec spec1;
	ComputeShaderSpec spec2;
	ComputeShaderSpec spec3;
	ComputeShaderSpec spec4;
	ComputeShaderSpec spec5;
	// The random generator is seeded from the group name.
	de::Random rnd (deStringHash(group->getName()));
	const int numElements = 100;
	vector<float> inputFloats (numElements, 0);
	vector<float> outputFloats1 (numElements, 0);
	vector<float> outputFloats2 (numElements, 0);
	vector<float> outputFloats3 (numElements, 0);
	vector<float> outputFloats4 (numElements, 0);
	vector<float> outputFloats5 (numElements, 0);
	std::string codestring = "ABC";
	// Number of switch cases / OpPhi operands used by the "wide" case.
	const int test4Width = 512;

	// Build case 5 code string. Each iteration makes the hierarchy more complicated.
	// 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
	// shader code.
	for (int i = 0, acc = 0; i < 9; i++)
		codestring = expandOpPhiCase5(codestring, acc, 7, 24);

	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);

	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
	floorAll(inputFloats);

	// Compute the expected output of every case on the CPU.
	for (size_t ndx = 0; ndx < numElements; ++ndx)
	{
		// Case 1: the constant added depends on the invocation index modulo 3.
		switch (ndx % 3)
		{
			case 0: outputFloats1[ndx] = inputFloats[ndx] + 5.5f; break;
			case 1: outputFloats1[ndx] = inputFloats[ndx] + 20.5f; break;
			case 2: outputFloats1[ndx] = inputFloats[ndx] + 1.75f; break;
			default: break;
		}
		// Case 2: the shader loop adds 6.5 to the input three times before storing.
		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
		// Case 3: after the swap the shader stores 8.5 - input.
		outputFloats3[ndx] = 8.5f - inputFloats[ndx];

		// Case 4: mirrors the shader's selector, |index * input| modulo test4Width; the selected
		// case contributes the constant selector * 10 + 5.
		int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
		outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;

		// Case 5: interpret the code string for this input.
		outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
	}

	// Case 1 ("block"): the blocks feeding the OpPhi are placed both before and after the block
	// containing it, and the OpPhi operands are not listed in block order.
	spec1.assembly =
		string(getComputeAsmShaderPreamble()) +

		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +

		"%id = OpVariable %uvec3ptr Input\n"
		"%zero = OpConstant %i32 0\n"
		"%three = OpConstant %u32 3\n"
		"%constf5p5 = OpConstant %f32 5.5\n"
		"%constf20p5 = OpConstant %f32 20.5\n"
		"%constf1p75 = OpConstant %f32 1.75\n"
		"%constf8p5 = OpConstant %f32 8.5\n"
		"%constf6p5 = OpConstant %f32 6.5\n"

		"%main = OpFunction %void None %voidf\n"
		"%entry = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%selector = OpUMod %u32 %x %three\n"
		" OpSelectionMerge %phi None\n"
		" OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"

		// Case 1 before OpPhi.
		"%case1 = OpLabel\n"
		" OpBranch %phi\n"

		"%default = OpLabel\n"
		" OpUnreachable\n"

		"%phi = OpLabel\n"
		"%operand = OpPhi %f32 %constf1p75 %case2 %constf20p5 %case1 %constf5p5 %case0\n" // not in the order of blocks
		"%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
		"%inval = OpLoad %f32 %inloc\n"
		"%add = OpFAdd %f32 %inval %operand\n"
		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
		" OpStore %outloc %add\n"
		" OpReturn\n"

		// Case 0 after OpPhi.
		"%case0 = OpLabel\n"
		" OpBranch %phi\n"


		// Case 2 after OpPhi.
		"%case2 = OpLabel\n"
		" OpBranch %phi\n"

		" OpFunctionEnd\n";
	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
	spec1.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", spec1));

	// Case 2 ("induction"): a single-block loop whose counter (%step) and accumulator (%accum)
	// are both OpPhi results fed from the entry block and from the loop block itself.
	spec2.assembly =
		string(getComputeAsmShaderPreamble()) +

		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +

		"%id = OpVariable %uvec3ptr Input\n"
		"%zero = OpConstant %i32 0\n"
		"%one = OpConstant %i32 1\n"
		"%three = OpConstant %i32 3\n"
		"%constf6p5 = OpConstant %f32 6.5\n"

		"%main = OpFunction %void None %voidf\n"
		"%entry = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
		"%inval = OpLoad %f32 %inloc\n"
		" OpBranch %phi\n"

		"%phi = OpLabel\n"
		"%step = OpPhi %i32 %zero %entry %step_next %phi\n"
		"%accum = OpPhi %f32 %inval %entry %accum_next %phi\n"
		"%step_next = OpIAdd %i32 %step %one\n"
		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
		"%still_loop = OpSLessThan %bool %step %three\n"
		" OpLoopMerge %exit %phi None\n"
		" OpBranchConditional %still_loop %phi %exit\n"

		"%exit = OpLabel\n"
		" OpStore %outloc %accum\n"
		" OpReturn\n"
		" OpFunctionEnd\n";
	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
	spec2.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", spec2));

	// Case 3 ("swap"): %a_next and %b_next are OpPhi results that take each other's value on the
	// loop back-edge. %still_loop is true only when arriving from the entry block, so the
	// back-edge is taken once and the two values are exchanged before the subtraction.
	spec3.assembly =
		string(getComputeAsmShaderPreamble()) +

		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +

		"%f32ptr_f = OpTypePointer Function %f32\n"
		"%id = OpVariable %uvec3ptr Input\n"
		"%true = OpConstantTrue %bool\n"
		"%false = OpConstantFalse %bool\n"
		"%zero = OpConstant %i32 0\n"
		"%constf8p5 = OpConstant %f32 8.5\n"

		"%main = OpFunction %void None %voidf\n"
		"%entry = OpLabel\n"
		"%b = OpVariable %f32ptr_f Function %constf8p5\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
		"%a_init = OpLoad %f32 %inloc\n"
		"%b_init = OpLoad %f32 %b\n"
		" OpBranch %phi\n"

		"%phi = OpLabel\n"
		"%still_loop = OpPhi %bool %true %entry %false %phi\n"
		"%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
		"%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
		" OpLoopMerge %exit %phi None\n"
		" OpBranchConditional %still_loop %phi %exit\n"

		"%exit = OpLabel\n"
		"%sub = OpFSub %f32 %a_next %b_next\n"
		" OpStore %outloc %sub\n"
		" OpReturn\n"
		" OpFunctionEnd\n";
	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
	spec3.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", spec3));

	// Case 4 ("wide"): an OpSwitch with test4Width cases, each branching straight to %phi, where
	// a single OpPhi with test4Width operands picks the constant belonging to the taken case.
	// The preamble is written out by hand here because the shader imports GLSL.std.450 for FAbs.
	spec4.assembly =
		"OpCapability Shader\n"
		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\" %id\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"

		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +

		"%id = OpVariable %uvec3ptr Input\n"
		"%zero = OpConstant %i32 0\n"
		"%cimod = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"

		+ generateConstantDefinitions(test4Width) +

		"%main = OpFunction %void None %voidf\n"
		"%entry = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
		"%inval = OpLoad %f32 %inloc\n"
		"%xf = OpConvertUToF %f32 %x\n"
		"%xm = OpFMul %f32 %xf %inval\n"
		"%xa = OpExtInst %f32 %ext FAbs %xm\n"
		"%xi = OpConvertFToU %u32 %xa\n"
		"%selector = OpUMod %u32 %xi %cimod\n"
		" OpSelectionMerge %phi None\n"
		" OpSwitch %selector %default "

		+ generateSwitchCases(test4Width) +

		"%default = OpLabel\n"
		" OpUnreachable\n"

		+ generateSwitchTargets(test4Width) +

		"%phi = OpLabel\n"
		"%result = OpPhi %f32"

		+ generateOpPhiParams(test4Width) +

		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
		" OpStore %outloc %result\n"
		" OpReturn\n"

		" OpFunctionEnd\n";
	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
	spec4.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", spec4));

	// Case 5 ("nested"): nested if/else blocks generated from the code string, each merged with
	// an OpPhi. The code string itself is embedded in the module as an OpString.
	spec5.assembly =
		"OpCapability Shader\n"
		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\" %id\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"
		"%code = OpString \"" + codestring + "\"\n"

		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +

		"%id = OpVariable %uvec3ptr Input\n"
		"%zero = OpConstant %i32 0\n"
		"%f32_0 = OpConstant %f32 0.0\n"
		"%f32_0_5 = OpConstant %f32 0.5\n"
		"%f32_1 = OpConstant %f32 1.0\n"
		"%f32_1_5 = OpConstant %f32 1.5\n"
		"%f32_2 = OpConstant %f32 2.0\n"
		"%f32_3_5 = OpConstant %f32 3.5\n"
		"%f32_4 = OpConstant %f32 4.0\n"
		"%f32_7_5 = OpConstant %f32 7.5\n"
		"%f32_8 = OpConstant %f32 8.0\n"
		"%f32_15_5 = OpConstant %f32 15.5\n"
		"%f32_16 = OpConstant %f32 16.0\n"
		"%f32_31_5 = OpConstant %f32 31.5\n"
		"%f32_32 = OpConstant %f32 32.0\n"
		"%f32_63_5 = OpConstant %f32 63.5\n"
		"%f32_64 = OpConstant %f32 64.0\n"
		"%f32_127_5 = OpConstant %f32 127.5\n"
		"%f32_128 = OpConstant %f32 128.0\n"
		"%f32_256 = OpConstant %f32 256.0\n"

		"%main = OpFunction %void None %voidf\n"
		"%entry = OpLabel\n"
		"%idval = OpLoad %uvec3 %id\n"
		"%x = OpCompositeExtract %u32 %idval 0\n"
		"%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
		"%inval = OpLoad %f32 %inloc\n"

		// Remainders of |input| modulo 256, 128, ..., 2.
		"%xabs = OpExtInst %f32 %ext FAbs %inval\n"
		"%x8 = OpFMod %f32 %xabs %f32_256\n"
		"%x7 = OpFMod %f32 %xabs %f32_128\n"
		"%x6 = OpFMod %f32 %xabs %f32_64\n"
		"%x5 = OpFMod %f32 %xabs %f32_32\n"
		"%x4 = OpFMod %f32 %xabs %f32_16\n"
		"%x3 = OpFMod %f32 %xabs %f32_8\n"
		"%x2 = OpFMod %f32 %xabs %f32_4\n"
		"%x1 = OpFMod %f32 %xabs %f32_2\n"

		// %b<N> is the branch condition the generated code uses at nesting depth N.
		"%b7 = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
		"%b6 = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
		"%b5 = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
		"%b4 = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
		"%b3 = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
		"%b2 = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
		"%b1 = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
		"%b0 = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"

		+ generateOpPhiCase5(codestring) +

		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
		" OpStore %outloc %res\n"
		" OpReturn\n"

		" OpFunctionEnd\n";
	spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
	spec5.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", spec5));

	createOpPhiVartypeTests(group, testCtx);

	return group.release();
}
5005
// Assembly code used for testing block order is based on GLSL source code:
//
// #version 430
//
// layout(std140, set = 0, binding = 0) readonly buffer Input {
// float elements[];
// } input_data;
// layout(std140, set = 0, binding = 1) writeonly buffer Output {
// float elements[];
// } output_data;
//
// void main() {
// uint x = gl_GlobalInvocationID.x;
// output_data.elements[x] = input_data.elements[x];
// if (x > uint(50)) {
// switch (x % uint(3)) {
// case 0: output_data.elements[x] += 1.5f; break;
// case 1: output_data.elements[x] += 42.f; break;
// case 2: output_data.elements[x] -= 27.f; break;
// default: break;
// }
// } else {
// output_data.elements[x] = -input_data.elements[x];
// }
// }
//
// Creates the "block_order" test group with a single compute case named "all". The shader's
// basic blocks are deliberately emitted in an order that differs from the structure of the
// GLSL source above (merge blocks before the branches they merge, cases out of order).
// The returned group is released from its MovePtr, so the caller receives ownership.
tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
	ComputeShaderSpec spec;
	// The random generator is seeded from the group name.
	de::Random rnd (deStringHash(group->getName()));
	const int numElements = 100;
	vector<float> inputFloats (numElements, 0);
	vector<float> outputFloats (numElements, 0);

	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);

	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
	floorAll(inputFloats);

	// Invocations 0..50 take the else-branch of the shader: the input is negated.
	for (size_t ndx = 0; ndx <= 50; ++ndx)
		outputFloats[ndx] = -inputFloats[ndx];

	// Invocations above 50 take the switch: the adjustment depends on the index modulo 3.
	for (size_t ndx = 51; ndx < numElements; ++ndx)
	{
		switch (ndx % 3)
		{
			case 0: outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
			case 1: outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
			case 2: outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
			default: break;
		}
	}

	spec.assembly =
		string(getComputeAsmShaderPreamble()) +

		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		"OpName %id \"gl_GlobalInvocationID\"\n"

		"OpDecorate %id BuiltIn GlobalInvocationId\n"

		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +

		"%u32ptr = OpTypePointer Function %u32\n"
		"%u32ptr_input = OpTypePointer Input %u32\n"

		+ string(getComputeAsmInputOutputBuffer()) +

		"%id = OpVariable %uvec3ptr Input\n"
		"%zero = OpConstant %i32 0\n"
		"%const3 = OpConstant %u32 3\n"
		"%const50 = OpConstant %u32 50\n"
		"%constf1p5 = OpConstant %f32 1.5\n"
		"%constf27 = OpConstant %f32 27.0\n"
		"%constf42 = OpConstant %f32 42.0\n"

		"%main = OpFunction %void None %voidf\n"

		// entry block.
		"%entry = OpLabel\n"

		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
		"%xvar = OpVariable %u32ptr Function\n"
		"%xptr = OpAccessChain %u32ptr_input %id %zero\n"
		"%x = OpLoad %u32 %xptr\n"
		" OpStore %xvar %x\n"

		"%cmp = OpUGreaterThan %bool %x %const50\n"
		" OpSelectionMerge %if_merge None\n"
		" OpBranchConditional %cmp %if_true %if_false\n"

		// False branch for if-statement: placed in the middle of switch cases and before true branch.
		"%if_false = OpLabel\n"
		"%x_f = OpLoad %u32 %xvar\n"
		"%inloc_f = OpAccessChain %f32ptr %indata %zero %x_f\n"
		"%inval_f = OpLoad %f32 %inloc_f\n"
		"%negate = OpFNegate %f32 %inval_f\n"
		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
		" OpStore %outloc_f %negate\n"
		" OpBranch %if_merge\n"

		// Merge block for if-statement: placed in the middle of true and false branch.
		"%if_merge = OpLabel\n"
		" OpReturn\n"

		// True branch for if-statement: placed in the middle of switch cases and after the false branch.
		"%if_true = OpLabel\n"
		"%xval_t = OpLoad %u32 %xvar\n"
		"%mod = OpUMod %u32 %xval_t %const3\n"
		" OpSelectionMerge %switch_merge None\n"
		" OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"

		// Merge block for switch-statement: placed before the case
		// bodies. But it must follow OpSwitch which dominates it.
		"%switch_merge = OpLabel\n"
		" OpBranch %if_merge\n"

		// Case 1 for switch-statement: placed before case 0.
		// It must follow the OpSwitch that dominates it.
		"%case1 = OpLabel\n"
		"%x_1 = OpLoad %u32 %xvar\n"
		"%inloc_1 = OpAccessChain %f32ptr %indata %zero %x_1\n"
		"%inval_1 = OpLoad %f32 %inloc_1\n"
		"%addf42 = OpFAdd %f32 %inval_1 %constf42\n"
		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
		" OpStore %outloc_1 %addf42\n"
		" OpBranch %switch_merge\n"

		// Case 2 for switch-statement.
		"%case2 = OpLabel\n"
		"%x_2 = OpLoad %u32 %xvar\n"
		"%inloc_2 = OpAccessChain %f32ptr %indata %zero %x_2\n"
		"%inval_2 = OpLoad %f32 %inloc_2\n"
		"%subf27 = OpFSub %f32 %inval_2 %constf27\n"
		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
		" OpStore %outloc_2 %subf27\n"
		" OpBranch %switch_merge\n"

		// Default case for switch-statement: placed in the middle of normal cases.
		"%default = OpLabel\n"
		" OpBranch %switch_merge\n"

		// Case 0 for switch-statement: out of order.
		"%case0 = OpLabel\n"
		"%x_0 = OpLoad %u32 %xvar\n"
		"%inloc_0 = OpAccessChain %f32ptr %indata %zero %x_0\n"
		"%inval_0 = OpLoad %f32 %inloc_0\n"
		"%addf1p5 = OpFAdd %f32 %inval_0 %constf1p5\n"
		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
		" OpStore %outloc_0 %addf1p5\n"
		" OpBranch %switch_merge\n"

		" OpFunctionEnd\n";
	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
	spec.numWorkGroups = IVec3(numElements, 1, 1);

	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", spec));

	return group.release();
}
5168
createMultipleShaderGroup(tcu::TestContext & testCtx)5169 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5170 {
5171 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5172 ComputeShaderSpec spec1;
5173 ComputeShaderSpec spec2;
5174 de::Random rnd (deStringHash(group->getName()));
5175 const int numElements = 100;
5176 vector<float> inputFloats (numElements, 0);
5177 vector<float> outputFloats1 (numElements, 0);
5178 vector<float> outputFloats2 (numElements, 0);
5179 fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5180
5181 for (size_t ndx = 0; ndx < numElements; ++ndx)
5182 {
5183 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5184 outputFloats2[ndx] = -inputFloats[ndx];
5185 }
5186
5187 const string assembly(
5188 "OpCapability Shader\n"
5189 "OpMemoryModel Logical GLSL450\n"
5190 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5191 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5192 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5193 "OpEntryPoint Vertex %vert_main \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5194 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5195 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5196
5197 "OpName %comp_main1 \"entrypoint1\"\n"
5198 "OpName %comp_main2 \"entrypoint2\"\n"
5199 "OpName %vert_main \"entrypoint2\"\n"
5200 "OpName %id \"gl_GlobalInvocationID\"\n"
5201 "OpName %vert_builtin_st \"gl_PerVertex\"\n"
5202 "OpName %vertexIndex \"gl_VertexIndex\"\n"
5203 "OpName %instanceIndex \"gl_InstanceIndex\"\n"
5204 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5205 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5206 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5207
5208 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5209 "OpDecorate %vertexIndex BuiltIn VertexIndex\n"
5210 "OpDecorate %instanceIndex BuiltIn InstanceIndex\n"
5211 "OpDecorate %vert_builtin_st Block\n"
5212 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5213 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5214 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5215
5216 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5217
5218 "%zero = OpConstant %i32 0\n"
5219 "%one = OpConstant %u32 1\n"
5220 "%c_f32_1 = OpConstant %f32 1\n"
5221
5222 "%i32inputptr = OpTypePointer Input %i32\n"
5223 "%vec4 = OpTypeVector %f32 4\n"
5224 "%vec4ptr = OpTypePointer Output %vec4\n"
5225 "%f32arr1 = OpTypeArray %f32 %one\n"
5226 "%vert_builtin_st = OpTypeStruct %vec4 %f32 %f32arr1\n"
5227 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5228 "%vert_builtins = OpVariable %vert_builtin_st_ptr Output\n"
5229
5230 "%id = OpVariable %uvec3ptr Input\n"
5231 "%vertexIndex = OpVariable %i32inputptr Input\n"
5232 "%instanceIndex = OpVariable %i32inputptr Input\n"
5233 "%c_vec4_1 = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5234
5235 // gl_Position = vec4(1.);
5236 "%vert_main = OpFunction %void None %voidf\n"
5237 "%vert_entry = OpLabel\n"
5238 "%position = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5239 " OpStore %position %c_vec4_1\n"
5240 " OpReturn\n"
5241 " OpFunctionEnd\n"
5242
5243 // Double inputs.
5244 "%comp_main1 = OpFunction %void None %voidf\n"
5245 "%comp1_entry = OpLabel\n"
5246 "%idval1 = OpLoad %uvec3 %id\n"
5247 "%x1 = OpCompositeExtract %u32 %idval1 0\n"
5248 "%inloc1 = OpAccessChain %f32ptr %indata %zero %x1\n"
5249 "%inval1 = OpLoad %f32 %inloc1\n"
5250 "%add = OpFAdd %f32 %inval1 %inval1\n"
5251 "%outloc1 = OpAccessChain %f32ptr %outdata %zero %x1\n"
5252 " OpStore %outloc1 %add\n"
5253 " OpReturn\n"
5254 " OpFunctionEnd\n"
5255
5256 // Negate inputs.
5257 "%comp_main2 = OpFunction %void None %voidf\n"
5258 "%comp2_entry = OpLabel\n"
5259 "%idval2 = OpLoad %uvec3 %id\n"
5260 "%x2 = OpCompositeExtract %u32 %idval2 0\n"
5261 "%inloc2 = OpAccessChain %f32ptr %indata %zero %x2\n"
5262 "%inval2 = OpLoad %f32 %inloc2\n"
5263 "%neg = OpFNegate %f32 %inval2\n"
5264 "%outloc2 = OpAccessChain %f32ptr %outdata %zero %x2\n"
5265 " OpStore %outloc2 %neg\n"
5266 " OpReturn\n"
5267 " OpFunctionEnd\n");
5268
5269 spec1.assembly = assembly;
5270 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5271 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5272 spec1.numWorkGroups = IVec3(numElements, 1, 1);
5273 spec1.entryPoint = "entrypoint1";
5274
5275 spec2.assembly = assembly;
5276 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5277 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5278 spec2.numWorkGroups = IVec3(numElements, 1, 1);
5279 spec2.entryPoint = "entrypoint2";
5280
5281 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", spec1));
5282 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", spec2));
5283
5284 return group.release();
5285 }
5286
// Returns a string made of num4ByteChars repetitions of one 4-byte UTF-8
// sequence (F0 9F 8C 8D, i.e. code point U+1F30D). The result is therefore
// exactly num4ByteChars * 4 bytes long and is empty when num4ByteChars is 0.
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
    // The sequence is spelled as a plain char array with explicit byte escapes
    // rather than as a UTF-8 string literal: older Microsoft compilers may
    // otherwise treat the literal as wide (16-bit) characters.
    static const char   earthAfricaBytes[]  = "\xF0\x9F\x8C\x8D";
    const size_t        bytesPerChar        = sizeof(earthAfricaBytes) - 1; // drop the terminating NUL

    std::string result;
    result.reserve(num4ByteChars * bytesPerChar);

    size_t remaining = num4ByteChars;
    while (remaining != 0)
    {
        result.append(earthAfricaBytes, bytesPerChar);
        --remaining;
    }

    return result;
}
5303
createOpSourceGroup(tcu::TestContext & testCtx)5304 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5305 {
5306 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5307 vector<CaseParameter> cases;
5308 de::Random rnd (deStringHash(group->getName()));
5309 const int numElements = 100;
5310 vector<float> positiveFloats (numElements, 0);
5311 vector<float> negativeFloats (numElements, 0);
5312 const StringTemplate shaderTemplate (
5313 "OpCapability Shader\n"
5314 "OpMemoryModel Logical GLSL450\n"
5315
5316 "OpEntryPoint GLCompute %main \"main\" %id\n"
5317 "OpExecutionMode %main LocalSize 1 1 1\n"
5318
5319 "${SOURCE}\n"
5320
5321 "OpName %main \"main\"\n"
5322 "OpName %id \"gl_GlobalInvocationID\"\n"
5323
5324 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5325
5326 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5327
5328 "%id = OpVariable %uvec3ptr Input\n"
5329 "%zero = OpConstant %i32 0\n"
5330
5331 "%main = OpFunction %void None %voidf\n"
5332 "%label = OpLabel\n"
5333 "%idval = OpLoad %uvec3 %id\n"
5334 "%x = OpCompositeExtract %u32 %idval 0\n"
5335 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5336 "%inval = OpLoad %f32 %inloc\n"
5337 "%neg = OpFNegate %f32 %inval\n"
5338 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5339 " OpStore %outloc %neg\n"
5340 " OpReturn\n"
5341 " OpFunctionEnd\n");
5342
5343 cases.push_back(CaseParameter("unknown_source", "OpSource Unknown 0"));
5344 cases.push_back(CaseParameter("wrong_source", "OpSource OpenCL_C 210"));
5345 cases.push_back(CaseParameter("normal_filename", "%fname = OpString \"filename\"\n"
5346 "OpSource GLSL 430 %fname"));
5347 cases.push_back(CaseParameter("empty_filename", "%fname = OpString \"\"\n"
5348 "OpSource GLSL 430 %fname"));
5349 cases.push_back(CaseParameter("normal_source_code", "%fname = OpString \"filename\"\n"
5350 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5351 cases.push_back(CaseParameter("empty_source_code", "%fname = OpString \"filename\"\n"
5352 "OpSource GLSL 430 %fname \"\""));
5353 cases.push_back(CaseParameter("long_source_code", "%fname = OpString \"filename\"\n"
5354 "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5355 cases.push_back(CaseParameter("utf8_source_code", "%fname = OpString \"filename\"\n"
5356 "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5357 cases.push_back(CaseParameter("normal_sourcecontinued", "%fname = OpString \"filename\"\n"
5358 "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5359 "OpSourceContinued \"id main() {}\""));
5360 cases.push_back(CaseParameter("empty_sourcecontinued", "%fname = OpString \"filename\"\n"
5361 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5362 "OpSourceContinued \"\""));
5363 cases.push_back(CaseParameter("long_sourcecontinued", "%fname = OpString \"filename\"\n"
5364 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5365 "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5366 cases.push_back(CaseParameter("utf8_sourcecontinued", "%fname = OpString \"filename\"\n"
5367 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5368 "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5369 cases.push_back(CaseParameter("multi_sourcecontinued", "%fname = OpString \"filename\"\n"
5370 "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5371 "OpSourceContinued \"void\"\n"
5372 "OpSourceContinued \"main()\"\n"
5373 "OpSourceContinued \"{}\""));
5374 cases.push_back(CaseParameter("empty_source_before_sourcecontinued", "%fname = OpString \"filename\"\n"
5375 "OpSource GLSL 430 %fname \"\"\n"
5376 "OpSourceContinued \"#version 430\nvoid main() {}\""));
5377
5378 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5379
5380 for (size_t ndx = 0; ndx < numElements; ++ndx)
5381 negativeFloats[ndx] = -positiveFloats[ndx];
5382
5383 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5384 {
5385 map<string, string> specializations;
5386 ComputeShaderSpec spec;
5387
5388 specializations["SOURCE"] = cases[caseNdx].param;
5389 spec.assembly = shaderTemplate.specialize(specializations);
5390 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5391 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5392 spec.numWorkGroups = IVec3(numElements, 1, 1);
5393
5394 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5395 }
5396
5397 return group.release();
5398 }
5399
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5400 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5401 {
5402 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5403 vector<CaseParameter> cases;
5404 de::Random rnd (deStringHash(group->getName()));
5405 const int numElements = 100;
5406 vector<float> inputFloats (numElements, 0);
5407 vector<float> outputFloats (numElements, 0);
5408 const StringTemplate shaderTemplate (
5409 string(getComputeAsmShaderPreamble()) +
5410
5411 "OpSourceExtension \"${EXTENSION}\"\n"
5412
5413 "OpName %main \"main\"\n"
5414 "OpName %id \"gl_GlobalInvocationID\"\n"
5415
5416 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5417
5418 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5419
5420 "%id = OpVariable %uvec3ptr Input\n"
5421 "%zero = OpConstant %i32 0\n"
5422
5423 "%main = OpFunction %void None %voidf\n"
5424 "%label = OpLabel\n"
5425 "%idval = OpLoad %uvec3 %id\n"
5426 "%x = OpCompositeExtract %u32 %idval 0\n"
5427 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5428 "%inval = OpLoad %f32 %inloc\n"
5429 "%neg = OpFNegate %f32 %inval\n"
5430 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5431 " OpStore %outloc %neg\n"
5432 " OpReturn\n"
5433 " OpFunctionEnd\n");
5434
5435 cases.push_back(CaseParameter("empty_extension", ""));
5436 cases.push_back(CaseParameter("real_extension", "GL_ARB_texture_rectangle"));
5437 cases.push_back(CaseParameter("fake_extension", "GL_ARB_im_the_ultimate_extension"));
5438 cases.push_back(CaseParameter("utf8_extension", "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5439 cases.push_back(CaseParameter("long_extension", makeLongUTF8String(65533) + "ccc")); // word count: 65535
5440
5441 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5442
5443 for (size_t ndx = 0; ndx < numElements; ++ndx)
5444 outputFloats[ndx] = -inputFloats[ndx];
5445
5446 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5447 {
5448 map<string, string> specializations;
5449 ComputeShaderSpec spec;
5450
5451 specializations["EXTENSION"] = cases[caseNdx].param;
5452 spec.assembly = shaderTemplate.specialize(specializations);
5453 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5454 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5455 spec.numWorkGroups = IVec3(numElements, 1, 1);
5456
5457 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5458 }
5459
5460 return group.release();
5461 }
5462
5463 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5464 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5465 {
5466 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5467 vector<CaseParameter> cases;
5468 de::Random rnd (deStringHash(group->getName()));
5469 const int numElements = 100;
5470 vector<float> positiveFloats (numElements, 0);
5471 vector<float> negativeFloats (numElements, 0);
5472 const StringTemplate shaderTemplate (
5473 string(getComputeAsmShaderPreamble()) +
5474
5475 "OpSource GLSL 430\n"
5476 "OpName %main \"main\"\n"
5477 "OpName %id \"gl_GlobalInvocationID\"\n"
5478
5479 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5480
5481 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5482 "%uvec2 = OpTypeVector %u32 2\n"
5483 "%bvec3 = OpTypeVector %bool 3\n"
5484 "%fvec4 = OpTypeVector %f32 4\n"
5485 "%fmat33 = OpTypeMatrix %fvec3 3\n"
5486 "%const100 = OpConstant %u32 100\n"
5487 "%uarr100 = OpTypeArray %i32 %const100\n"
5488 "%struct = OpTypeStruct %f32 %i32 %u32\n"
5489 "%pointer = OpTypePointer Function %i32\n"
5490 + string(getComputeAsmInputOutputBuffer()) +
5491
5492 "%null = OpConstantNull ${TYPE}\n"
5493
5494 "%id = OpVariable %uvec3ptr Input\n"
5495 "%zero = OpConstant %i32 0\n"
5496
5497 "%main = OpFunction %void None %voidf\n"
5498 "%label = OpLabel\n"
5499 "%idval = OpLoad %uvec3 %id\n"
5500 "%x = OpCompositeExtract %u32 %idval 0\n"
5501 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5502 "%inval = OpLoad %f32 %inloc\n"
5503 "%neg = OpFNegate %f32 %inval\n"
5504 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5505 " OpStore %outloc %neg\n"
5506 " OpReturn\n"
5507 " OpFunctionEnd\n");
5508
5509 cases.push_back(CaseParameter("bool", "%bool"));
5510 cases.push_back(CaseParameter("sint32", "%i32"));
5511 cases.push_back(CaseParameter("uint32", "%u32"));
5512 cases.push_back(CaseParameter("float32", "%f32"));
5513 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
5514 cases.push_back(CaseParameter("vec3bool", "%bvec3"));
5515 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
5516 cases.push_back(CaseParameter("matrix", "%fmat33"));
5517 cases.push_back(CaseParameter("array", "%uarr100"));
5518 cases.push_back(CaseParameter("struct", "%struct"));
5519 cases.push_back(CaseParameter("pointer", "%pointer"));
5520
5521 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5522
5523 for (size_t ndx = 0; ndx < numElements; ++ndx)
5524 negativeFloats[ndx] = -positiveFloats[ndx];
5525
5526 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5527 {
5528 map<string, string> specializations;
5529 ComputeShaderSpec spec;
5530
5531 specializations["TYPE"] = cases[caseNdx].param;
5532 spec.assembly = shaderTemplate.specialize(specializations);
5533 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5534 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5535 spec.numWorkGroups = IVec3(numElements, 1, 1);
5536
5537 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5538 }
5539
5540 return group.release();
5541 }
5542
5543 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5544 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5545 {
5546 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5547 vector<CaseParameter> cases;
5548 de::Random rnd (deStringHash(group->getName()));
5549 const int numElements = 100;
5550 vector<float> positiveFloats (numElements, 0);
5551 vector<float> negativeFloats (numElements, 0);
5552 const StringTemplate shaderTemplate (
5553 string(getComputeAsmShaderPreamble()) +
5554
5555 "OpSource GLSL 430\n"
5556 "OpName %main \"main\"\n"
5557 "OpName %id \"gl_GlobalInvocationID\"\n"
5558
5559 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5560
5561 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5562
5563 "%id = OpVariable %uvec3ptr Input\n"
5564 "%zero = OpConstant %i32 0\n"
5565
5566 "${CONSTANT}\n"
5567
5568 "%main = OpFunction %void None %voidf\n"
5569 "%label = OpLabel\n"
5570 "%idval = OpLoad %uvec3 %id\n"
5571 "%x = OpCompositeExtract %u32 %idval 0\n"
5572 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5573 "%inval = OpLoad %f32 %inloc\n"
5574 "%neg = OpFNegate %f32 %inval\n"
5575 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5576 " OpStore %outloc %neg\n"
5577 " OpReturn\n"
5578 " OpFunctionEnd\n");
5579
5580 cases.push_back(CaseParameter("vector", "%five = OpConstant %i32 5\n"
5581 "%ivec3 = OpTypeVector %i32 3\n"
5582 "%const = OpConstantComposite %ivec3 %five %zero %five"));
5583 cases.push_back(CaseParameter("matrix", "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5584 "%ten = OpConstant %f32 10.\n"
5585 "%fzero = OpConstant %f32 0.\n"
5586 "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5587 "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5588 cases.push_back(CaseParameter("struct", "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5589 "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5590 "%fzero = OpConstant %f32 0.\n"
5591 "%one = OpConstant %f32 1.\n"
5592 "%point5 = OpConstant %f32 0.5\n"
5593 "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5594 "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5595 "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5596 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %u32 %f32\n"
5597 "%st2 = OpTypeStruct %i32 %i32\n"
5598 "%struct = OpTypeStruct %st1 %st2\n"
5599 "%point5 = OpConstant %f32 0.5\n"
5600 "%one = OpConstant %u32 1\n"
5601 "%ten = OpConstant %i32 10\n"
5602 "%st1val = OpConstantComposite %st1 %one %point5\n"
5603 "%st2val = OpConstantComposite %st2 %ten %ten\n"
5604 "%const = OpConstantComposite %struct %st1val %st2val"));
5605
5606 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5607
5608 for (size_t ndx = 0; ndx < numElements; ++ndx)
5609 negativeFloats[ndx] = -positiveFloats[ndx];
5610
5611 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5612 {
5613 map<string, string> specializations;
5614 ComputeShaderSpec spec;
5615
5616 specializations["CONSTANT"] = cases[caseNdx].param;
5617 spec.assembly = shaderTemplate.specialize(specializations);
5618 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5619 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5620 spec.numWorkGroups = IVec3(numElements, 1, 1);
5621
5622 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
5623 }
5624
5625 return group.release();
5626 }
5627
5628 // Creates a floating point number with the given exponent, and significand
5629 // bits set. It can only create normalized numbers. Only the least significant
5630 // 24 bits of the significand will be examined. The final bit of the
5631 // significand will also be ignored. This allows alignment to be written
5632 // similarly to C99 hex-floats.
5633 // For example if you wanted to write 0x1.7f34p-12 you would call
5634 // constructNormalizedFloat(-12, 0x7f3400)
constructNormalizedFloat(deInt32 exponent,deUint32 significand)5635 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
5636 {
5637 float f = 1.0f;
5638
5639 for (deInt32 idx = 0; idx < 23; ++idx)
5640 {
5641 f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5642 significand <<= 1;
5643 }
5644
5645 return std::ldexp(f, exponent);
5646 }
5647
5648 // Compare instruction for the OpQuantizeF16 compute exact case.
5649 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5650 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5651 {
5652 assert(outputAllocs.size() == 1);
5653
5654 // Only size is needed because we cannot compare Nans.
5655 size_t byteSize = expectedOutputs[0].getByteSize();
5656
5657 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5658
5659 if (byteSize != 4*sizeof(float)) {
5660 return false;
5661 }
5662
5663 if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5664 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5665 return false;
5666 }
5667 outputAsFloat++;
5668
5669 if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5670 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5671 return false;
5672 }
5673 outputAsFloat++;
5674
5675 if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5676 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5677 return false;
5678 }
5679 outputAsFloat++;
5680
5681 if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5682 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5683 return false;
5684 }
5685
5686 return true;
5687 }
5688
5689 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5690 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5691 {
5692 assert (outputAllocs.size() == 1);
5693
5694 // Only size is needed because we cannot compare Nans.
5695 size_t byteSize = expectedOutputs[0].getByteSize();
5696
5697 const float* const output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5698
5699 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5700 {
5701 if (!deFloatIsNaN(output_as_float[idx]))
5702 {
5703 return false;
5704 }
5705 }
5706
5707 return true;
5708 }
5709
5710 // Checks that every output from a test-case is either +0.0f or -0.0f
compareZeros(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5711 bool compareZeros (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5712 {
5713 assert (outputAllocs.size() == 1);
5714
5715 // Only size is needed because all the results are supposed to be zero.
5716 size_t byteSize = expectedOutputs[0].getByteSize();
5717
5718 const float* const output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5719
5720 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5721 {
5722 if (output_as_float[idx] != 0)
5723 return false;
5724 }
5725
5726 return true;
5727 }
5728
5729 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5730 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5731 {
5732 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5733
5734 const std::string shader (
5735 string(getComputeAsmShaderPreamble()) +
5736
5737 "OpSource GLSL 430\n"
5738 "OpName %main \"main\"\n"
5739 "OpName %id \"gl_GlobalInvocationID\"\n"
5740
5741 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5742
5743 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5744
5745 "%id = OpVariable %uvec3ptr Input\n"
5746 "%zero = OpConstant %i32 0\n"
5747
5748 "%main = OpFunction %void None %voidf\n"
5749 "%label = OpLabel\n"
5750 "%idval = OpLoad %uvec3 %id\n"
5751 "%x = OpCompositeExtract %u32 %idval 0\n"
5752 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5753 "%inval = OpLoad %f32 %inloc\n"
5754 "%quant = OpQuantizeToF16 %f32 %inval\n"
5755 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5756 " OpStore %outloc %quant\n"
5757 " OpReturn\n"
5758 " OpFunctionEnd\n");
5759
5760 {
5761 ComputeShaderSpec spec;
5762 const deUint32 numElements = 100;
5763 vector<float> infinities;
5764 vector<float> results;
5765
5766 infinities.reserve(numElements);
5767 results.reserve(numElements);
5768
5769 for (size_t idx = 0; idx < numElements; ++idx)
5770 {
5771 switch(idx % 4)
5772 {
5773 case 0:
5774 infinities.push_back(std::numeric_limits<float>::infinity());
5775 results.push_back(std::numeric_limits<float>::infinity());
5776 break;
5777 case 1:
5778 infinities.push_back(-std::numeric_limits<float>::infinity());
5779 results.push_back(-std::numeric_limits<float>::infinity());
5780 break;
5781 case 2:
5782 infinities.push_back(std::ldexp(1.0f, 16));
5783 results.push_back(std::numeric_limits<float>::infinity());
5784 break;
5785 case 3:
5786 infinities.push_back(std::ldexp(-1.0f, 32));
5787 results.push_back(-std::numeric_limits<float>::infinity());
5788 break;
5789 }
5790 }
5791
5792 spec.assembly = shader;
5793 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5794 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5795 spec.numWorkGroups = IVec3(numElements, 1, 1);
5796
5797 group->addChild(new SpvAsmComputeShaderCase(
5798 testCtx, "infinities", spec));
5799 }
5800
5801 {
5802 ComputeShaderSpec spec;
5803 vector<float> nans;
5804 const deUint32 numElements = 100;
5805
5806 nans.reserve(numElements);
5807
5808 for (size_t idx = 0; idx < numElements; ++idx)
5809 {
5810 if (idx % 2 == 0)
5811 {
5812 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5813 }
5814 else
5815 {
5816 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5817 }
5818 }
5819
5820 spec.assembly = shader;
5821 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5822 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5823 spec.numWorkGroups = IVec3(numElements, 1, 1);
5824 spec.verifyIO = &compareNan;
5825
5826 group->addChild(new SpvAsmComputeShaderCase(
5827 testCtx, "propagated_nans", spec));
5828 }
5829
5830 {
5831 ComputeShaderSpec spec;
5832 vector<float> small;
5833 vector<float> zeros;
5834 const deUint32 numElements = 100;
5835
5836 small.reserve(numElements);
5837 zeros.reserve(numElements);
5838
5839 for (size_t idx = 0; idx < numElements; ++idx)
5840 {
5841 switch(idx % 6)
5842 {
5843 case 0:
5844 small.push_back(0.f);
5845 break;
5846 case 1:
5847 small.push_back(-0.f);
5848 break;
5849 case 2:
5850 small.push_back(std::ldexp(1.0f, -16));
5851 break;
5852 case 3:
5853 small.push_back(std::ldexp(-1.0f, -32));
5854 break;
5855 case 4:
5856 small.push_back(std::ldexp(1.0f, -127));
5857 break;
5858 case 5:
5859 small.push_back(-std::ldexp(1.0f, -128));
5860 break;
5861 }
5862 }
5863
5864 spec.assembly = shader;
5865 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5866 // Only the size of outputs[0] will be used, actual expected values aren't needed.
5867 spec.outputs.push_back(BufferSp(new Float32Buffer(small)));
5868 spec.numWorkGroups = IVec3(numElements, 1, 1);
5869 spec.verifyIO = &compareZeros;
5870
5871 group->addChild(new SpvAsmComputeShaderCase(
5872 testCtx, "flush_to_zero", spec));
5873 }
5874
5875 {
5876 ComputeShaderSpec spec;
5877 vector<float> exact;
5878 const deUint32 numElements = 200;
5879
5880 exact.reserve(numElements);
5881
5882 for (size_t idx = 0; idx < numElements; ++idx)
5883 exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5884
5885 spec.assembly = shader;
5886 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5887 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5888 spec.numWorkGroups = IVec3(numElements, 1, 1);
5889
5890 group->addChild(new SpvAsmComputeShaderCase(
5891 testCtx, "exact", spec));
5892 }
5893
5894 {
5895 ComputeShaderSpec spec;
5896 vector<float> inputs;
5897 const deUint32 numElements = 4;
5898
5899 inputs.push_back(constructNormalizedFloat(8, 0x300300));
5900 inputs.push_back(-constructNormalizedFloat(-7, 0x600800));
5901 inputs.push_back(constructNormalizedFloat(2, 0x01E000));
5902 inputs.push_back(constructNormalizedFloat(1, 0xFFE000));
5903
5904 spec.assembly = shader;
5905 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5906 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5907 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5908 spec.numWorkGroups = IVec3(numElements, 1, 1);
5909
5910 group->addChild(new SpvAsmComputeShaderCase(
5911 testCtx, "rounded", spec));
5912 }
5913
5914 return group.release();
5915 }
5916
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)5917 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5918 {
5919 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5920
5921 const std::string shader (
5922 string(getComputeAsmShaderPreamble()) +
5923
5924 "OpName %main \"main\"\n"
5925 "OpName %id \"gl_GlobalInvocationID\"\n"
5926
5927 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5928
5929 "OpDecorate %sc_0 SpecId 0\n"
5930 "OpDecorate %sc_1 SpecId 1\n"
5931 "OpDecorate %sc_2 SpecId 2\n"
5932 "OpDecorate %sc_3 SpecId 3\n"
5933 "OpDecorate %sc_4 SpecId 4\n"
5934 "OpDecorate %sc_5 SpecId 5\n"
5935
5936 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5937
5938 "%id = OpVariable %uvec3ptr Input\n"
5939 "%zero = OpConstant %i32 0\n"
5940 "%c_u32_6 = OpConstant %u32 6\n"
5941
5942 "%sc_0 = OpSpecConstant %f32 0.\n"
5943 "%sc_1 = OpSpecConstant %f32 0.\n"
5944 "%sc_2 = OpSpecConstant %f32 0.\n"
5945 "%sc_3 = OpSpecConstant %f32 0.\n"
5946 "%sc_4 = OpSpecConstant %f32 0.\n"
5947 "%sc_5 = OpSpecConstant %f32 0.\n"
5948
5949 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5950 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5951 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5952 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5953 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5954 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5955
5956 "%main = OpFunction %void None %voidf\n"
5957 "%label = OpLabel\n"
5958 "%idval = OpLoad %uvec3 %id\n"
5959 "%x = OpCompositeExtract %u32 %idval 0\n"
5960 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5961 "%selector = OpUMod %u32 %x %c_u32_6\n"
5962 " OpSelectionMerge %exit None\n"
5963 " OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5964
5965 "%case0 = OpLabel\n"
5966 " OpStore %outloc %sc_0_quant\n"
5967 " OpBranch %exit\n"
5968
5969 "%case1 = OpLabel\n"
5970 " OpStore %outloc %sc_1_quant\n"
5971 " OpBranch %exit\n"
5972
5973 "%case2 = OpLabel\n"
5974 " OpStore %outloc %sc_2_quant\n"
5975 " OpBranch %exit\n"
5976
5977 "%case3 = OpLabel\n"
5978 " OpStore %outloc %sc_3_quant\n"
5979 " OpBranch %exit\n"
5980
5981 "%case4 = OpLabel\n"
5982 " OpStore %outloc %sc_4_quant\n"
5983 " OpBranch %exit\n"
5984
5985 "%case5 = OpLabel\n"
5986 " OpStore %outloc %sc_5_quant\n"
5987 " OpBranch %exit\n"
5988
5989 "%exit = OpLabel\n"
5990 " OpReturn\n"
5991
5992 " OpFunctionEnd\n");
5993
5994 {
5995 ComputeShaderSpec spec;
5996 const deUint8 numCases = 4;
5997 vector<float> inputs (numCases, 0.f);
5998 vector<float> outputs;
5999
6000 spec.assembly = shader;
6001 spec.numWorkGroups = IVec3(numCases, 1, 1);
6002
6003 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
6004 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
6005 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
6006 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
6007
6008 outputs.push_back(std::numeric_limits<float>::infinity());
6009 outputs.push_back(-std::numeric_limits<float>::infinity());
6010 outputs.push_back(std::numeric_limits<float>::infinity());
6011 outputs.push_back(-std::numeric_limits<float>::infinity());
6012
6013 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6014 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6015
6016 group->addChild(new SpvAsmComputeShaderCase(
6017 testCtx, "infinities", spec));
6018 }
6019
6020 {
6021 ComputeShaderSpec spec;
6022 const deUint8 numCases = 2;
6023 vector<float> inputs (numCases, 0.f);
6024 vector<float> outputs;
6025
6026 spec.assembly = shader;
6027 spec.numWorkGroups = IVec3(numCases, 1, 1);
6028 spec.verifyIO = &compareNan;
6029
6030 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6031 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6032
6033 for (deUint8 idx = 0; idx < numCases; ++idx)
6034 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6035
6036 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6037 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6038
6039 group->addChild(new SpvAsmComputeShaderCase(
6040 testCtx, "propagated_nans", spec));
6041 }
6042
6043 {
6044 ComputeShaderSpec spec;
6045 const deUint8 numCases = 6;
6046 vector<float> inputs (numCases, 0.f);
6047 vector<float> outputs;
6048
6049 spec.assembly = shader;
6050 spec.numWorkGroups = IVec3(numCases, 1, 1);
6051
6052 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6053 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6054 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6055 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6056 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6057 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6058
6059 spec.verifyIO = &compareZeros;
6060
6061 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6062 // Only the size of outputs[0] will be used, actual expected values aren't needed.
6063 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
6064
6065 group->addChild(new SpvAsmComputeShaderCase(
6066 testCtx, "flush_to_zero", spec));
6067 }
6068
6069 {
6070 ComputeShaderSpec spec;
6071 const deUint8 numCases = 6;
6072 vector<float> inputs (numCases, 0.f);
6073 vector<float> outputs;
6074
6075 spec.assembly = shader;
6076 spec.numWorkGroups = IVec3(numCases, 1, 1);
6077
6078 for (deUint8 idx = 0; idx < 6; ++idx)
6079 {
6080 const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6081 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6082 outputs.push_back(f);
6083 }
6084
6085 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6086 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6087
6088 group->addChild(new SpvAsmComputeShaderCase(
6089 testCtx, "exact", spec));
6090 }
6091
6092 {
6093 ComputeShaderSpec spec;
6094 const deUint8 numCases = 4;
6095 vector<float> inputs (numCases, 0.f);
6096 vector<float> outputs;
6097
6098 spec.assembly = shader;
6099 spec.numWorkGroups = IVec3(numCases, 1, 1);
6100 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6101
6102 outputs.push_back(constructNormalizedFloat(8, 0x300300));
6103 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6104 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6105 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6106
6107 for (deUint8 idx = 0; idx < numCases; ++idx)
6108 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6109
6110 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6111 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6112
6113 group->addChild(new SpvAsmComputeShaderCase(
6114 testCtx, "rounded", spec));
6115 }
6116
6117 return group.release();
6118 }
6119
6120 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6121 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6122 {
6123 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6124 ComputeShaderSpec spec;
6125 de::Random rnd (deStringHash(group->getName()));
6126 const int numElements = 100;
6127 vector<float> positiveFloats (numElements, 0);
6128 vector<float> negativeFloats (numElements, 0);
6129
6130 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6131
6132 for (size_t ndx = 0; ndx < numElements; ++ndx)
6133 negativeFloats[ndx] = -positiveFloats[ndx];
6134
6135 spec.assembly =
6136 "OpCapability Shader\n"
6137 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6138 "OpMemoryModel Logical GLSL450\n"
6139 "OpEntryPoint GLCompute %main \"main\" %id\n"
6140 "OpExecutionMode %main LocalSize 1 1 1\n"
6141
6142 "OpSource GLSL 430\n"
6143 "OpName %main \"main\"\n"
6144 "OpName %id \"gl_GlobalInvocationID\"\n"
6145
6146 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6147
6148 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6149
6150 "%fmat = OpTypeMatrix %fvec3 3\n"
6151 "%ten = OpConstant %u32 10\n"
6152 "%f32arr10 = OpTypeArray %f32 %ten\n"
6153 "%fst = OpTypeStruct %f32 %f32\n"
6154
6155 + string(getComputeAsmInputOutputBuffer()) +
6156
6157 "%id = OpVariable %uvec3ptr Input\n"
6158 "%zero = OpConstant %i32 0\n"
6159
6160 // Create a bunch of null values
6161 "%unull = OpConstantNull %u32\n"
6162 "%fnull = OpConstantNull %f32\n"
6163 "%vnull = OpConstantNull %fvec3\n"
6164 "%mnull = OpConstantNull %fmat\n"
6165 "%anull = OpConstantNull %f32arr10\n"
6166 "%snull = OpConstantComposite %fst %fnull %fnull\n"
6167
6168 "%main = OpFunction %void None %voidf\n"
6169 "%label = OpLabel\n"
6170 "%idval = OpLoad %uvec3 %id\n"
6171 "%x = OpCompositeExtract %u32 %idval 0\n"
6172 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6173 "%inval = OpLoad %f32 %inloc\n"
6174 "%neg = OpFNegate %f32 %inval\n"
6175
6176 // Get the abs() of (a certain element of) those null values
6177 "%unull_cov = OpConvertUToF %f32 %unull\n"
6178 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6179 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6180 "%vnull_0 = OpCompositeExtract %f32 %vnull 0\n"
6181 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6182 "%mnull_12 = OpCompositeExtract %f32 %mnull 1 2\n"
6183 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6184 "%anull_3 = OpCompositeExtract %f32 %anull 3\n"
6185 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6186 "%snull_1 = OpCompositeExtract %f32 %snull 1\n"
6187 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6188
6189 // Add them all
6190 "%add1 = OpFAdd %f32 %neg %unull_abs\n"
6191 "%add2 = OpFAdd %f32 %add1 %fnull_abs\n"
6192 "%add3 = OpFAdd %f32 %add2 %vnull_abs\n"
6193 "%add4 = OpFAdd %f32 %add3 %mnull_abs\n"
6194 "%add5 = OpFAdd %f32 %add4 %anull_abs\n"
6195 "%final = OpFAdd %f32 %add5 %snull_abs\n"
6196
6197 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6198 " OpStore %outloc %final\n" // write to output
6199 " OpReturn\n"
6200 " OpFunctionEnd\n";
6201 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6202 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6203 spec.numWorkGroups = IVec3(numElements, 1, 1);
6204
6205 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", spec));
6206
6207 return group.release();
6208 }
6209
6210 // Assembly code used for testing loop control is based on GLSL source code:
6211 // #version 430
6212 //
6213 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6214 // float elements[];
6215 // } input_data;
6216 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6217 // float elements[];
6218 // } output_data;
6219 //
6220 // void main() {
6221 // uint x = gl_GlobalInvocationID.x;
6222 // output_data.elements[x] = input_data.elements[x];
6223 // for (uint i = 0; i < 4; ++i)
6224 // output_data.elements[x] += 1.f;
6225 // }
createLoopControlGroup(tcu::TestContext & testCtx)6226 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6227 {
6228 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6229 vector<CaseParameter> cases;
6230 de::Random rnd (deStringHash(group->getName()));
6231 const int numElements = 100;
6232 vector<float> inputFloats (numElements, 0);
6233 vector<float> outputFloats (numElements, 0);
6234 const StringTemplate shaderTemplate (
6235 string(getComputeAsmShaderPreamble()) +
6236
6237 "OpSource GLSL 430\n"
6238 "OpName %main \"main\"\n"
6239 "OpName %id \"gl_GlobalInvocationID\"\n"
6240
6241 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6242
6243 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6244
6245 "%u32ptr = OpTypePointer Function %u32\n"
6246
6247 "%id = OpVariable %uvec3ptr Input\n"
6248 "%zero = OpConstant %i32 0\n"
6249 "%uzero = OpConstant %u32 0\n"
6250 "%one = OpConstant %i32 1\n"
6251 "%constf1 = OpConstant %f32 1.0\n"
6252 "%four = OpConstant %u32 4\n"
6253
6254 "%main = OpFunction %void None %voidf\n"
6255 "%entry = OpLabel\n"
6256 "%i = OpVariable %u32ptr Function\n"
6257 " OpStore %i %uzero\n"
6258
6259 "%idval = OpLoad %uvec3 %id\n"
6260 "%x = OpCompositeExtract %u32 %idval 0\n"
6261 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6262 "%inval = OpLoad %f32 %inloc\n"
6263 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6264 " OpStore %outloc %inval\n"
6265 " OpBranch %loop_entry\n"
6266
6267 "%loop_entry = OpLabel\n"
6268 "%i_val = OpLoad %u32 %i\n"
6269 "%cmp_lt = OpULessThan %bool %i_val %four\n"
6270 " OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6271 " OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6272 "%loop_body = OpLabel\n"
6273 "%outval = OpLoad %f32 %outloc\n"
6274 "%addf1 = OpFAdd %f32 %outval %constf1\n"
6275 " OpStore %outloc %addf1\n"
6276 "%new_i = OpIAdd %u32 %i_val %one\n"
6277 " OpStore %i %new_i\n"
6278 " OpBranch %loop_entry\n"
6279 "%loop_merge = OpLabel\n"
6280 " OpReturn\n"
6281 " OpFunctionEnd\n");
6282
6283 cases.push_back(CaseParameter("none", "None"));
6284 cases.push_back(CaseParameter("unroll", "Unroll"));
6285 cases.push_back(CaseParameter("dont_unroll", "DontUnroll"));
6286
6287 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6288
6289 for (size_t ndx = 0; ndx < numElements; ++ndx)
6290 outputFloats[ndx] = inputFloats[ndx] + 4.f;
6291
6292 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6293 {
6294 map<string, string> specializations;
6295 ComputeShaderSpec spec;
6296
6297 specializations["CONTROL"] = cases[caseNdx].param;
6298 spec.assembly = shaderTemplate.specialize(specializations);
6299 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6300 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6301 spec.numWorkGroups = IVec3(numElements, 1, 1);
6302
6303 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6304 }
6305
6306 group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length"));
6307 group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite"));
6308
6309 return group.release();
6310 }
6311
6312 // Assembly code used for testing selection control is based on GLSL source code:
6313 // #version 430
6314 //
6315 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6316 // float elements[];
6317 // } input_data;
6318 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6319 // float elements[];
6320 // } output_data;
6321 //
6322 // void main() {
6323 // uint x = gl_GlobalInvocationID.x;
6324 // float val = input_data.elements[x];
6325 // if (val > 10.f)
6326 // output_data.elements[x] = val + 1.f;
6327 // else
6328 // output_data.elements[x] = val - 1.f;
6329 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6330 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6331 {
6332 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6333 vector<CaseParameter> cases;
6334 de::Random rnd (deStringHash(group->getName()));
6335 const int numElements = 100;
6336 vector<float> inputFloats (numElements, 0);
6337 vector<float> outputFloats (numElements, 0);
6338 const StringTemplate shaderTemplate (
6339 string(getComputeAsmShaderPreamble()) +
6340
6341 "OpSource GLSL 430\n"
6342 "OpName %main \"main\"\n"
6343 "OpName %id \"gl_GlobalInvocationID\"\n"
6344
6345 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6346
6347 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6348
6349 "%id = OpVariable %uvec3ptr Input\n"
6350 "%zero = OpConstant %i32 0\n"
6351 "%constf1 = OpConstant %f32 1.0\n"
6352 "%constf10 = OpConstant %f32 10.0\n"
6353
6354 "%main = OpFunction %void None %voidf\n"
6355 "%entry = OpLabel\n"
6356 "%idval = OpLoad %uvec3 %id\n"
6357 "%x = OpCompositeExtract %u32 %idval 0\n"
6358 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6359 "%inval = OpLoad %f32 %inloc\n"
6360 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6361 "%cmp_gt = OpFOrdGreaterThan %bool %inval %constf10\n"
6362
6363 " OpSelectionMerge %if_end ${CONTROL}\n"
6364 " OpBranchConditional %cmp_gt %if_true %if_false\n"
6365 "%if_true = OpLabel\n"
6366 "%addf1 = OpFAdd %f32 %inval %constf1\n"
6367 " OpStore %outloc %addf1\n"
6368 " OpBranch %if_end\n"
6369 "%if_false = OpLabel\n"
6370 "%subf1 = OpFSub %f32 %inval %constf1\n"
6371 " OpStore %outloc %subf1\n"
6372 " OpBranch %if_end\n"
6373 "%if_end = OpLabel\n"
6374 " OpReturn\n"
6375 " OpFunctionEnd\n");
6376
6377 cases.push_back(CaseParameter("none", "None"));
6378 cases.push_back(CaseParameter("flatten", "Flatten"));
6379 cases.push_back(CaseParameter("dont_flatten", "DontFlatten"));
6380 cases.push_back(CaseParameter("flatten_dont_flatten", "DontFlatten|Flatten"));
6381
6382 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6383
6384 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6385 floorAll(inputFloats);
6386
6387 for (size_t ndx = 0; ndx < numElements; ++ndx)
6388 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6389
6390 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6391 {
6392 map<string, string> specializations;
6393 ComputeShaderSpec spec;
6394
6395 specializations["CONTROL"] = cases[caseNdx].param;
6396 spec.assembly = shaderTemplate.specialize(specializations);
6397 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6398 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6399 spec.numWorkGroups = IVec3(numElements, 1, 1);
6400
6401 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6402 }
6403
6404 return group.release();
6405 }
6406
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6407 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6408 {
6409 // Generate a long name.
6410 std::string longname;
6411 longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6412
6413 // Some bad names, abusing utf-8 encoding. This may also cause problems
6414 // with the logs.
6415 // 1. Various illegal code points in utf-8
6416 std::string utf8illegal =
6417 "Illegal bytes in UTF-8: "
6418 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6419 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6420
6421 // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6422 std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6423
6424 // 3. Some overlong encodings
6425 std::string utf8overlong =
6426 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6427 "\xf0\x8f\xbf\xbf";
6428
6429 // 4. Internet "zalgo" meme "bleeding text"
6430 std::string utf8zalgo =
6431 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6432 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6433 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6434 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6435 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6436 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6437 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6438 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6439 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6440 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6441 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6442 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6443 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6444 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6445 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6446 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6447 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6448 "\x93\xcd\x96\xcc\x97\xff";
6449
6450 // General name abuses
6451 abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6452 abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6453 abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6454 abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6455 abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6456
6457 // GL keywords
6458 abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6459 abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6460 abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6461 abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6462 abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6463 abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6464 abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6465 abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6466 abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6467 abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6468 abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6469 abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6470 abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6471 abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6472 abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6473 abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6474 abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6475 abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6476 abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6477 abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6478 abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6479 }
6480
createOpNameGroup(tcu::TestContext & testCtx)6481 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6482 {
6483 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6484 de::MovePtr<tcu::TestCaseGroup> entryMainGroup (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6485 de::MovePtr<tcu::TestCaseGroup> entryNotGroup (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6486 de::MovePtr<tcu::TestCaseGroup> abuseGroup (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6487 vector<CaseParameter> cases;
6488 vector<CaseParameter> abuseCases;
6489 vector<string> testFunc;
6490 de::Random rnd (deStringHash(group->getName()));
6491 const int numElements = 128;
6492 vector<float> inputFloats (numElements, 0);
6493 vector<float> outputFloats (numElements, 0);
6494
6495 getOpNameAbuseCases(abuseCases);
6496
6497 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6498
6499 for(size_t ndx = 0; ndx < numElements; ++ndx)
6500 outputFloats[ndx] = -inputFloats[ndx];
6501
6502 const string commonShaderHeader =
6503 "OpCapability Shader\n"
6504 "OpMemoryModel Logical GLSL450\n"
6505 "OpEntryPoint GLCompute %main \"main\" %id\n"
6506 "OpExecutionMode %main LocalSize 1 1 1\n";
6507
6508 const string commonShaderFooter =
6509 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6510
6511 + string(getComputeAsmInputOutputBufferTraits())
6512 + string(getComputeAsmCommonTypes())
6513 + string(getComputeAsmInputOutputBuffer()) +
6514
6515 "%id = OpVariable %uvec3ptr Input\n"
6516 "%zero = OpConstant %i32 0\n"
6517
6518 "%func = OpFunction %void None %voidf\n"
6519 "%5 = OpLabel\n"
6520 " OpReturn\n"
6521 " OpFunctionEnd\n"
6522
6523 "%main = OpFunction %void None %voidf\n"
6524 "%entry = OpLabel\n"
6525 "%7 = OpFunctionCall %void %func\n"
6526
6527 "%idval = OpLoad %uvec3 %id\n"
6528 "%x = OpCompositeExtract %u32 %idval 0\n"
6529
6530 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6531 "%inval = OpLoad %f32 %inloc\n"
6532 "%neg = OpFNegate %f32 %inval\n"
6533 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6534 " OpStore %outloc %neg\n"
6535
6536 " OpReturn\n"
6537 " OpFunctionEnd\n";
6538
6539 const StringTemplate shaderTemplate (
6540 "OpCapability Shader\n"
6541 "OpMemoryModel Logical GLSL450\n"
6542 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6543 "OpExecutionMode %main LocalSize 1 1 1\n"
6544 "OpName %${ID} \"${NAME}\"\n" +
6545 commonShaderFooter);
6546
6547 const std::string multipleNames =
6548 commonShaderHeader +
6549 "OpName %main \"to_be\"\n"
6550 "OpName %id \"or_not\"\n"
6551 "OpName %main \"to_be\"\n"
6552 "OpName %main \"makes_no\"\n"
6553 "OpName %func \"difference\"\n"
6554 "OpName %5 \"to_me\"\n" +
6555 commonShaderFooter;
6556
6557 {
6558 ComputeShaderSpec spec;
6559
6560 spec.assembly = multipleNames;
6561 spec.numWorkGroups = IVec3(numElements, 1, 1);
6562 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6563 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6564
6565 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", spec));
6566 }
6567
6568 const std::string everythingNamed =
6569 commonShaderHeader +
6570 "OpName %main \"name1\"\n"
6571 "OpName %id \"name2\"\n"
6572 "OpName %zero \"name3\"\n"
6573 "OpName %entry \"name4\"\n"
6574 "OpName %func \"name5\"\n"
6575 "OpName %5 \"name6\"\n"
6576 "OpName %7 \"name7\"\n"
6577 "OpName %idval \"name8\"\n"
6578 "OpName %inloc \"name9\"\n"
6579 "OpName %inval \"name10\"\n"
6580 "OpName %neg \"name11\"\n"
6581 "OpName %outloc \"name12\"\n"+
6582 commonShaderFooter;
6583 {
6584 ComputeShaderSpec spec;
6585
6586 spec.assembly = everythingNamed;
6587 spec.numWorkGroups = IVec3(numElements, 1, 1);
6588 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6589 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6590
6591 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", spec));
6592 }
6593
6594 const std::string everythingNamedTheSame =
6595 commonShaderHeader +
6596 "OpName %main \"the_same\"\n"
6597 "OpName %id \"the_same\"\n"
6598 "OpName %zero \"the_same\"\n"
6599 "OpName %entry \"the_same\"\n"
6600 "OpName %func \"the_same\"\n"
6601 "OpName %5 \"the_same\"\n"
6602 "OpName %7 \"the_same\"\n"
6603 "OpName %idval \"the_same\"\n"
6604 "OpName %inloc \"the_same\"\n"
6605 "OpName %inval \"the_same\"\n"
6606 "OpName %neg \"the_same\"\n"
6607 "OpName %outloc \"the_same\"\n"+
6608 commonShaderFooter;
6609 {
6610 ComputeShaderSpec spec;
6611
6612 spec.assembly = everythingNamedTheSame;
6613 spec.numWorkGroups = IVec3(numElements, 1, 1);
6614 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6615 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6616
6617 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
6618 }
6619
6620 // main_is_...
6621 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6622 {
6623 map<string, string> specializations;
6624 ComputeShaderSpec spec;
6625
6626 specializations["ENTRY"] = "main";
6627 specializations["ID"] = "main";
6628 specializations["NAME"] = abuseCases[ndx].param;
6629 spec.assembly = shaderTemplate.specialize(specializations);
6630 spec.numWorkGroups = IVec3(numElements, 1, 1);
6631 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6632 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6633
6634 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), spec));
6635 }
6636
6637 // x_is_....
6638 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6639 {
6640 map<string, string> specializations;
6641 ComputeShaderSpec spec;
6642
6643 specializations["ENTRY"] = "main";
6644 specializations["ID"] = "x";
6645 specializations["NAME"] = abuseCases[ndx].param;
6646 spec.assembly = shaderTemplate.specialize(specializations);
6647 spec.numWorkGroups = IVec3(numElements, 1, 1);
6648 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6649 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6650
6651 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), spec));
6652 }
6653
6654 cases.push_back(CaseParameter("_is_main", "main"));
6655 cases.push_back(CaseParameter("_is_not_main", "not_main"));
6656 testFunc.push_back("main");
6657 testFunc.push_back("func");
6658
6659 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6660 {
6661 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6662 {
6663 map<string, string> specializations;
6664 ComputeShaderSpec spec;
6665
6666 specializations["ENTRY"] = "main";
6667 specializations["ID"] = testFunc[fNdx];
6668 specializations["NAME"] = cases[ndx].param;
6669 spec.assembly = shaderTemplate.specialize(specializations);
6670 spec.numWorkGroups = IVec3(numElements, 1, 1);
6671 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6672 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6673
6674 entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6675 }
6676 }
6677
6678 cases.push_back(CaseParameter("_is_entry", "rdc"));
6679
6680 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6681 {
6682 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6683 {
6684 map<string, string> specializations;
6685 ComputeShaderSpec spec;
6686
6687 specializations["ENTRY"] = "rdc";
6688 specializations["ID"] = testFunc[fNdx];
6689 specializations["NAME"] = cases[ndx].param;
6690 spec.assembly = shaderTemplate.specialize(specializations);
6691 spec.numWorkGroups = IVec3(numElements, 1, 1);
6692 spec.entryPoint = "rdc";
6693 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6694 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6695
6696 entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), spec));
6697 }
6698 }
6699
6700 group->addChild(entryMainGroup.release());
6701 group->addChild(entryNotGroup.release());
6702 group->addChild(abuseGroup.release());
6703
6704 return group.release();
6705 }
6706
createOpMemberNameGroup(tcu::TestContext & testCtx)6707 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6708 {
6709 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6710 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6711 vector<CaseParameter> abuseCases;
6712 vector<string> testFunc;
6713 de::Random rnd(deStringHash(group->getName()));
6714 const int numElements = 128;
6715 vector<float> inputFloats(numElements, 0);
6716 vector<float> outputFloats(numElements, 0);
6717
6718 getOpNameAbuseCases(abuseCases);
6719
6720 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6721
6722 for (size_t ndx = 0; ndx < numElements; ++ndx)
6723 outputFloats[ndx] = -inputFloats[ndx];
6724
6725 const string commonShaderHeader =
6726 "OpCapability Shader\n"
6727 "OpMemoryModel Logical GLSL450\n"
6728 "OpEntryPoint GLCompute %main \"main\" %id\n"
6729 "OpExecutionMode %main LocalSize 1 1 1\n";
6730
6731 const string commonShaderFooter =
6732 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6733
6734 + string(getComputeAsmInputOutputBufferTraits())
6735 + string(getComputeAsmCommonTypes())
6736 + string(getComputeAsmInputOutputBuffer()) +
6737
6738 "%u3str = OpTypeStruct %u32 %u32 %u32\n"
6739
6740 "%id = OpVariable %uvec3ptr Input\n"
6741 "%zero = OpConstant %i32 0\n"
6742
6743 "%main = OpFunction %void None %voidf\n"
6744 "%entry = OpLabel\n"
6745
6746 "%idval = OpLoad %uvec3 %id\n"
6747 "%x0 = OpCompositeExtract %u32 %idval 0\n"
6748
6749 "%idstr = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6750 "%x = OpCompositeExtract %u32 %idstr 0\n"
6751
6752 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6753 "%inval = OpLoad %f32 %inloc\n"
6754 "%neg = OpFNegate %f32 %inval\n"
6755 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6756 " OpStore %outloc %neg\n"
6757
6758 " OpReturn\n"
6759 " OpFunctionEnd\n";
6760
6761 const StringTemplate shaderTemplate(
6762 commonShaderHeader +
6763 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6764 commonShaderFooter);
6765
6766 const std::string multipleNames =
6767 commonShaderHeader +
6768 "OpMemberName %u3str 0 \"to_be\"\n"
6769 "OpMemberName %u3str 1 \"or_not\"\n"
6770 "OpMemberName %u3str 0 \"to_be\"\n"
6771 "OpMemberName %u3str 2 \"makes_no\"\n"
6772 "OpMemberName %u3str 0 \"difference\"\n"
6773 "OpMemberName %u3str 0 \"to_me\"\n" +
6774 commonShaderFooter;
6775 {
6776 ComputeShaderSpec spec;
6777
6778 spec.assembly = multipleNames;
6779 spec.numWorkGroups = IVec3(numElements, 1, 1);
6780 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6781 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6782
6783 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", spec));
6784 }
6785
6786 const std::string everythingNamedTheSame =
6787 commonShaderHeader +
6788 "OpMemberName %u3str 0 \"the_same\"\n"
6789 "OpMemberName %u3str 1 \"the_same\"\n"
6790 "OpMemberName %u3str 2 \"the_same\"\n" +
6791 commonShaderFooter;
6792
6793 {
6794 ComputeShaderSpec spec;
6795
6796 spec.assembly = everythingNamedTheSame;
6797 spec.numWorkGroups = IVec3(numElements, 1, 1);
6798 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6799 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6800
6801 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", spec));
6802 }
6803
6804 // u3str_x_is_....
6805 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6806 {
6807 map<string, string> specializations;
6808 ComputeShaderSpec spec;
6809
6810 specializations["NAME"] = abuseCases[ndx].param;
6811 spec.assembly = shaderTemplate.specialize(specializations);
6812 spec.numWorkGroups = IVec3(numElements, 1, 1);
6813 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6814 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6815
6816 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), spec));
6817 }
6818
6819 group->addChild(abuseGroup.release());
6820
6821 return group.release();
6822 }
6823
6824 // Assembly code used for testing function control is based on GLSL source code:
6825 //
6826 // #version 430
6827 //
6828 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6829 // float elements[];
6830 // } input_data;
6831 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6832 // float elements[];
6833 // } output_data;
6834 //
6835 // float const10() { return 10.f; }
6836 //
6837 // void main() {
6838 // uint x = gl_GlobalInvocationID.x;
6839 // output_data.elements[x] = input_data.elements[x] + const10();
6840 // }
createFunctionControlGroup(tcu::TestContext & testCtx)6841 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6842 {
6843 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6844 vector<CaseParameter> cases;
6845 de::Random rnd (deStringHash(group->getName()));
6846 const int numElements = 100;
6847 vector<float> inputFloats (numElements, 0);
6848 vector<float> outputFloats (numElements, 0);
6849 const StringTemplate shaderTemplate (
6850 string(getComputeAsmShaderPreamble()) +
6851
6852 "OpSource GLSL 430\n"
6853 "OpName %main \"main\"\n"
6854 "OpName %func_const10 \"const10(\"\n"
6855 "OpName %id \"gl_GlobalInvocationID\"\n"
6856
6857 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6858
6859 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6860
6861 "%f32f = OpTypeFunction %f32\n"
6862 "%id = OpVariable %uvec3ptr Input\n"
6863 "%zero = OpConstant %i32 0\n"
6864 "%constf10 = OpConstant %f32 10.0\n"
6865
6866 "%main = OpFunction %void None %voidf\n"
6867 "%entry = OpLabel\n"
6868 "%idval = OpLoad %uvec3 %id\n"
6869 "%x = OpCompositeExtract %u32 %idval 0\n"
6870 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6871 "%inval = OpLoad %f32 %inloc\n"
6872 "%ret_10 = OpFunctionCall %f32 %func_const10\n"
6873 "%fadd = OpFAdd %f32 %inval %ret_10\n"
6874 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6875 " OpStore %outloc %fadd\n"
6876 " OpReturn\n"
6877 " OpFunctionEnd\n"
6878
6879 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6880 "%label = OpLabel\n"
6881 " OpReturnValue %constf10\n"
6882 " OpFunctionEnd\n");
6883
6884 cases.push_back(CaseParameter("none", "None"));
6885 cases.push_back(CaseParameter("inline", "Inline"));
6886 cases.push_back(CaseParameter("dont_inline", "DontInline"));
6887 cases.push_back(CaseParameter("pure", "Pure"));
6888 cases.push_back(CaseParameter("const", "Const"));
6889 cases.push_back(CaseParameter("inline_pure", "Inline|Pure"));
6890 cases.push_back(CaseParameter("const_dont_inline", "Const|DontInline"));
6891 cases.push_back(CaseParameter("inline_dont_inline", "Inline|DontInline"));
6892 cases.push_back(CaseParameter("pure_inline_dont_inline", "Pure|Inline|DontInline"));
6893
6894 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6895
6896 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6897 floorAll(inputFloats);
6898
6899 for (size_t ndx = 0; ndx < numElements; ++ndx)
6900 outputFloats[ndx] = inputFloats[ndx] + 10.f;
6901
6902 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6903 {
6904 map<string, string> specializations;
6905 ComputeShaderSpec spec;
6906
6907 specializations["CONTROL"] = cases[caseNdx].param;
6908 spec.assembly = shaderTemplate.specialize(specializations);
6909 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6910 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6911 spec.numWorkGroups = IVec3(numElements, 1, 1);
6912
6913 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6914 }
6915
6916 return group.release();
6917 }
6918
createMemoryAccessGroup(tcu::TestContext & testCtx)6919 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6920 {
6921 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6922 vector<CaseParameter> cases;
6923 de::Random rnd (deStringHash(group->getName()));
6924 const int numElements = 100;
6925 vector<float> inputFloats (numElements, 0);
6926 vector<float> outputFloats (numElements, 0);
6927 const StringTemplate shaderTemplate (
6928 string(getComputeAsmShaderPreamble()) +
6929
6930 "OpSource GLSL 430\n"
6931 "OpName %main \"main\"\n"
6932 "OpName %id \"gl_GlobalInvocationID\"\n"
6933
6934 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6935
6936 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6937
6938 "%f32ptr_f = OpTypePointer Function %f32\n"
6939
6940 "%id = OpVariable %uvec3ptr Input\n"
6941 "%zero = OpConstant %i32 0\n"
6942 "%four = OpConstant %i32 4\n"
6943
6944 "%main = OpFunction %void None %voidf\n"
6945 "%label = OpLabel\n"
6946 "%copy = OpVariable %f32ptr_f Function\n"
6947 "%idval = OpLoad %uvec3 %id ${ACCESS}\n"
6948 "%x = OpCompositeExtract %u32 %idval 0\n"
6949 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6950 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6951 " OpCopyMemory %copy %inloc ${ACCESS}\n"
6952 "%val1 = OpLoad %f32 %copy\n"
6953 "%val2 = OpLoad %f32 %inloc\n"
6954 "%add = OpFAdd %f32 %val1 %val2\n"
6955 " OpStore %outloc %add ${ACCESS}\n"
6956 " OpReturn\n"
6957 " OpFunctionEnd\n");
6958
6959 cases.push_back(CaseParameter("null", ""));
6960 cases.push_back(CaseParameter("none", "None"));
6961 cases.push_back(CaseParameter("volatile", "Volatile"));
6962 cases.push_back(CaseParameter("aligned", "Aligned 4"));
6963 cases.push_back(CaseParameter("nontemporal", "Nontemporal"));
6964 cases.push_back(CaseParameter("aligned_nontemporal", "Aligned|Nontemporal 4"));
6965 cases.push_back(CaseParameter("aligned_volatile", "Volatile|Aligned 4"));
6966
6967 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6968
6969 for (size_t ndx = 0; ndx < numElements; ++ndx)
6970 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6971
6972 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6973 {
6974 map<string, string> specializations;
6975 ComputeShaderSpec spec;
6976
6977 specializations["ACCESS"] = cases[caseNdx].param;
6978 spec.assembly = shaderTemplate.specialize(specializations);
6979 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6980 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6981 spec.numWorkGroups = IVec3(numElements, 1, 1);
6982
6983 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
6984 }
6985
6986 return group.release();
6987 }
6988
6989 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)6990 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6991 {
6992 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6993 vector<CaseParameter> cases;
6994 de::Random rnd (deStringHash(group->getName()));
6995 const int numElements = 100;
6996 vector<float> positiveFloats (numElements, 0);
6997 vector<float> negativeFloats (numElements, 0);
6998 const StringTemplate shaderTemplate (
6999 string(getComputeAsmShaderPreamble()) +
7000
7001 "OpSource GLSL 430\n"
7002 "OpName %main \"main\"\n"
7003 "OpName %id \"gl_GlobalInvocationID\"\n"
7004
7005 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7006
7007 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
7008 "%uvec2 = OpTypeVector %u32 2\n"
7009 "%fvec4 = OpTypeVector %f32 4\n"
7010 "%fmat33 = OpTypeMatrix %fvec3 3\n"
7011 "%image = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7012 "%sampler = OpTypeSampler\n"
7013 "%simage = OpTypeSampledImage %image\n"
7014 "%const100 = OpConstant %u32 100\n"
7015 "%uarr100 = OpTypeArray %i32 %const100\n"
7016 "%struct = OpTypeStruct %f32 %i32 %u32\n"
7017 "%pointer = OpTypePointer Function %i32\n"
7018 + string(getComputeAsmInputOutputBuffer()) +
7019
7020 "%id = OpVariable %uvec3ptr Input\n"
7021 "%zero = OpConstant %i32 0\n"
7022
7023 "%main = OpFunction %void None %voidf\n"
7024 "%label = OpLabel\n"
7025
7026 "%undef = OpUndef ${TYPE}\n"
7027
7028 "%idval = OpLoad %uvec3 %id\n"
7029 "%x = OpCompositeExtract %u32 %idval 0\n"
7030
7031 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7032 "%inval = OpLoad %f32 %inloc\n"
7033 "%neg = OpFNegate %f32 %inval\n"
7034 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7035 " OpStore %outloc %neg\n"
7036 " OpReturn\n"
7037 " OpFunctionEnd\n");
7038
7039 cases.push_back(CaseParameter("bool", "%bool"));
7040 cases.push_back(CaseParameter("sint32", "%i32"));
7041 cases.push_back(CaseParameter("uint32", "%u32"));
7042 cases.push_back(CaseParameter("float32", "%f32"));
7043 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
7044 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
7045 cases.push_back(CaseParameter("matrix", "%fmat33"));
7046 cases.push_back(CaseParameter("image", "%image"));
7047 cases.push_back(CaseParameter("sampler", "%sampler"));
7048 cases.push_back(CaseParameter("sampledimage", "%simage"));
7049 cases.push_back(CaseParameter("array", "%uarr100"));
7050 cases.push_back(CaseParameter("runtimearray", "%f32arr"));
7051 cases.push_back(CaseParameter("struct", "%struct"));
7052 cases.push_back(CaseParameter("pointer", "%pointer"));
7053
7054 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7055
7056 for (size_t ndx = 0; ndx < numElements; ++ndx)
7057 negativeFloats[ndx] = -positiveFloats[ndx];
7058
7059 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7060 {
7061 map<string, string> specializations;
7062 ComputeShaderSpec spec;
7063
7064 specializations["TYPE"] = cases[caseNdx].param;
7065 spec.assembly = shaderTemplate.specialize(specializations);
7066 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7067 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7068 spec.numWorkGroups = IVec3(numElements, 1, 1);
7069
7070 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7071 }
7072
7073 // OpUndef with constants.
7074 #ifndef CTS_USES_VULKANSC
7075 {
7076 static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7077
7078 static const struct
7079 {
7080 const std::string name;
7081 const std::string desc;
7082 } amberCases[] =
7083 {
7084 { "undefined_constant_composite", "OpUndef value in OpConstantComposite" },
7085 { "undefined_spec_constant_composite", "OpUndef value in OpSpecConstantComposite" },
7086 };
7087
7088 for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7089 {
7090 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7091 amberCases[i].name.c_str(),
7092 amberCases[i].desc.c_str(),
7093 data_dir,
7094 amberCases[i].name + ".amber");
7095 group->addChild(testCase);
7096 }
7097 }
7098 #endif
7099
7100 return group.release();
7101 }
7102
7103 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7104 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7105 {
7106 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7107 vector<CaseParameter> cases;
7108 de::Random rnd (deStringHash(group->getName()));
7109 const int numElements = 100;
7110 vector<float> positiveFloats (numElements, 0);
7111 vector<float> negativeFloats (numElements, 0);
7112 const StringTemplate shaderTemplate (
7113 "OpCapability Shader\n"
7114 "OpCapability Float16\n"
7115 "OpMemoryModel Logical GLSL450\n"
7116 "OpEntryPoint GLCompute %main \"main\" %id\n"
7117 "OpExecutionMode %main LocalSize 1 1 1\n"
7118 "OpSource GLSL 430\n"
7119 "OpName %main \"main\"\n"
7120 "OpName %id \"gl_GlobalInvocationID\"\n"
7121
7122 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7123
7124 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7125
7126 "%id = OpVariable %uvec3ptr Input\n"
7127 "%zero = OpConstant %i32 0\n"
7128 "%f16 = OpTypeFloat 16\n"
7129 "%c_f16_0 = OpConstant %f16 0.0\n"
7130 "%c_f16_0_5 = OpConstant %f16 0.5\n"
7131 "%c_f16_1 = OpConstant %f16 1.0\n"
7132 "%v2f16 = OpTypeVector %f16 2\n"
7133 "%v3f16 = OpTypeVector %f16 3\n"
7134 "%v4f16 = OpTypeVector %f16 4\n"
7135
7136 "${CONSTANT}\n"
7137
7138 "%main = OpFunction %void None %voidf\n"
7139 "%label = OpLabel\n"
7140 "%idval = OpLoad %uvec3 %id\n"
7141 "%x = OpCompositeExtract %u32 %idval 0\n"
7142 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7143 "%inval = OpLoad %f32 %inloc\n"
7144 "%neg = OpFNegate %f32 %inval\n"
7145 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7146 " OpStore %outloc %neg\n"
7147 " OpReturn\n"
7148 " OpFunctionEnd\n");
7149
7150
7151 cases.push_back(CaseParameter("vector", "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7152 cases.push_back(CaseParameter("matrix", "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7153 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7154 "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7155 cases.push_back(CaseParameter("struct", "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7156 "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7157 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7158 "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7159 "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7160 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %i32 %f16\n"
7161 "%st2 = OpTypeStruct %i32 %i32\n"
7162 "%struct = OpTypeStruct %st1 %st2\n"
7163 "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7164 "%st2val = OpConstantComposite %st2 %zero %zero\n"
7165 "%const = OpConstantComposite %struct %st1val %st2val"));
7166
7167 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7168
7169 for (size_t ndx = 0; ndx < numElements; ++ndx)
7170 negativeFloats[ndx] = -positiveFloats[ndx];
7171
7172 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7173 {
7174 map<string, string> specializations;
7175 ComputeShaderSpec spec;
7176
7177 specializations["CONSTANT"] = cases[caseNdx].param;
7178 spec.assembly = shaderTemplate.specialize(specializations);
7179 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7180 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7181 spec.numWorkGroups = IVec3(numElements, 1, 1);
7182
7183 spec.extensions.push_back("VK_KHR_shader_float16_int8");
7184
7185 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7186
7187 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, spec));
7188 }
7189
7190 return group.release();
7191 }
7192
// Expands inData (N values) into one operand list of N * N values, so that two lists built
// with argNo 0 and argNo 1 together enumerate every ordered pair of input values.
//
//   argNo == 0: the whole input sequence repeated N times        (a b c a b c a b c)
//   argNo == 1: every input value repeated N times consecutively (a a a b b b c c c)
//
// Any other argNo yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		valueCount	= inData.size();
	vector<deFloat16>	expanded;

	expanded.reserve(valueCount * valueCount);

	switch (argNo)
	{
		case 0:
			for (size_t repeatNdx = 0; repeatNdx < valueCount; ++repeatNdx)
				expanded.insert(expanded.end(), inData.begin(), inData.end());
			break;

		case 1:
			for (size_t valueNdx = 0; valueNdx < valueCount; ++valueNdx)
				expanded.insert(expanded.end(), valueCount, inData[valueNdx]);
			break;

		default:
			break;
	}

	return expanded;
}
7218
// Two-component vector counterpart of squarize().
//
// First every ordered pair (x, y) of input values is turned into a 2-component vector,
// giving V = N * N vectors. The result then holds V * V vectors (2 values each):
//
//   argNo == 0: the whole list of vectors repeated V times
//   argNo == 1: every vector repeated V times consecutively
//
// Any other argNo yields an empty vector. The input is asserted to hold at most 64 values.
const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		componentCount	= 2;
	const size_t		scalarCount		= inData.size();

	DE_ASSERT(scalarCount <= 64);

	// All ordered pairs of input scalars, stored as consecutive (x, y) components.
	vector<deFloat16>	vectors;

	vectors.reserve(componentCount * scalarCount * scalarCount);

	for (size_t xNdx = 0; xNdx < scalarCount; ++xNdx)
	{
		for (size_t yNdx = 0; yNdx < scalarCount; ++yNdx)
		{
			vectors.push_back(inData[xNdx]);
			vectors.push_back(inData[yNdx]);
		}
	}

	// Operand list: one vector picked per (outer, inner) combination.
	const size_t		vectorCount		= vectors.size() / componentCount;
	vector<deFloat16>	operands;

	operands.reserve(componentCount * vectorCount * vectorCount);

	if (argNo == 0 || argNo == 1)
	{
		for (size_t outerNdx = 0; outerNdx < vectorCount; ++outerNdx)
		{
			for (size_t innerNdx = 0; innerNdx < vectorCount; ++innerNdx)
			{
				// The first operand cycles through the vectors, the second one holds each vector for a full cycle.
				const size_t	pickedNdx	= (argNo == 0) ? innerNdx : outerNdx;

				for (size_t componentNdx = 0; componentNdx < componentCount; ++componentNdx)
					operands.push_back(vectors[componentCount * pickedNdx + componentNdx]);
			}
		}
	}

	return operands;
}
7270
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isNan7271 struct fp16isNan { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isNaN(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isInf7272 struct fp16isInf { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isInf(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isEqual7273 struct fp16isEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() == in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isUnequal7274 struct fp16isUnequal { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() != in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isLess7275 struct fp16isLess { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() < in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isGreater7276 struct fp16isGreater { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() > in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isLessOrEqual7277 struct fp16isLessOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() <= in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon437b2d460111::fp16isGreaterOrEqual7278 struct fp16isGreaterOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() >= in2.asFloat(); } };
7279
7280 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7281 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
7282 {
7283 if (inputs.size() != 2 || outputAllocs.size() != 1)
7284 return false;
7285
7286 vector<deUint8> input1Bytes;
7287 vector<deUint8> input2Bytes;
7288
7289 inputs[0].getBytes(input1Bytes);
7290 inputs[1].getBytes(input2Bytes);
7291
7292 const deUint32 denormModesCount = 2;
7293 const deFloat16 float16one = tcu::Float16(1.0f).bits();
7294 const deFloat16 float16zero = tcu::Float16(0.0f).bits();
7295 const tcu::Float16 zero = tcu::Float16::zero(1);
7296 const deFloat16* const outputAsFP16 = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
7297 const deFloat16* const input1AsFP16 = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7298 const deFloat16* const input2AsFP16 = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
7299 deUint32 successfulRuns = denormModesCount;
7300 std::string results[denormModesCount];
7301 TestedLogicalFunction testedLogicalFunction;
7302
7303 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7304 {
7305 const bool flushToZero = (denormMode == 1);
7306
7307 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7308 {
7309 const tcu::Float16 f1pre = tcu::Float16(input1AsFP16[idx]);
7310 const tcu::Float16 f2pre = tcu::Float16(input2AsFP16[idx]);
7311 const tcu::Float16 f1 = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7312 const tcu::Float16 f2 = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7313 deFloat16 expectedOutput = float16zero;
7314
7315 if (onlyTestFunc)
7316 {
7317 if (testedLogicalFunction(f1, f2))
7318 expectedOutput = float16one;
7319 }
7320 else
7321 {
7322 const bool f1nan = f1.isNaN();
7323 const bool f2nan = f2.isNaN();
7324
7325 // Skip NaN floats if not supported by implementation
7326 if (!nanSupported && (f1nan || f2nan))
7327 continue;
7328
7329 if (unationModeAnd)
7330 {
7331 const bool ordered = !f1nan && !f2nan;
7332
7333 if (ordered && testedLogicalFunction(f1, f2))
7334 expectedOutput = float16one;
7335 }
7336 else
7337 {
7338 const bool unordered = f1nan || f2nan;
7339
7340 if (unordered || testedLogicalFunction(f1, f2))
7341 expectedOutput = float16one;
7342 }
7343 }
7344
7345 if (outputAsFP16[idx] != expectedOutput)
7346 {
7347 std::ostringstream str;
7348
7349 str << "ERROR: Sub-case #" << idx
7350 << " flushToZero:" << flushToZero
7351 << std::hex
7352 << " failed, inputs: 0x" << f1.bits()
7353 << ";0x" << f2.bits()
7354 << " output: 0x" << outputAsFP16[idx]
7355 << " expected output: 0x" << expectedOutput;
7356
7357 results[denormMode] = str.str();
7358
7359 successfulRuns--;
7360
7361 break;
7362 }
7363 }
7364 }
7365
7366 if (successfulRuns == 0)
7367 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7368 log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7369
7370 return successfulRuns > 0;
7371 }
7372
7373 } // anonymous
7374
createOpSourceTests(tcu::TestContext & testCtx)7375 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7376 {
7377 struct NameCodePair { string name, code; };
7378 RGBA defaultColors[4];
7379 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
7380 const std::string opsourceGLSLWithFile = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7381 map<string, string> fragments = passthruFragments();
7382 const NameCodePair tests[] =
7383 {
7384 {"unknown", "OpSource Unknown 321"},
7385 {"essl", "OpSource ESSL 310"},
7386 {"glsl", "OpSource GLSL 450"},
7387 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7388 {"opencl_c", "OpSource OpenCL_C 120"},
7389 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7390 {"file", opsourceGLSLWithFile},
7391 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7392 // Longest possible source string: SPIR-V limits instructions to 65535
7393 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7394 // contain 65530 UTF8 characters (one word each) plus one last word
7395 // containing 3 ASCII characters and \0.
7396 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7397 };
7398
7399 getDefaultColors(defaultColors);
7400 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7401 {
7402 fragments["debug"] = tests[testNdx].code;
7403 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7404 }
7405
7406 return opSourceTests.release();
7407 }
7408
createOpSourceContinuedTests(tcu::TestContext & testCtx)7409 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7410 {
7411 struct NameCodePair { string name, code; };
7412 RGBA defaultColors[4];
7413 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7414 map<string, string> fragments = passthruFragments();
7415 const std::string opsource = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7416 const NameCodePair tests[] =
7417 {
7418 {"empty", opsource + "OpSourceContinued \"\""},
7419 {"short", opsource + "OpSourceContinued \"abcde\""},
7420 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7421 // Longest possible source string: SPIR-V limits instructions to 65535
7422 // words, of which the first one is OpSourceContinued/length; the rest
7423 // will contain 65533 UTF8 characters (one word each) plus one last word
7424 // containing 3 ASCII characters and \0.
7425 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7426 };
7427
7428 getDefaultColors(defaultColors);
7429 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7430 {
7431 fragments["debug"] = tests[testNdx].code;
7432 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7433 }
7434
7435 return opSourceTests.release();
7436 }
createOpNoLineTests(tcu::TestContext & testCtx)7437 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7438 {
7439 RGBA defaultColors[4];
7440 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7441 map<string, string> fragments;
7442 getDefaultColors(defaultColors);
7443 fragments["debug"] =
7444 "%name = OpString \"name\"\n";
7445
7446 fragments["pre_main"] =
7447 "OpNoLine\n"
7448 "OpNoLine\n"
7449 "OpLine %name 1 1\n"
7450 "OpNoLine\n"
7451 "OpLine %name 1 1\n"
7452 "OpLine %name 1 1\n"
7453 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7454 "OpNoLine\n"
7455 "OpLine %name 1 1\n"
7456 "OpNoLine\n"
7457 "OpLine %name 1 1\n"
7458 "OpLine %name 1 1\n"
7459 "%second_param1 = OpFunctionParameter %v4f32\n"
7460 "OpNoLine\n"
7461 "OpNoLine\n"
7462 "%label_secondfunction = OpLabel\n"
7463 "OpNoLine\n"
7464 "OpReturnValue %second_param1\n"
7465 "OpFunctionEnd\n"
7466 "OpNoLine\n"
7467 "OpNoLine\n";
7468
7469 fragments["testfun"] =
7470 // A %test_code function that returns its argument unchanged.
7471 "OpNoLine\n"
7472 "OpNoLine\n"
7473 "OpLine %name 1 1\n"
7474 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7475 "OpNoLine\n"
7476 "%param1 = OpFunctionParameter %v4f32\n"
7477 "OpNoLine\n"
7478 "OpNoLine\n"
7479 "%label_testfun = OpLabel\n"
7480 "OpNoLine\n"
7481 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7482 "OpReturnValue %val1\n"
7483 "OpFunctionEnd\n"
7484 "OpLine %name 1 1\n"
7485 "OpNoLine\n";
7486
7487 createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7488
7489 return opLineTests.release();
7490 }
7491
createOpModuleProcessedTests(tcu::TestContext & testCtx)7492 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7493 {
7494 RGBA defaultColors[4];
7495 de::MovePtr<tcu::TestCaseGroup> opModuleProcessedTests (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7496 map<string, string> fragments;
7497 std::vector<std::string> noExtensions;
7498 GraphicsResources resources;
7499
7500 getDefaultColors(defaultColors);
7501 resources.verifyBinary = veryfiBinaryShader;
7502 resources.spirvVersion = SPIRV_VERSION_1_3;
7503
7504 fragments["moduleprocessed"] =
7505 "OpModuleProcessed \"VULKAN CTS\"\n"
7506 "OpModuleProcessed \"Negative values\"\n"
7507 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7508
7509 fragments["pre_main"] =
7510 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7511 "%second_param1 = OpFunctionParameter %v4f32\n"
7512 "%label_secondfunction = OpLabel\n"
7513 "OpReturnValue %second_param1\n"
7514 "OpFunctionEnd\n";
7515
7516 fragments["testfun"] =
7517 // A %test_code function that returns its argument unchanged.
7518 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7519 "%param1 = OpFunctionParameter %v4f32\n"
7520 "%label_testfun = OpLabel\n"
7521 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7522 "OpReturnValue %val1\n"
7523 "OpFunctionEnd\n";
7524
7525 createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7526
7527 return opModuleProcessedTests.release();
7528 }
7529
7530
createOpLineTests(tcu::TestContext & testCtx)7531 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7532 {
7533 RGBA defaultColors[4];
7534 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7535 map<string, string> fragments;
7536 std::vector<std::pair<std::string, std::string> > problemStrings;
7537
7538 problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7539 problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7540 problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7541 getDefaultColors(defaultColors);
7542
7543 fragments["debug"] =
7544 "%other_name = OpString \"other_name\"\n";
7545
7546 fragments["pre_main"] =
7547 "OpLine %file_name 32 0\n"
7548 "OpLine %file_name 32 32\n"
7549 "OpLine %file_name 32 40\n"
7550 "OpLine %other_name 32 40\n"
7551 "OpLine %other_name 0 100\n"
7552 "OpLine %other_name 0 4294967295\n"
7553 "OpLine %other_name 4294967295 0\n"
7554 "OpLine %other_name 32 40\n"
7555 "OpLine %file_name 0 0\n"
7556 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7557 "OpLine %file_name 1 0\n"
7558 "%second_param1 = OpFunctionParameter %v4f32\n"
7559 "OpLine %file_name 1 3\n"
7560 "OpLine %file_name 1 2\n"
7561 "%label_secondfunction = OpLabel\n"
7562 "OpLine %file_name 0 2\n"
7563 "OpReturnValue %second_param1\n"
7564 "OpFunctionEnd\n"
7565 "OpLine %file_name 0 2\n"
7566 "OpLine %file_name 0 2\n";
7567
7568 fragments["testfun"] =
7569 // A %test_code function that returns its argument unchanged.
7570 "OpLine %file_name 1 0\n"
7571 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7572 "OpLine %file_name 16 330\n"
7573 "%param1 = OpFunctionParameter %v4f32\n"
7574 "OpLine %file_name 14 442\n"
7575 "%label_testfun = OpLabel\n"
7576 "OpLine %file_name 11 1024\n"
7577 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7578 "OpLine %file_name 2 97\n"
7579 "OpReturnValue %val1\n"
7580 "OpFunctionEnd\n"
7581 "OpLine %file_name 5 32\n";
7582
7583 for (size_t i = 0; i < problemStrings.size(); ++i)
7584 {
7585 map<string, string> testFragments = fragments;
7586 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7587 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7588 }
7589
7590 return opLineTests.release();
7591 }
7592
createOpConstantNullTests(tcu::TestContext & testCtx)7593 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7594 {
7595 de::MovePtr<tcu::TestCaseGroup> opConstantNullTests (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7596 RGBA colors[4];
7597
7598
7599 const char functionStart[] =
7600 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7601 "%param1 = OpFunctionParameter %v4f32\n"
7602 "%lbl = OpLabel\n";
7603
7604 const char functionEnd[] =
7605 "OpReturnValue %transformed_param\n"
7606 "OpFunctionEnd\n";
7607
7608 struct NameConstantsCode
7609 {
7610 string name;
7611 string constants;
7612 string code;
7613 };
7614
7615 NameConstantsCode tests[] =
7616 {
7617 {
7618 "vec4",
7619 "%cnull = OpConstantNull %v4f32\n",
7620 "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7621 },
7622 {
7623 "float",
7624 "%cnull = OpConstantNull %f32\n",
7625 "%vp = OpVariable %fp_v4f32 Function\n"
7626 "%v = OpLoad %v4f32 %vp\n"
7627 "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7628 "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7629 "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7630 "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7631 "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7632 },
7633 {
7634 "bool",
7635 "%cnull = OpConstantNull %bool\n",
7636 "%v = OpVariable %fp_v4f32 Function\n"
7637 " OpStore %v %param1\n"
7638 " OpSelectionMerge %false_label None\n"
7639 " OpBranchConditional %cnull %true_label %false_label\n"
7640 "%true_label = OpLabel\n"
7641 " OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7642 " OpBranch %false_label\n"
7643 "%false_label = OpLabel\n"
7644 "%transformed_param = OpLoad %v4f32 %v\n"
7645 },
7646 {
7647 "i32",
7648 "%cnull = OpConstantNull %i32\n",
7649 "%v = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7650 "%b = OpIEqual %bool %cnull %c_i32_0\n"
7651 " OpSelectionMerge %false_label None\n"
7652 " OpBranchConditional %b %true_label %false_label\n"
7653 "%true_label = OpLabel\n"
7654 " OpStore %v %param1\n"
7655 " OpBranch %false_label\n"
7656 "%false_label = OpLabel\n"
7657 "%transformed_param = OpLoad %v4f32 %v\n"
7658 },
7659 {
7660 "struct",
7661 "%stype = OpTypeStruct %f32 %v4f32\n"
7662 "%fp_stype = OpTypePointer Function %stype\n"
7663 "%cnull = OpConstantNull %stype\n",
7664 "%v = OpVariable %fp_stype Function %cnull\n"
7665 "%f = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7666 "%f_val = OpLoad %v4f32 %f\n"
7667 "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7668 },
7669 {
7670 "array",
7671 "%a4_v4f32 = OpTypeArray %v4f32 %c_u32_4\n"
7672 "%fp_a4_v4f32 = OpTypePointer Function %a4_v4f32\n"
7673 "%cnull = OpConstantNull %a4_v4f32\n",
7674 "%v = OpVariable %fp_a4_v4f32 Function %cnull\n"
7675 "%f = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7676 "%f1 = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7677 "%f2 = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7678 "%f3 = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7679 "%f_val = OpLoad %v4f32 %f\n"
7680 "%f1_val = OpLoad %v4f32 %f1\n"
7681 "%f2_val = OpLoad %v4f32 %f2\n"
7682 "%f3_val = OpLoad %v4f32 %f3\n"
7683 "%t0 = OpFAdd %v4f32 %param1 %f_val\n"
7684 "%t1 = OpFAdd %v4f32 %t0 %f1_val\n"
7685 "%t2 = OpFAdd %v4f32 %t1 %f2_val\n"
7686 "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7687 },
7688 {
7689 "matrix",
7690 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7691 "%cnull = OpConstantNull %mat4x4_f32\n",
7692 // Our null matrix * any vector should result in a zero vector.
7693 "%v = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7694 "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7695 }
7696 };
7697
7698 getHalfColorsFullAlpha(colors);
7699
7700 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7701 {
7702 map<string, string> fragments;
7703 fragments["pre_main"] = tests[testNdx].constants;
7704 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7705 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7706 }
7707 return opConstantNullTests.release();
7708 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7709 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7710 {
7711 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7712 RGBA inputColors[4];
7713 RGBA outputColors[4];
7714
7715
7716 const char functionStart[] =
7717 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7718 "%param1 = OpFunctionParameter %v4f32\n"
7719 "%lbl = OpLabel\n";
7720
7721 const char functionEnd[] =
7722 "OpReturnValue %transformed_param\n"
7723 "OpFunctionEnd\n";
7724
7725 struct NameConstantsCode
7726 {
7727 string name;
7728 string constants;
7729 string code;
7730 };
7731
7732 NameConstantsCode tests[] =
7733 {
7734 {
7735 "vec4",
7736
7737 "%cval = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7738 "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7739 },
7740 {
7741 "struct",
7742
7743 "%stype = OpTypeStruct %v4f32 %f32\n"
7744 "%fp_stype = OpTypePointer Function %stype\n"
7745 "%f32_n_1 = OpConstant %f32 -1.0\n"
7746 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7747 "%cvec = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7748 "%cval = OpConstantComposite %stype %cvec %f32_n_1\n",
7749
7750 "%v = OpVariable %fp_stype Function %cval\n"
7751 "%vec_ptr = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7752 "%f32_ptr = OpAccessChain %fp_f32 %v %c_u32_1\n"
7753 "%vec_val = OpLoad %v4f32 %vec_ptr\n"
7754 "%f32_val = OpLoad %f32 %f32_ptr\n"
7755 "%tmp1 = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7756 "%tmp2 = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7757 "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7758 },
7759 {
7760 // [1|0|0|0.5] [x] = x + 0.5
7761 // [0|1|0|0.5] [y] = y + 0.5
7762 // [0|0|1|0.5] [z] = z + 0.5
7763 // [0|0|0|1 ] [1] = 1
7764 "matrix",
7765
7766 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7767 "%v4f32_1_0_0_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7768 "%v4f32_0_1_0_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7769 "%v4f32_0_0_1_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7770 "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7771 "%cval = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7772
7773 "%transformed_param = OpMatrixTimesVector %v4f32 %cval %param1\n"
7774 },
7775 {
7776 "array",
7777
7778 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7779 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7780 "%f32_n_1 = OpConstant %f32 -1.0\n"
7781 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7782 "%carr = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7783
7784 "%v = OpVariable %fp_a4f32 Function %carr\n"
7785 "%f = OpAccessChain %fp_f32 %v %c_u32_0\n"
7786 "%f1 = OpAccessChain %fp_f32 %v %c_u32_1\n"
7787 "%f2 = OpAccessChain %fp_f32 %v %c_u32_2\n"
7788 "%f3 = OpAccessChain %fp_f32 %v %c_u32_3\n"
7789 "%f_val = OpLoad %f32 %f\n"
7790 "%f1_val = OpLoad %f32 %f1\n"
7791 "%f2_val = OpLoad %f32 %f2\n"
7792 "%f3_val = OpLoad %f32 %f3\n"
7793 "%ftot1 = OpFAdd %f32 %f_val %f1_val\n"
7794 "%ftot2 = OpFAdd %f32 %ftot1 %f2_val\n"
7795 "%ftot3 = OpFAdd %f32 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
7796 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7797 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7798 },
7799 {
7800 //
7801 // [
7802 // {
7803 // 0.0,
7804 // [ 1.0, 1.0, 1.0, 1.0]
7805 // },
7806 // {
7807 // 1.0,
7808 // [ 0.0, 0.5, 0.0, 0.0]
7809 // }, // ^^^
7810 // {
7811 // 0.0,
7812 // [ 1.0, 1.0, 1.0, 1.0]
7813 // }
7814 // ]
7815 "array_of_struct_of_array",
7816
7817 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7818 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7819 "%stype = OpTypeStruct %f32 %a4f32\n"
7820 "%a3stype = OpTypeArray %stype %c_u32_3\n"
7821 "%fp_a3stype = OpTypePointer Function %a3stype\n"
7822 "%ca4f32_0 = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7823 "%ca4f32_1 = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7824 "%cstype1 = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7825 "%cstype2 = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7826 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7827
7828 "%v = OpVariable %fp_a3stype Function %carr\n"
7829 "%f = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7830 "%f_l = OpLoad %f32 %f\n"
7831 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7832 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7833 }
7834 };
7835
7836 getHalfColorsFullAlpha(inputColors);
7837 outputColors[0] = RGBA(255, 255, 255, 255);
7838 outputColors[1] = RGBA(255, 127, 127, 255);
7839 outputColors[2] = RGBA(127, 255, 127, 255);
7840 outputColors[3] = RGBA(127, 127, 255, 255);
7841
7842 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7843 {
7844 map<string, string> fragments;
7845 fragments["pre_main"] = tests[testNdx].constants;
7846 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7847 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7848 }
7849 return opConstantCompositeTests.release();
7850 }
7851
createSelectionBlockOrderTests(tcu::TestContext & testCtx)7852 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7853 {
7854 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7855 RGBA inputColors[4];
7856 RGBA outputColors[4];
7857 map<string, string> fragments;
7858
7859 // vec4 test_code(vec4 param) {
7860 // vec4 result = param;
7861 // for (int i = 0; i < 4; ++i) {
7862 // if (i == 0) result[i] = 0.;
7863 // else result[i] = 1. - result[i];
7864 // }
7865 // return result;
7866 // }
7867 const char function[] =
7868 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7869 "%param1 = OpFunctionParameter %v4f32\n"
7870 "%lbl = OpLabel\n"
7871 "%iptr = OpVariable %fp_i32 Function\n"
7872 "%result = OpVariable %fp_v4f32 Function\n"
7873 " OpStore %iptr %c_i32_0\n"
7874 " OpStore %result %param1\n"
7875 " OpBranch %loop\n"
7876
7877 // Loop entry block.
7878 "%loop = OpLabel\n"
7879 "%ival = OpLoad %i32 %iptr\n"
7880 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7881 " OpLoopMerge %exit %if_entry None\n"
7882 " OpBranchConditional %lt_4 %if_entry %exit\n"
7883
7884 // Merge block for loop.
7885 "%exit = OpLabel\n"
7886 "%ret = OpLoad %v4f32 %result\n"
7887 " OpReturnValue %ret\n"
7888
7889 // If-statement entry block.
7890 "%if_entry = OpLabel\n"
7891 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7892 "%eq_0 = OpIEqual %bool %ival %c_i32_0\n"
7893 " OpSelectionMerge %if_exit None\n"
7894 " OpBranchConditional %eq_0 %if_true %if_false\n"
7895
7896 // False branch for if-statement.
7897 "%if_false = OpLabel\n"
7898 "%val = OpLoad %f32 %loc\n"
7899 "%sub = OpFSub %f32 %c_f32_1 %val\n"
7900 " OpStore %loc %sub\n"
7901 " OpBranch %if_exit\n"
7902
7903 // Merge block for if-statement.
7904 "%if_exit = OpLabel\n"
7905 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7906 " OpStore %iptr %ival_next\n"
7907 " OpBranch %loop\n"
7908
7909 // True branch for if-statement.
7910 "%if_true = OpLabel\n"
7911 " OpStore %loc %c_f32_0\n"
7912 " OpBranch %if_exit\n"
7913
7914 " OpFunctionEnd\n";
7915
7916 fragments["testfun"] = function;
7917
7918 inputColors[0] = RGBA(127, 127, 127, 0);
7919 inputColors[1] = RGBA(127, 0, 0, 0);
7920 inputColors[2] = RGBA(0, 127, 0, 0);
7921 inputColors[3] = RGBA(0, 0, 127, 0);
7922
7923 outputColors[0] = RGBA(0, 128, 128, 255);
7924 outputColors[1] = RGBA(0, 255, 255, 255);
7925 outputColors[2] = RGBA(0, 128, 255, 255);
7926 outputColors[3] = RGBA(0, 255, 128, 255);
7927
7928 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7929
7930 return group.release();
7931 }
7932
createSwitchBlockOrderTests(tcu::TestContext & testCtx)7933 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7934 {
7935 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7936 RGBA inputColors[4];
7937 RGBA outputColors[4];
7938 map<string, string> fragments;
7939
7940 const char typesAndConstants[] =
7941 "%c_f32_p2 = OpConstant %f32 0.2\n"
7942 "%c_f32_p4 = OpConstant %f32 0.4\n"
7943 "%c_f32_p6 = OpConstant %f32 0.6\n"
7944 "%c_f32_p8 = OpConstant %f32 0.8\n";
7945
7946 // vec4 test_code(vec4 param) {
7947 // vec4 result = param;
7948 // for (int i = 0; i < 4; ++i) {
7949 // switch (i) {
7950 // case 0: result[i] += .2; break;
7951 // case 1: result[i] += .6; break;
7952 // case 2: result[i] += .4; break;
7953 // case 3: result[i] += .8; break;
7954 // default: break; // unreachable
7955 // }
7956 // }
7957 // return result;
7958 // }
7959 const char function[] =
7960 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7961 "%param1 = OpFunctionParameter %v4f32\n"
7962 "%lbl = OpLabel\n"
7963 "%iptr = OpVariable %fp_i32 Function\n"
7964 "%result = OpVariable %fp_v4f32 Function\n"
7965 " OpStore %iptr %c_i32_0\n"
7966 " OpStore %result %param1\n"
7967 " OpBranch %loop\n"
7968
7969 // Loop entry block.
7970 "%loop = OpLabel\n"
7971 "%ival = OpLoad %i32 %iptr\n"
7972 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7973 " OpLoopMerge %exit %cont None\n"
7974 " OpBranchConditional %lt_4 %switch_entry %exit\n"
7975
7976 // Merge block for loop.
7977 "%exit = OpLabel\n"
7978 "%ret = OpLoad %v4f32 %result\n"
7979 " OpReturnValue %ret\n"
7980
7981 // Switch-statement entry block.
7982 "%switch_entry = OpLabel\n"
7983 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7984 "%val = OpLoad %f32 %loc\n"
7985 " OpSelectionMerge %switch_exit None\n"
7986 " OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7987
7988 "%case2 = OpLabel\n"
7989 "%addp4 = OpFAdd %f32 %val %c_f32_p4\n"
7990 " OpStore %loc %addp4\n"
7991 " OpBranch %switch_exit\n"
7992
7993 "%switch_default = OpLabel\n"
7994 " OpUnreachable\n"
7995
7996 "%case3 = OpLabel\n"
7997 "%addp8 = OpFAdd %f32 %val %c_f32_p8\n"
7998 " OpStore %loc %addp8\n"
7999 " OpBranch %switch_exit\n"
8000
8001 "%case0 = OpLabel\n"
8002 "%addp2 = OpFAdd %f32 %val %c_f32_p2\n"
8003 " OpStore %loc %addp2\n"
8004 " OpBranch %switch_exit\n"
8005
8006 // Merge block for switch-statement.
8007 "%switch_exit = OpLabel\n"
8008 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8009 " OpStore %iptr %ival_next\n"
8010 " OpBranch %cont\n"
8011 "%cont = OpLabel\n"
8012 " OpBranch %loop\n"
8013
8014 "%case1 = OpLabel\n"
8015 "%addp6 = OpFAdd %f32 %val %c_f32_p6\n"
8016 " OpStore %loc %addp6\n"
8017 " OpBranch %switch_exit\n"
8018
8019 " OpFunctionEnd\n";
8020
8021 fragments["pre_main"] = typesAndConstants;
8022 fragments["testfun"] = function;
8023
8024 inputColors[0] = RGBA(127, 27, 127, 51);
8025 inputColors[1] = RGBA(127, 0, 0, 51);
8026 inputColors[2] = RGBA(0, 27, 0, 51);
8027 inputColors[3] = RGBA(0, 0, 127, 51);
8028
8029 outputColors[0] = RGBA(178, 180, 229, 255);
8030 outputColors[1] = RGBA(178, 153, 102, 255);
8031 outputColors[2] = RGBA(51, 180, 102, 255);
8032 outputColors[3] = RGBA(51, 153, 229, 255);
8033
8034 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8035
8036 addOpSwitchAmberTests(*group, testCtx);
8037
8038 return group.release();
8039 }
8040
createDecorationGroupTests(tcu::TestContext & testCtx)8041 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8042 {
8043 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8044 RGBA inputColors[4];
8045 RGBA outputColors[4];
8046 map<string, string> fragments;
8047
8048 const char decorations[] =
8049 "OpDecorate %array_group ArrayStride 4\n"
8050 "OpDecorate %struct_member_group Offset 0\n"
8051 "%array_group = OpDecorationGroup\n"
8052 "%struct_member_group = OpDecorationGroup\n"
8053
8054 "OpDecorate %group1 RelaxedPrecision\n"
8055 "OpDecorate %group3 RelaxedPrecision\n"
8056 "OpDecorate %group3 Flat\n"
8057 "OpDecorate %group3 Restrict\n"
8058 "%group0 = OpDecorationGroup\n"
8059 "%group1 = OpDecorationGroup\n"
8060 "%group3 = OpDecorationGroup\n";
8061
8062 const char typesAndConstants[] =
8063 "%a3f32 = OpTypeArray %f32 %c_u32_3\n"
8064 "%struct1 = OpTypeStruct %a3f32\n"
8065 "%struct2 = OpTypeStruct %a3f32\n"
8066 "%fp_struct1 = OpTypePointer Function %struct1\n"
8067 "%fp_struct2 = OpTypePointer Function %struct2\n"
8068 "%c_f32_2 = OpConstant %f32 2.\n"
8069 "%c_f32_n2 = OpConstant %f32 -2.\n"
8070
8071 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8072 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8073 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8074 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8075
8076 const char function[] =
8077 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8078 "%param = OpFunctionParameter %v4f32\n"
8079 "%entry = OpLabel\n"
8080 "%result = OpVariable %fp_v4f32 Function\n"
8081 "%v_struct1 = OpVariable %fp_struct1 Function\n"
8082 "%v_struct2 = OpVariable %fp_struct2 Function\n"
8083 " OpStore %result %param\n"
8084 " OpStore %v_struct1 %c_struct1\n"
8085 " OpStore %v_struct2 %c_struct2\n"
8086 "%ptr1 = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8087 "%val1 = OpLoad %f32 %ptr1\n"
8088 "%ptr2 = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8089 "%val2 = OpLoad %f32 %ptr2\n"
8090 "%addvalues = OpFAdd %f32 %val1 %val2\n"
8091 "%ptr = OpAccessChain %fp_f32 %result %c_i32_1\n"
8092 "%val = OpLoad %f32 %ptr\n"
8093 "%addresult = OpFAdd %f32 %addvalues %val\n"
8094 " OpStore %ptr %addresult\n"
8095 "%ret = OpLoad %v4f32 %result\n"
8096 " OpReturnValue %ret\n"
8097 " OpFunctionEnd\n";
8098
8099 struct CaseNameDecoration
8100 {
8101 string name;
8102 string decoration;
8103 };
8104
8105 CaseNameDecoration tests[] =
8106 {
8107 {
8108 "same_decoration_group_on_multiple_types",
8109 "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8110 },
8111 {
8112 "empty_decoration_group",
8113 "OpGroupDecorate %group0 %a3f32\n"
8114 "OpGroupDecorate %group0 %result\n"
8115 },
8116 {
8117 "one_element_decoration_group",
8118 "OpGroupDecorate %array_group %a3f32\n"
8119 },
8120 {
8121 "multiple_elements_decoration_group",
8122 "OpGroupDecorate %group3 %v_struct1\n"
8123 },
8124 {
8125 "multiple_decoration_groups_on_same_variable",
8126 "OpGroupDecorate %group0 %v_struct2\n"
8127 "OpGroupDecorate %group1 %v_struct2\n"
8128 "OpGroupDecorate %group3 %v_struct2\n"
8129 },
8130 {
8131 "same_decoration_group_multiple_times",
8132 "OpGroupDecorate %group1 %addvalues\n"
8133 "OpGroupDecorate %group1 %addvalues\n"
8134 "OpGroupDecorate %group1 %addvalues\n"
8135 },
8136
8137 };
8138
8139 getHalfColorsFullAlpha(inputColors);
8140 getHalfColorsFullAlpha(outputColors);
8141
8142 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8143 {
8144 fragments["decoration"] = decorations + tests[idx].decoration;
8145 fragments["pre_main"] = typesAndConstants;
8146 fragments["testfun"] = function;
8147
8148 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8149 }
8150
8151 return group.release();
8152 }
8153
8154 struct SpecConstantTwoValGraphicsCase
8155 {
8156 const std::string caseName;
8157 const std::string scDefinition0;
8158 const std::string scDefinition1;
8159 const std::string scResultType;
8160 const std::string scOperation;
8161 SpecConstantValue scActualValue0;
8162 SpecConstantValue scActualValue1;
8163 const std::string resultOperation;
8164 RGBA expectedColors[4];
8165 CaseFlags caseFlags;
8166
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8167 SpecConstantTwoValGraphicsCase (const std::string& name,
8168 const std::string& definition0,
8169 const std::string& definition1,
8170 const std::string& resultType,
8171 const std::string& operation,
8172 const SpecConstantValue& value0,
8173 const SpecConstantValue& value1,
8174 const std::string& resultOp,
8175 const RGBA (&output)[4],
8176 CaseFlags flags = FLAG_NONE)
8177 : caseName (name)
8178 , scDefinition0 (definition0)
8179 , scDefinition1 (definition1)
8180 , scResultType (resultType)
8181 , scOperation (operation)
8182 , scActualValue0 (value0)
8183 , scActualValue1 (value1)
8184 , resultOperation (resultOp)
8185 , caseFlags (flags)
8186 {
8187 expectedColors[0] = output[0];
8188 expectedColors[1] = output[1];
8189 expectedColors[2] = output[2];
8190 expectedColors[3] = output[3];
8191 }
8192 };
8193
createSpecConstantTests(tcu::TestContext & testCtx)8194 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8195 {
8196 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8197 vector<SpecConstantTwoValGraphicsCase> cases;
8198 RGBA inputColors[4];
8199 RGBA outputColors0[4];
8200 RGBA outputColors1[4];
8201 RGBA outputColors2[4];
8202
8203 const char decorations1[] =
8204 "OpDecorate %sc_0 SpecId 0\n"
8205 "OpDecorate %sc_1 SpecId 1\n";
8206
8207 const char typesAndConstants1[] =
8208 "${OPTYPE_DEFINITIONS:opt}"
8209 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
8210 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
8211 "%sc_op = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8212
8213 const char function1[] =
8214 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8215 "%param = OpFunctionParameter %v4f32\n"
8216 "%label = OpLabel\n"
8217 "%result = OpVariable %fp_v4f32 Function\n"
8218 "${TYPE_CONVERT:opt}"
8219 " OpStore %result %param\n"
8220 "%gen = ${GEN_RESULT}\n"
8221 "%index = OpIAdd %i32 %gen %c_i32_1\n"
8222 "%loc = OpAccessChain %fp_f32 %result %index\n"
8223 "%val = OpLoad %f32 %loc\n"
8224 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8225 " OpStore %loc %add\n"
8226 "%ret = OpLoad %v4f32 %result\n"
8227 " OpReturnValue %ret\n"
8228 " OpFunctionEnd\n";
8229
8230 inputColors[0] = RGBA(127, 127, 127, 255);
8231 inputColors[1] = RGBA(127, 0, 0, 255);
8232 inputColors[2] = RGBA(0, 127, 0, 255);
8233 inputColors[3] = RGBA(0, 0, 127, 255);
8234
8235 // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8236 outputColors0[0] = RGBA(255, 127, 127, 255);
8237 outputColors0[1] = RGBA(255, 0, 0, 255);
8238 outputColors0[2] = RGBA(128, 127, 0, 255);
8239 outputColors0[3] = RGBA(128, 0, 127, 255);
8240
8241 // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8242 outputColors1[0] = RGBA(127, 255, 127, 255);
8243 outputColors1[1] = RGBA(127, 128, 0, 255);
8244 outputColors1[2] = RGBA(0, 255, 0, 255);
8245 outputColors1[3] = RGBA(0, 128, 127, 255);
8246
8247 // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8248 outputColors2[0] = RGBA(127, 127, 255, 255);
8249 outputColors2[1] = RGBA(127, 0, 128, 255);
8250 outputColors2[2] = RGBA(0, 127, 128, 255);
8251 outputColors2[3] = RGBA(0, 0, 255, 255);
8252
8253 const char addZeroToSc[] = "OpIAdd %i32 %c_i32_0 %sc_op";
8254 const char addZeroToSc32[] = "OpIAdd %i32 %c_i32_0 %sc_op32";
8255 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8256 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8257
8258 cases.push_back(SpecConstantTwoValGraphicsCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 19, -20, addZeroToSc, outputColors0));
8259 cases.push_back(SpecConstantTwoValGraphicsCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 19, 20, addZeroToSc, outputColors0));
8260 cases.push_back(SpecConstantTwoValGraphicsCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -1, -1, addZeroToSc, outputColors2));
8261 cases.push_back(SpecConstantTwoValGraphicsCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, 126, addZeroToSc, outputColors0));
8262 cases.push_back(SpecConstantTwoValGraphicsCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 126, addZeroToSc, outputColors2));
8263 cases.push_back(SpecConstantTwoValGraphicsCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8264 cases.push_back(SpecConstantTwoValGraphicsCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8265 cases.push_back(SpecConstantTwoValGraphicsCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 1001, 500, addZeroToSc, outputColors2));
8266 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 0x33, 0x0d, addZeroToSc, outputColors2));
8267 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 0, 1, addZeroToSc, outputColors2));
8268 cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 0x2e, 0x2f, addZeroToSc, outputColors2));
8269 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, 1, addZeroToSc, outputColors2));
8270 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, 2, addZeroToSc, outputColors0));
8271 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, 0, addZeroToSc, outputColors2));
8272
8273 // Shifts for other integer sizes.
8274 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{2}, deInt64{1}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8275 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-4}, deInt64{2}, addZeroToSc32, outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8276 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{1}, deInt64{0}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8277 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{2}, deInt16{1}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8278 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-4}, deInt16{2}, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8279 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{1}, deInt16{0}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8280 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{2}, deInt8{1}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8281 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-4}, deInt8{2}, addZeroToSc32, outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8282 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{1}, deInt8{0}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8283
8284 // Shifts for other integer sizes but only in the shift amount.
8285 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt64{1}, addZeroToSc, outputColors2, (FLAG_I64)));
8286 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt64{2}, addZeroToSc, outputColors0, (FLAG_I64)));
8287 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt64{0}, addZeroToSc, outputColors2, (FLAG_I64)));
8288 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt16{1}, addZeroToSc, outputColors2, (FLAG_I16)));
8289 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt16{2}, addZeroToSc, outputColors0, (FLAG_I16)));
8290 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt16{0}, addZeroToSc, outputColors2, (FLAG_I16)));
8291 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt8{1}, addZeroToSc, outputColors2, (FLAG_I8)));
8292 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt8{2}, addZeroToSc, outputColors0, (FLAG_I8)));
8293 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt8{0}, addZeroToSc, outputColors2, (FLAG_I8)));
8294
8295 cases.push_back(SpecConstantTwoValGraphicsCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputColors2));
8296 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputColors2));
8297 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8298 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputColors2));
8299 cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputColors2));
8300 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputColors2));
8301 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8302 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputColors2));
8303 cases.push_back(SpecConstantTwoValGraphicsCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputColors2));
8304 cases.push_back(SpecConstantTwoValGraphicsCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputColors2));
8305 cases.push_back(SpecConstantTwoValGraphicsCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8306 cases.push_back(SpecConstantTwoValGraphicsCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8307 cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8308 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8309 cases.push_back(SpecConstantTwoValGraphicsCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -1, 0, addZeroToSc, outputColors2));
8310 cases.push_back(SpecConstantTwoValGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -2, 0, addZeroToSc, outputColors2));
8311 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputColors2));
8312 cases.push_back(SpecConstantTwoValGraphicsCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc, outputColors2));
8313 cases.push_back(SpecConstantTwoValGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -1, 0, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8314 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32(-1.0), tcu::Float32(0.0), addZeroToSc32, outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8315 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16(-1.0), tcu::Float16(0.0), addZeroToSc32, outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8316 // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8317
8318 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8319 {
8320 map<string, string> specializations;
8321 map<string, string> fragments;
8322 SpecConstants specConstants;
8323 PushConstants noPushConstants;
8324 GraphicsResources noResources;
8325 GraphicsInterfaces noInterfaces;
8326 vector<string> extensions;
8327 VulkanFeatures requiredFeatures;
8328
8329 // Special SPIR-V code when using 16-bit integers.
8330 if (cases[caseNdx].caseFlags & FLAG_I16)
8331 {
8332 requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
8333 fragments["capability"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
8334 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
8335 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8336 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
8337 }
8338
8339 // Special SPIR-V code when using 64-bit integers.
8340 if (cases[caseNdx].caseFlags & FLAG_I64)
8341 {
8342 requiredFeatures.coreFeatures.shaderInt64 = VK_TRUE;
8343 fragments["capability"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
8344 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
8345 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8346 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 64-bit integer to 32-bit integer
8347 }
8348
8349 // Special SPIR-V code when using 64-bit floats.
8350 if (cases[caseNdx].caseFlags & FLAG_F64)
8351 {
8352 requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8353 fragments["capability"] += "OpCapability Float64\n"; // Adds 64-bit float capability
8354 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
8355 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8356 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
8357 }
8358
8359 // Extension needed for float16 and int8.
8360 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8361 extensions.push_back("VK_KHR_shader_float16_int8");
8362
8363 // Special SPIR-V code when using 16-bit floats.
8364 if (cases[caseNdx].caseFlags & FLAG_F16)
8365 {
8366 requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8367 fragments["capability"] += "OpCapability Float16\n"; // Adds 16-bit float capability
8368 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
8369 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8370 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 16-bit float to 32-bit integer
8371 }
8372
8373 // Special SPIR-V code when using 8-bit integers.
8374 if (cases[caseNdx].caseFlags & FLAG_I8)
8375 {
8376 requiredFeatures.extFloat16Int8.shaderInt8 = true;
8377 fragments["capability"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
8378 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
8379 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8380 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 8-bit integer to 32-bit integer
8381 }
8382
8383 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
8384 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
8385 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
8386 specializations["SC_OP"] = cases[caseNdx].scOperation;
8387 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
8388
8389 fragments["decoration"] = tcu::StringTemplate(decorations1).specialize(specializations);
8390 fragments["pre_main"] = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8391 fragments["testfun"] = tcu::StringTemplate(function1).specialize(specializations);
8392
8393 cases[caseNdx].scActualValue0.appendTo(specConstants);
8394 cases[caseNdx].scActualValue1.appendTo(specConstants);
8395
8396 createTestsForAllStages(
8397 cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8398 noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8399 }
8400
8401 const char decorations2[] =
8402 "OpDecorate %sc_0 SpecId 0\n"
8403 "OpDecorate %sc_1 SpecId 1\n"
8404 "OpDecorate %sc_2 SpecId 2\n";
8405
8406 const std::string typesAndConstants2 =
8407 "%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8408 "%vec3_undef = OpUndef %v3i32\n"
8409
8410 + getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8411
8412 "%sc_0 = OpSpecConstant %i32 0\n"
8413 "%sc_1 = OpSpecConstant %i32 0\n"
8414 "%sc_2 = OpSpecConstant %i32 0\n"
8415
8416 + getSpecConstantOpStructConstBlock() +
8417
8418 "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
8419 "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
8420 "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
8421 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
8422 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
8423 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
8424 "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
8425 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
8426 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
8427 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
8428 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
8429 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
8430 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
8431
8432 const std::string function2 =
8433 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8434 "%param = OpFunctionParameter %v4f32\n"
8435 "%label = OpLabel\n"
8436 "%result = OpVariable %fp_v4f32 Function\n"
8437
8438 + getSpecConstantOpStructInstructions() +
8439
8440 " OpStore %result %param\n"
8441 "%loc = OpAccessChain %fp_f32 %result %sc_final\n"
8442 "%val = OpLoad %f32 %loc\n"
8443 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8444 " OpStore %loc %add\n"
8445 "%ret = OpLoad %v4f32 %result\n"
8446 " OpReturnValue %ret\n"
8447 " OpFunctionEnd\n";
8448
8449 map<string, string> fragments;
8450 SpecConstants specConstants;
8451
8452 fragments["decoration"] = decorations2;
8453 fragments["pre_main"] = typesAndConstants2;
8454 fragments["testfun"] = function2;
8455
8456 specConstants.append<deInt32>(56789);
8457 specConstants.append<deInt32>(-2);
8458 specConstants.append<deInt32>(56788);
8459
8460 createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8461
8462 return group.release();
8463 }
8464
createOpPhiTests(tcu::TestContext & testCtx)8465 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8466 {
8467 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8468 RGBA inputColors[4];
8469 RGBA outputColors1[4];
8470 RGBA outputColors2[4];
8471 RGBA outputColors3[4];
8472 RGBA outputColors4[4];
8473 map<string, string> fragments1;
8474 map<string, string> fragments2;
8475 map<string, string> fragments3;
8476 map<string, string> fragments4;
8477 std::vector<std::string> extensions4;
8478 GraphicsResources resources4;
8479 VulkanFeatures vulkanFeatures4;
8480
8481 const char typesAndConstants1[] =
8482 "%c_f32_p2 = OpConstant %f32 0.2\n"
8483 "%c_f32_p4 = OpConstant %f32 0.4\n"
8484 "%c_f32_p5 = OpConstant %f32 0.5\n"
8485 "%c_f32_p8 = OpConstant %f32 0.8\n";
8486
8487 // vec4 test_code(vec4 param) {
8488 // vec4 result = param;
8489 // for (int i = 0; i < 4; ++i) {
8490 // float operand;
8491 // switch (i) {
8492 // case 0: operand = .2; break;
8493 // case 1: operand = .5; break;
8494 // case 2: operand = .4; break;
8495 // case 3: operand = .0; break;
8496 // default: break; // unreachable
8497 // }
8498 // result[i] += operand;
8499 // }
8500 // return result;
8501 // }
8502 const char function1[] =
8503 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8504 "%param1 = OpFunctionParameter %v4f32\n"
8505 "%lbl = OpLabel\n"
8506 "%iptr = OpVariable %fp_i32 Function\n"
8507 "%result = OpVariable %fp_v4f32 Function\n"
8508 " OpStore %iptr %c_i32_0\n"
8509 " OpStore %result %param1\n"
8510 " OpBranch %loop\n"
8511
8512 "%loop = OpLabel\n"
8513 "%ival = OpLoad %i32 %iptr\n"
8514 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8515 " OpLoopMerge %exit %cont None\n"
8516 " OpBranchConditional %lt_4 %entry %exit\n"
8517
8518 "%entry = OpLabel\n"
8519 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8520 "%val = OpLoad %f32 %loc\n"
8521 " OpSelectionMerge %phi None\n"
8522 " OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8523
8524 "%case0 = OpLabel\n"
8525 " OpBranch %phi\n"
8526 "%case1 = OpLabel\n"
8527 " OpBranch %phi\n"
8528 "%case2 = OpLabel\n"
8529 " OpBranch %phi\n"
8530 "%case3 = OpLabel\n"
8531 " OpBranch %phi\n"
8532
8533 "%default = OpLabel\n"
8534 " OpUnreachable\n"
8535
8536 "%phi = OpLabel\n"
8537 "%operand = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8538 " OpBranch %cont\n"
8539 "%cont = OpLabel\n"
8540 "%add = OpFAdd %f32 %val %operand\n"
8541 " OpStore %loc %add\n"
8542 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8543 " OpStore %iptr %ival_next\n"
8544 " OpBranch %loop\n"
8545
8546 "%exit = OpLabel\n"
8547 "%ret = OpLoad %v4f32 %result\n"
8548 " OpReturnValue %ret\n"
8549
8550 " OpFunctionEnd\n";
8551
8552 fragments1["pre_main"] = typesAndConstants1;
8553 fragments1["testfun"] = function1;
8554
8555 getHalfColorsFullAlpha(inputColors);
8556
8557 outputColors1[0] = RGBA(178, 255, 229, 255);
8558 outputColors1[1] = RGBA(178, 127, 102, 255);
8559 outputColors1[2] = RGBA(51, 255, 102, 255);
8560 outputColors1[3] = RGBA(51, 127, 229, 255);
8561
8562 createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8563
8564 const char typesAndConstants2[] =
8565 "%c_f32_p2 = OpConstant %f32 0.2\n";
8566
8567 // Add .4 to the second element of the given parameter.
8568 const char function2[] =
8569 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8570 "%param = OpFunctionParameter %v4f32\n"
8571 "%entry = OpLabel\n"
8572 "%result = OpVariable %fp_v4f32 Function\n"
8573 " OpStore %result %param\n"
8574 "%loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8575 "%val = OpLoad %f32 %loc\n"
8576 " OpBranch %phi\n"
8577
8578 "%phi = OpLabel\n"
8579 "%step = OpPhi %i32 %c_i32_0 %entry %step_next %phi\n"
8580 "%accum = OpPhi %f32 %val %entry %accum_next %phi\n"
8581 "%step_next = OpIAdd %i32 %step %c_i32_1\n"
8582 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8583 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8584 " OpLoopMerge %exit %phi None\n"
8585 " OpBranchConditional %still_loop %phi %exit\n"
8586
8587 "%exit = OpLabel\n"
8588 " OpStore %loc %accum\n"
8589 "%ret = OpLoad %v4f32 %result\n"
8590 " OpReturnValue %ret\n"
8591
8592 " OpFunctionEnd\n";
8593
8594 fragments2["pre_main"] = typesAndConstants2;
8595 fragments2["testfun"] = function2;
8596
8597 outputColors2[0] = RGBA(127, 229, 127, 255);
8598 outputColors2[1] = RGBA(127, 102, 0, 255);
8599 outputColors2[2] = RGBA(0, 229, 0, 255);
8600 outputColors2[3] = RGBA(0, 102, 127, 255);
8601
8602 createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8603
8604 const char typesAndConstants3[] =
8605 "%true = OpConstantTrue %bool\n"
8606 "%false = OpConstantFalse %bool\n"
8607 "%c_f32_p2 = OpConstant %f32 0.2\n";
8608
8609 // Swap the second and the third element of the given parameter.
8610 const char function3[] =
8611 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8612 "%param = OpFunctionParameter %v4f32\n"
8613 "%entry = OpLabel\n"
8614 "%result = OpVariable %fp_v4f32 Function\n"
8615 " OpStore %result %param\n"
8616 "%a_loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8617 "%a_init = OpLoad %f32 %a_loc\n"
8618 "%b_loc = OpAccessChain %fp_f32 %result %c_i32_2\n"
8619 "%b_init = OpLoad %f32 %b_loc\n"
8620 " OpBranch %phi\n"
8621
8622 "%phi = OpLabel\n"
8623 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8624 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
8625 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
8626 " OpLoopMerge %exit %phi None\n"
8627 " OpBranchConditional %still_loop %phi %exit\n"
8628
8629 "%exit = OpLabel\n"
8630 " OpStore %a_loc %a_next\n"
8631 " OpStore %b_loc %b_next\n"
8632 "%ret = OpLoad %v4f32 %result\n"
8633 " OpReturnValue %ret\n"
8634
8635 " OpFunctionEnd\n";
8636
8637 fragments3["pre_main"] = typesAndConstants3;
8638 fragments3["testfun"] = function3;
8639
8640 outputColors3[0] = RGBA(127, 127, 127, 255);
8641 outputColors3[1] = RGBA(127, 0, 0, 255);
8642 outputColors3[2] = RGBA(0, 0, 127, 255);
8643 outputColors3[3] = RGBA(0, 127, 0, 255);
8644
8645 createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8646
8647 const char typesAndConstants4[] =
8648 "%f16 = OpTypeFloat 16\n"
8649 "%v4f16 = OpTypeVector %f16 4\n"
8650 "%fp_f16 = OpTypePointer Function %f16\n"
8651 "%fp_v4f16 = OpTypePointer Function %v4f16\n"
8652 "%true = OpConstantTrue %bool\n"
8653 "%false = OpConstantFalse %bool\n"
8654 "%c_f32_p2 = OpConstant %f32 0.2\n";
8655
8656 // Swap the second and the third element of the given parameter.
8657 const char function4[] =
8658 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8659 "%param = OpFunctionParameter %v4f32\n"
8660 "%entry = OpLabel\n"
8661 "%result = OpVariable %fp_v4f16 Function\n"
8662 "%param16 = OpFConvert %v4f16 %param\n"
8663 " OpStore %result %param16\n"
8664 "%a_loc = OpAccessChain %fp_f16 %result %c_i32_1\n"
8665 "%a_init = OpLoad %f16 %a_loc\n"
8666 "%b_loc = OpAccessChain %fp_f16 %result %c_i32_2\n"
8667 "%b_init = OpLoad %f16 %b_loc\n"
8668 " OpBranch %phi\n"
8669
8670 "%phi = OpLabel\n"
8671 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8672 "%a_next = OpPhi %f16 %a_init %entry %b_next %phi\n"
8673 "%b_next = OpPhi %f16 %b_init %entry %a_next %phi\n"
8674 " OpLoopMerge %exit %phi None\n"
8675 " OpBranchConditional %still_loop %phi %exit\n"
8676
8677 "%exit = OpLabel\n"
8678 " OpStore %a_loc %a_next\n"
8679 " OpStore %b_loc %b_next\n"
8680 "%ret16 = OpLoad %v4f16 %result\n"
8681 "%ret = OpFConvert %v4f32 %ret16\n"
8682 " OpReturnValue %ret\n"
8683
8684 " OpFunctionEnd\n";
8685
8686 fragments4["pre_main"] = typesAndConstants4;
8687 fragments4["testfun"] = function4;
8688 fragments4["capability"] = "OpCapability Float16\n";
8689
8690 extensions4.push_back("VK_KHR_shader_float16_int8");
8691
8692 vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8693
8694 outputColors4[0] = RGBA(127, 127, 127, 255);
8695 outputColors4[1] = RGBA(127, 0, 0, 255);
8696 outputColors4[2] = RGBA(0, 0, 127, 255);
8697 outputColors4[3] = RGBA(0, 127, 0, 255);
8698
8699 createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8700
8701 return group.release();
8702 }
8703
createNoContractionTests(tcu::TestContext & testCtx)8704 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8705 {
8706 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8707 RGBA inputColors[4];
8708 RGBA outputColors[4];
8709
8710 // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
8711 // For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
8712 // only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
8713 // On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
8714 const char constantsAndTypes[] =
8715 "%c_vec4_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8716 "%c_vec4_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8717 "%c_f32_1pl2_23 = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8718 "%c_f32_1mi2_23 = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8719 "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n";
8720
8721 const char function[] =
8722 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8723 "%param = OpFunctionParameter %v4f32\n"
8724 "%label = OpLabel\n"
8725 "%var1 = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8726 "%var2 = OpVariable %fp_f32 Function\n"
8727 "%red = OpCompositeExtract %f32 %param 0\n"
8728 "%plus_red = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8729 " OpStore %var2 %plus_red\n"
8730 "%val1 = OpLoad %f32 %var1\n"
8731 "%val2 = OpLoad %f32 %var2\n"
8732 "%mul = OpFMul %f32 %val1 %val2\n"
8733 "%add = OpFAdd %f32 %mul %c_f32_n1\n"
8734 "%is0 = OpFOrdEqual %bool %add %c_f32_0\n"
8735 "%isn1n24 = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8736 "%success = OpLogicalOr %bool %is0 %isn1n24\n"
8737 "%v4success = OpCompositeConstruct %v4bool %success %success %success %success\n"
8738 "%ret = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8739 " OpReturnValue %ret\n"
8740 " OpFunctionEnd\n";
8741
8742 struct CaseNameDecoration
8743 {
8744 string name;
8745 string decoration;
8746 };
8747
8748
8749 CaseNameDecoration tests[] = {
8750 {"multiplication", "OpDecorate %mul NoContraction"},
8751 {"addition", "OpDecorate %add NoContraction"},
8752 {"both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8753 };
8754
8755 getHalfColorsFullAlpha(inputColors);
8756
8757 for (deUint8 idx = 0; idx < 4; ++idx)
8758 {
8759 inputColors[idx].setRed(0);
8760 outputColors[idx] = RGBA(0, 0, 0, 255);
8761 }
8762
8763 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8764 {
8765 map<string, string> fragments;
8766
8767 fragments["decoration"] = tests[testNdx].decoration;
8768 fragments["pre_main"] = constantsAndTypes;
8769 fragments["testfun"] = function;
8770
8771 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8772 }
8773
8774 return group.release();
8775 }
8776
createMemoryAccessTests(tcu::TestContext & testCtx)8777 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8778 {
8779 de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8780 RGBA colors[4];
8781
8782 const char constantsAndTypes[] =
8783 "%c_a2f32_1 = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8784 "%fp_a2f32 = OpTypePointer Function %a2f32\n"
8785 "%stype = OpTypeStruct %v4f32 %a2f32 %f32\n"
8786 "%fp_stype = OpTypePointer Function %stype\n";
8787
8788 const char function[] =
8789 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8790 "%param1 = OpFunctionParameter %v4f32\n"
8791 "%lbl = OpLabel\n"
8792 "%v1 = OpVariable %fp_v4f32 Function\n"
8793 "%v2 = OpVariable %fp_a2f32 Function\n"
8794 "%v3 = OpVariable %fp_f32 Function\n"
8795 "%v = OpVariable %fp_stype Function\n"
8796 "%vv = OpVariable %fp_stype Function\n"
8797 "%vvv = OpVariable %fp_f32 Function\n"
8798
8799 " OpStore %v1 %c_v4f32_1_1_1_1\n"
8800 " OpStore %v2 %c_a2f32_1\n"
8801 " OpStore %v3 %c_f32_1\n"
8802
8803 "%p_v4f32 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8804 "%p_a2f32 = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8805 "%p_f32 = OpAccessChain %fp_f32 %v %c_u32_2\n"
8806 "%v1_v = OpLoad %v4f32 %v1 ${access_type}\n"
8807 "%v2_v = OpLoad %a2f32 %v2 ${access_type}\n"
8808 "%v3_v = OpLoad %f32 %v3 ${access_type}\n"
8809
8810 " OpStore %p_v4f32 %v1_v ${access_type}\n"
8811 " OpStore %p_a2f32 %v2_v ${access_type}\n"
8812 " OpStore %p_f32 %v3_v ${access_type}\n"
8813
8814 " OpCopyMemory %vv %v ${access_type}\n"
8815 " OpCopyMemory %vvv %p_f32 ${access_type}\n"
8816
8817 "%p_f32_2 = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8818 "%v_f32_2 = OpLoad %f32 %p_f32_2\n"
8819 "%v_f32_3 = OpLoad %f32 %vvv\n"
8820
8821 "%ret1 = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8822 "%ret2 = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8823 " OpReturnValue %ret2\n"
8824 " OpFunctionEnd\n";
8825
8826 struct NameMemoryAccess
8827 {
8828 string name;
8829 string accessType;
8830 };
8831
8832
8833 NameMemoryAccess tests[] =
8834 {
8835 { "none", "" },
8836 { "volatile", "Volatile" },
8837 { "aligned", "Aligned 1" },
8838 { "volatile_aligned", "Volatile|Aligned 1" },
8839 { "nontemporal_aligned", "Nontemporal|Aligned 1" },
8840 { "volatile_nontemporal", "Volatile|Nontemporal" },
8841 { "volatile_nontermporal_aligned", "Volatile|Nontemporal|Aligned 1" },
8842 };
8843
8844 getHalfColorsFullAlpha(colors);
8845
8846 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8847 {
8848 map<string, string> fragments;
8849 map<string, string> memoryAccess;
8850 memoryAccess["access_type"] = tests[testNdx].accessType;
8851
8852 fragments["pre_main"] = constantsAndTypes;
8853 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
8854 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8855 }
8856 return memoryAccessTests.release();
8857 }
createOpUndefTests(tcu::TestContext & testCtx)8858 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8859 {
8860 de::MovePtr<tcu::TestCaseGroup> opUndefTests (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8861 RGBA defaultColors[4];
8862 map<string, string> fragments;
8863 getDefaultColors(defaultColors);
8864
8865 // First, simple cases that don't do anything with the OpUndef result.
8866 struct NameCodePair { string name, decl, type; };
8867 const NameCodePair tests[] =
8868 {
8869 {"bool", "", "%bool"},
8870 {"vec2uint32", "", "%v2u32"},
8871 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8872 {"sampler", "%type = OpTypeSampler", "%type"},
8873 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8874 {"pointer", "", "%fp_i32"},
8875 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8876 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8877 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8878 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8879 {
8880 fragments["undef_type"] = tests[testNdx].type;
8881 fragments["testfun"] = StringTemplate(
8882 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8883 "%param1 = OpFunctionParameter %v4f32\n"
8884 "%label_testfun = OpLabel\n"
8885 "%undef = OpUndef ${undef_type}\n"
8886 "OpReturnValue %param1\n"
8887 "OpFunctionEnd\n").specialize(fragments);
8888 fragments["pre_main"] = tests[testNdx].decl;
8889 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8890 }
8891 fragments.clear();
8892
8893 fragments["testfun"] =
8894 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8895 "%param1 = OpFunctionParameter %v4f32\n"
8896 "%label_testfun = OpLabel\n"
8897 "%undef = OpUndef %f32\n"
8898 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8899 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8900 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8901 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8902 "%b = OpFAdd %f32 %a %actually_zero\n"
8903 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8904 "OpReturnValue %ret\n"
8905 "OpFunctionEnd\n";
8906
8907 createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8908
8909 fragments["testfun"] =
8910 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8911 "%param1 = OpFunctionParameter %v4f32\n"
8912 "%label_testfun = OpLabel\n"
8913 "%undef = OpUndef %i32\n"
8914 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8915 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8916 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8917 "OpReturnValue %ret\n"
8918 "OpFunctionEnd\n";
8919
8920 createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8921
8922 fragments["testfun"] =
8923 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8924 "%param1 = OpFunctionParameter %v4f32\n"
8925 "%label_testfun = OpLabel\n"
8926 "%undef = OpUndef %u32\n"
8927 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8928 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8929 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8930 "OpReturnValue %ret\n"
8931 "OpFunctionEnd\n";
8932
8933 createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8934
8935 fragments["testfun"] =
8936 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8937 "%param1 = OpFunctionParameter %v4f32\n"
8938 "%label_testfun = OpLabel\n"
8939 "%undef = OpUndef %v4f32\n"
8940 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8941 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8942 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8943 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8944 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8945 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8946 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8947 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8948 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8949 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8950 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8951 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8952 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8953 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8954 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8955 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8956 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8957 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8958 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8959 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8960 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8961 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8962 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8963 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8964 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8965 "OpReturnValue %ret\n"
8966 "OpFunctionEnd\n";
8967
8968 createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8969
8970 fragments["pre_main"] =
8971 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8972 fragments["testfun"] =
8973 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8974 "%param1 = OpFunctionParameter %v4f32\n"
8975 "%label_testfun = OpLabel\n"
8976 "%undef = OpUndef %m2x2f32\n"
8977 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8978 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8979 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8980 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8981 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8982 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8983 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8984 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8985 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8986 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8987 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8988 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8989 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8990 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8991 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8992 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8993 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8994 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8995 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8996 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8997 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8998 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8999 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
9000 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
9001 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
9002 "OpReturnValue %ret\n"
9003 "OpFunctionEnd\n";
9004
9005 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
9006
9007 return opUndefTests.release();
9008 }
9009
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)9010 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
9011 {
9012 const RGBA inputColors[4] =
9013 {
9014 RGBA(0, 0, 0, 255),
9015 RGBA(0, 0, 255, 255),
9016 RGBA(0, 255, 0, 255),
9017 RGBA(0, 255, 255, 255)
9018 };
9019
9020 const RGBA expectedColors[4] =
9021 {
9022 RGBA(255, 0, 0, 255),
9023 RGBA(255, 0, 0, 255),
9024 RGBA(255, 0, 0, 255),
9025 RGBA(255, 0, 0, 255)
9026 };
9027
9028 const struct SingleFP16Possibility
9029 {
9030 const char* name;
9031 const char* constant; // Value to assign to %test_constant.
9032 float valueAsFloat;
9033 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9034 bool preserveNanInf;
9035 } tests[] =
9036 {
9037 {
9038 "negative",
9039 "-0x1.3p1\n",
9040 -constructNormalizedFloat(1, 0x300000),
9041 "%cond = OpFOrdEqual %bool %c %test_constant\n",
9042 false
9043 }, // -19
9044 {
9045 "positive",
9046 "0x1.0p7\n",
9047 constructNormalizedFloat(7, 0x000000),
9048 "%cond = OpFOrdEqual %bool %c %test_constant\n",
9049 false
9050 }, // +128
9051 // SPIR-V requires that OpQuantizeToF16 flushes
9052 // any numbers that would end up denormalized in F16 to zero.
9053 {
9054 "denorm",
9055 "0x0.0006p-126\n",
9056 std::ldexp(1.5f, -140),
9057 "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9058 false
9059 }, // denorm
9060 {
9061 "negative_denorm",
9062 "-0x0.0006p-126\n",
9063 -std::ldexp(1.5f, -140),
9064 "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9065 false
9066 }, // -denorm
9067 {
9068 "too_small",
9069 "0x1.0p-16\n",
9070 std::ldexp(1.0f, -16),
9071 "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9072 false
9073 }, // too small positive
9074 {
9075 "negative_too_small",
9076 "-0x1.0p-32\n",
9077 -std::ldexp(1.0f, -32),
9078 "%cond = OpFOrdEqual %bool %c %c_f32_0\n",
9079 false
9080 }, // too small negative
9081 {
9082 "negative_inf",
9083 "-0x1.0p128\n",
9084 -std::ldexp(1.0f, 128),
9085
9086 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9087 "%inf = OpIsInf %bool %c\n"
9088 "%cond = OpLogicalAnd %bool %gz %inf\n",
9089 true
9090 }, // -inf to -inf
9091 {
9092 "inf",
9093 "0x1.0p128\n",
9094 std::ldexp(1.0f, 128),
9095
9096 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9097 "%inf = OpIsInf %bool %c\n"
9098 "%cond = OpLogicalAnd %bool %gz %inf\n",
9099 true
9100 }, // +inf to +inf
9101 {
9102 "round_to_negative_inf",
9103 "-0x1.0p32\n",
9104 -std::ldexp(1.0f, 32),
9105
9106 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9107 "%inf = OpIsInf %bool %c\n"
9108 "%cond = OpLogicalAnd %bool %gz %inf\n",
9109 true
9110 }, // round to -inf
9111 {
9112 "round_to_inf",
9113 "0x1.0p16\n",
9114 std::ldexp(1.0f, 16),
9115
9116 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9117 "%inf = OpIsInf %bool %c\n"
9118 "%cond = OpLogicalAnd %bool %gz %inf\n",
9119 true
9120 }, // round to +inf
9121 {
9122 "nan",
9123 "0x1.1p128\n",
9124 std::numeric_limits<float>::quiet_NaN(),
9125
9126 // Test for any NaN value, as NaNs are not preserved
9127 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9128 "%cond = OpIsNan %bool %direct_quant\n",
9129 true
9130 }, // nan
9131 {
9132 "negative_nan",
9133 "-0x1.0001p128\n",
9134 std::numeric_limits<float>::quiet_NaN(),
9135
9136 // Test for any NaN value, as NaNs are not preserved
9137 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9138 "%cond = OpIsNan %bool %direct_quant\n",
9139 true
9140 } // -nan
9141 };
9142 const char* constants =
9143 "%test_constant = OpConstant %f32 "; // The value will be test.constant.
9144
9145 StringTemplate function (
9146 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9147 "%param1 = OpFunctionParameter %v4f32\n"
9148 "%label_testfun = OpLabel\n"
9149 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9150 "%b = OpFAdd %f32 %test_constant %a\n"
9151 "%c = OpQuantizeToF16 %f32 %b\n"
9152 "${condition}\n"
9153 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9154 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9155 " OpReturnValue %retval\n"
9156 "OpFunctionEnd\n"
9157 );
9158
9159 const char* specDecorations = "OpDecorate %test_constant SpecId 0\n";
9160 const char* specConstants =
9161 "%test_constant = OpSpecConstant %f32 0.\n"
9162 "%c = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9163
9164 StringTemplate specConstantFunction(
9165 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9166 "%param1 = OpFunctionParameter %v4f32\n"
9167 "%label_testfun = OpLabel\n"
9168 "${condition}\n"
9169 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9170 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9171 " OpReturnValue %retval\n"
9172 "OpFunctionEnd\n"
9173 );
9174
9175 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9176 {
9177 std::vector<std::string> extensions;
9178 VulkanFeatures features;
9179 map<string, string> codeSpecialization;
9180 map<string, string> fragments;
9181 codeSpecialization["condition"] = tests[idx].condition;
9182 fragments["testfun"] = function.specialize(codeSpecialization);
9183 fragments["pre_main"] = string(constants) + tests[idx].constant + "\n";
9184
9185 if (tests[idx].preserveNanInf)
9186 {
9187 fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9188 fragments["extension"] = "OpExtension \"SPV_KHR_float_controls\"\n";
9189 extensions.push_back("VK_KHR_shader_float_controls");
9190 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
9191 }
9192
9193 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, SpecConstants(), PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions, features, testCtx);
9194 }
9195
9196 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9197 {
9198 std::vector<std::string> extensions;
9199 VulkanFeatures features;
9200 map<string, string> codeSpecialization;
9201 map<string, string> fragments;
9202 SpecConstants passConstants;
9203
9204 codeSpecialization["condition"] = tests[idx].condition;
9205 fragments["testfun"] = specConstantFunction.specialize(codeSpecialization);
9206 fragments["decoration"] = specDecorations;
9207 fragments["pre_main"] = specConstants;
9208
9209 passConstants.append<float>(tests[idx].valueAsFloat);
9210
9211 if (tests[idx].preserveNanInf)
9212 {
9213 fragments["capability"] = "OpCapability SignedZeroInfNanPreserve\n";
9214 fragments["extension"] = "OpExtension \"SPV_KHR_float_controls\"\n";
9215 extensions.push_back("VK_KHR_shader_float_controls");
9216 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
9217 }
9218
9219 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, PushConstants(), GraphicsResources(), GraphicsInterfaces(), extensions, features, testCtx);
9220 }
9221 }
9222
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9223 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
9224 {
9225 RGBA inputColors[4] = {
9226 RGBA(0, 0, 0, 255),
9227 RGBA(0, 0, 255, 255),
9228 RGBA(0, 255, 0, 255),
9229 RGBA(0, 255, 255, 255)
9230 };
9231
9232 RGBA expectedColors[4] =
9233 {
9234 RGBA(255, 0, 0, 255),
9235 RGBA(255, 0, 0, 255),
9236 RGBA(255, 0, 0, 255),
9237 RGBA(255, 0, 0, 255)
9238 };
9239
9240 struct DualFP16Possibility
9241 {
9242 const char* name;
9243 const char* input;
9244 float inputAsFloat;
9245 const char* possibleOutput1;
9246 const char* possibleOutput2;
9247 } tests[] = {
9248 {
9249 "positive_round_up_or_round_down",
9250 "0x1.3003p8",
9251 constructNormalizedFloat(8, 0x300300),
9252 "0x1.304p8",
9253 "0x1.3p8"
9254 },
9255 {
9256 "negative_round_up_or_round_down",
9257 "-0x1.6008p-7",
9258 -constructNormalizedFloat(-7, 0x600800),
9259 "-0x1.6p-7",
9260 "-0x1.604p-7"
9261 },
9262 {
9263 "carry_bit",
9264 "0x1.01ep2",
9265 constructNormalizedFloat(2, 0x01e000),
9266 "0x1.01cp2",
9267 "0x1.02p2"
9268 },
9269 {
9270 "carry_to_exponent",
9271 "0x1.ffep1",
9272 constructNormalizedFloat(1, 0xffe000),
9273 "0x1.ffcp1",
9274 "0x1.0p2"
9275 },
9276 };
9277 StringTemplate constants (
9278 "%input_const = OpConstant %f32 ${input}\n"
9279 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9280 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9281 );
9282
9283 StringTemplate specConstants (
9284 "%input_const = OpSpecConstant %f32 0.\n"
9285 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9286 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9287 );
9288
9289 const char* specDecorations = "OpDecorate %input_const SpecId 0\n";
9290
9291 const char* function =
9292 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9293 "%param1 = OpFunctionParameter %v4f32\n"
9294 "%label_testfun = OpLabel\n"
9295 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9296 // For the purposes of this test we assume that 0.f will always get
9297 // faithfully passed through the pipeline stages.
9298 "%b = OpFAdd %f32 %input_const %a\n"
9299 "%c = OpQuantizeToF16 %f32 %b\n"
9300 "%eq_1 = OpFOrdEqual %bool %c %possible_solution1\n"
9301 "%eq_2 = OpFOrdEqual %bool %c %possible_solution2\n"
9302 "%cond = OpLogicalOr %bool %eq_1 %eq_2\n"
9303 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9304 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9305 " OpReturnValue %retval\n"
9306 "OpFunctionEnd\n";
9307
9308 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9309 map<string, string> fragments;
9310 map<string, string> constantSpecialization;
9311
9312 constantSpecialization["input"] = tests[idx].input;
9313 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9314 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9315 fragments["testfun"] = function;
9316 fragments["pre_main"] = constants.specialize(constantSpecialization);
9317 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9318 }
9319
9320 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9321 map<string, string> fragments;
9322 map<string, string> constantSpecialization;
9323 SpecConstants passConstants;
9324
9325 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9326 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9327 fragments["testfun"] = function;
9328 fragments["decoration"] = specDecorations;
9329 fragments["pre_main"] = specConstants.specialize(constantSpecialization);
9330
9331 passConstants.append<float>(tests[idx].inputAsFloat);
9332
9333 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9334 }
9335 }
9336
createOpQuantizeTests(tcu::TestContext & testCtx)9337 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9338 {
9339 de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9340 createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9341 createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9342 return opQuantizeTests.release();
9343 }
9344
// Per-stage entry point selection for the multiple-entry-point module tests.
// getShaderPermutation() fills every field with either 0 or 1, and
// createModuleTests() uses each value as an index into its { "1", "2" } table
// of entry point name suffixes. The field order matters: getShaderPermutation()
// initializes the struct as an aggregate.
struct ShaderPermutation
{
	deUint8 vertexPermutation;
	deUint8 geometryPermutation;
	deUint8 tesscPermutation;
	deUint8 tessePermutation;
	deUint8 fragmentPermutation;
};
9353
getShaderPermutation(deUint8 inputValue)9354 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9355 {
9356 ShaderPermutation permutation =
9357 {
9358 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9359 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9360 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9361 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9362 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
9363 };
9364 return permutation;
9365 }
9366
createModuleTests(tcu::TestContext & testCtx)9367 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9368 {
9369 RGBA defaultColors[4];
9370 RGBA invertedColors[4];
9371 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9372
9373 getDefaultColors(defaultColors);
9374 getInvertedDefaultColors(invertedColors);
9375
9376 // Combined module tests
9377 {
9378 // Shader stages: vertex and fragment
9379 {
9380 const ShaderElement combinedPipeline[] =
9381 {
9382 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9383 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9384 };
9385
9386 addFunctionCaseWithPrograms<InstanceContext>(
9387 moduleTests.get(), "same_module", createCombinedModule, runAndVerifyDefaultPipeline,
9388 createInstanceContext(combinedPipeline, map<string, string>()));
9389 }
9390
9391 // Shader stages: vertex, geometry and fragment
9392 {
9393 const ShaderElement combinedPipeline[] =
9394 {
9395 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9396 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9397 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9398 };
9399
9400 addFunctionCaseWithPrograms<InstanceContext>(
9401 moduleTests.get(), "same_module_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9402 createInstanceContext(combinedPipeline, map<string, string>()));
9403 }
9404
9405 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9406 {
9407 const ShaderElement combinedPipeline[] =
9408 {
9409 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9410 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9411 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9412 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9413 };
9414
9415 addFunctionCaseWithPrograms<InstanceContext>(
9416 moduleTests.get(), "same_module_tessc_tesse", createCombinedModule, runAndVerifyDefaultPipeline,
9417 createInstanceContext(combinedPipeline, map<string, string>()));
9418 }
9419
9420 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9421 {
9422 const ShaderElement combinedPipeline[] =
9423 {
9424 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9425 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9426 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9427 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9428 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9429 };
9430
9431 addFunctionCaseWithPrograms<InstanceContext>(
9432 moduleTests.get(), "same_module_tessc_tesse_geom", createCombinedModule, runAndVerifyDefaultPipeline,
9433 createInstanceContext(combinedPipeline, map<string, string>()));
9434 }
9435 }
9436
9437 const char* numbers[] =
9438 {
9439 "1", "2"
9440 };
9441
9442 for (deInt8 idx = 0; idx < 32; ++idx)
9443 {
9444 ShaderPermutation permutation = getShaderPermutation(idx);
9445 string name = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9446 const ShaderElement pipeline[] =
9447 {
9448 ShaderElement("vert", string("vert") + numbers[permutation.vertexPermutation], VK_SHADER_STAGE_VERTEX_BIT),
9449 ShaderElement("geom", string("geom") + numbers[permutation.geometryPermutation], VK_SHADER_STAGE_GEOMETRY_BIT),
9450 ShaderElement("tessc", string("tessc") + numbers[permutation.tesscPermutation], VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9451 ShaderElement("tesse", string("tesse") + numbers[permutation.tessePermutation], VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9452 ShaderElement("frag", string("frag") + numbers[permutation.fragmentPermutation], VK_SHADER_STAGE_FRAGMENT_BIT)
9453 };
9454
9455 // If there are an even number of swaps, then it should be no-op.
9456 // If there are an odd number, the color should be flipped.
9457 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
9458 {
9459 addFunctionCaseWithPrograms<InstanceContext>(
9460 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9461 createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9462 }
9463 else
9464 {
9465 addFunctionCaseWithPrograms<InstanceContext>(
9466 moduleTests.get(), name, createMultipleEntries, runAndVerifyDefaultPipeline,
9467 createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9468 }
9469 }
9470 return moduleTests.release();
9471 }
9472
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9473 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9474 {
9475 switch (task)
9476 {
9477 case SHADER_TASK_NONE: return "";
9478 case SHADER_TASK_NORMAL: return prefix + "_normal";
9479 case SHADER_TASK_UNUSED_VAR: return prefix + "_unused_var";
9480 case SHADER_TASK_UNUSED_FUNC: return prefix + "_unused_func";
9481 default: DE_ASSERT(DE_FALSE);
9482 }
9483 // unreachable
9484 return "";
9485 }
9486
getShaderTaskIndexName(ShaderTaskIndex index)9487 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9488 {
9489 switch (index)
9490 {
9491 case SHADER_TASK_INDEX_VERTEX: return "vertex";
9492 case SHADER_TASK_INDEX_GEOMETRY: return "geom";
9493 case SHADER_TASK_INDEX_TESS_CONTROL: return "tessc";
9494 case SHADER_TASK_INDEX_TESS_EVAL: return "tesse";
9495 case SHADER_TASK_INDEX_FRAGMENT: return "frag";
9496 default: DE_ASSERT(DE_FALSE);
9497 }
9498 // unreachable
9499 return "";
9500 }
9501
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9502 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9503 {
9504 std::string testName = location.toString();
9505
9506 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9507 {
9508 if (shaderTasks[i] != SHADER_TASK_NONE)
9509 {
9510 testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9511 }
9512 }
9513
9514 return testName;
9515 }
9516
createUnusedVariableTests(tcu::TestContext & testCtx)9517 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9518 {
9519 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9520
9521 ShaderTaskArray shaderCombinations[] =
9522 {
9523 // Vertex Geometry Tess. Control Tess. Evaluation Fragment
9524 { SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9525 { SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9526 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR },
9527 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC },
9528 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9529 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9530 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9531 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9532 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL },
9533 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL }
9534 };
9535
9536 const VariableLocation testLocations[] =
9537 {
9538 // Set Binding
9539 { 0, 5 },
9540 { 5, 5 },
9541 };
9542
9543 for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9544 {
9545 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9546 {
9547 const ShaderTaskArray& shaderTasks = shaderCombinations[combNdx];
9548 const VariableLocation& location = testLocations[locationNdx];
9549 std::string testName = getUnusedVarTestName(shaderTasks, location);
9550
9551 addFunctionCaseWithPrograms<UnusedVariableContext>(
9552 moduleTests.get(), testName, createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9553 createUnusedVariableContext(shaderTasks, location));
9554 }
9555 }
9556
9557 return moduleTests.release();
9558 }
9559
createLoopTests(tcu::TestContext & testCtx)9560 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9561 {
9562 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9563 RGBA defaultColors[4];
9564 getDefaultColors(defaultColors);
9565 map<string, string> fragments;
9566 fragments["pre_main"] =
9567 "%c_f32_5 = OpConstant %f32 5.\n";
9568
9569 // A loop with a single block. The Continue Target is the loop block
9570 // itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9571 // -- the "continue construct" forms the entire loop.
9572 fragments["testfun"] =
9573 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9574 "%param1 = OpFunctionParameter %v4f32\n"
9575
9576 "%entry = OpLabel\n"
9577 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9578 "OpBranch %loop\n"
9579
9580 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9581 "%loop = OpLabel\n"
9582 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9583 "%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9584 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9585 "%val = OpFAdd %f32 %val1 %delta\n"
9586 "%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9587 "%count__ = OpISub %i32 %count %c_i32_1\n"
9588 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9589 "OpLoopMerge %exit %loop None\n"
9590 "OpBranchConditional %again %loop %exit\n"
9591
9592 "%exit = OpLabel\n"
9593 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9594 "OpReturnValue %result\n"
9595
9596 "OpFunctionEnd\n";
9597
9598 createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9599
9600 // Body comprised of multiple basic blocks.
9601 const StringTemplate multiBlock(
9602 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9603 "%param1 = OpFunctionParameter %v4f32\n"
9604
9605 "%entry = OpLabel\n"
9606 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9607 "OpBranch %loop\n"
9608
9609 ";adds and subtracts 1.0 to %val in alternate iterations\n"
9610 "%loop = OpLabel\n"
9611 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9612 "%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9613 "%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9614 // There are several possibilities for the Continue Target below. Each
9615 // will be specialized into a separate test case.
9616 "OpLoopMerge %exit ${continue_target} None\n"
9617 "OpBranch %if\n"
9618
9619 "%if = OpLabel\n"
9620 ";delta_next = (delta > 0) ? -1 : 1;\n"
9621 "%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9622 "OpSelectionMerge %gather DontFlatten\n"
9623 "OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9624
9625 "%odd = OpLabel\n"
9626 "OpBranch %gather\n"
9627
9628 "%even = OpLabel\n"
9629 "OpBranch %gather\n"
9630
9631 "%gather = OpLabel\n"
9632 "%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9633 "%val = OpFAdd %f32 %val1 %delta\n"
9634 "%count__ = OpISub %i32 %count %c_i32_1\n"
9635 "OpBranch %cont\n"
9636
9637 "%cont = OpLabel\n"
9638 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9639 "OpBranchConditional %again %loop %exit\n"
9640
9641 "%exit = OpLabel\n"
9642 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9643 "OpReturnValue %result\n"
9644
9645 "OpFunctionEnd\n");
9646
9647 map<string, string> continue_target;
9648
9649 // The Continue Target is the loop block itself.
9650 continue_target["continue_target"] = "%if";
9651 fragments["testfun"] = multiBlock.specialize(continue_target);
9652 createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9653
9654 // The Continue Target is at the end of the loop.
9655 continue_target["continue_target"] = "%cont";
9656 fragments["testfun"] = multiBlock.specialize(continue_target);
9657 createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9658
9659 // A loop with continue statement.
9660 fragments["testfun"] =
9661 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9662 "%param1 = OpFunctionParameter %v4f32\n"
9663
9664 "%entry = OpLabel\n"
9665 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9666 "OpBranch %loop\n"
9667
9668 ";adds 4, 3, and 1 to %val0 (skips 2)\n"
9669 "%loop = OpLabel\n"
9670 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9671 "%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9672 "OpLoopMerge %exit %continue None\n"
9673 "OpBranch %if\n"
9674
9675 "%if = OpLabel\n"
9676 ";skip if %count==2\n"
9677 "%eq2 = OpIEqual %bool %count %c_i32_2\n"
9678 "OpBranchConditional %eq2 %continue %body\n"
9679
9680 "%body = OpLabel\n"
9681 "%fcount = OpConvertSToF %f32 %count\n"
9682 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9683 "OpBranch %continue\n"
9684
9685 "%continue = OpLabel\n"
9686 "%val = OpPhi %f32 %val2 %body %val1 %if\n"
9687 "%count__ = OpISub %i32 %count %c_i32_1\n"
9688 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9689 "OpBranchConditional %again %loop %exit\n"
9690
9691 "%exit = OpLabel\n"
9692 "%same = OpFSub %f32 %val %c_f32_8\n"
9693 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9694 "OpReturnValue %result\n"
9695 "OpFunctionEnd\n";
9696 createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9697
9698 // A loop with break.
9699 fragments["testfun"] =
9700 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9701 "%param1 = OpFunctionParameter %v4f32\n"
9702
9703 "%entry = OpLabel\n"
9704 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9705 "%dot = OpDot %f32 %param1 %param1\n"
9706 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9707 "%zero = OpConvertFToU %u32 %div\n"
9708 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9709 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9710 "OpBranch %loop\n"
9711
9712 ";adds 4 and 3 to %val0 (exits early)\n"
9713 "%loop = OpLabel\n"
9714 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9715 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9716 "OpLoopMerge %exit %continue None\n"
9717 "OpBranch %if\n"
9718
9719 "%if = OpLabel\n"
9720 ";end loop if %count==%two\n"
9721 "%above2 = OpSGreaterThan %bool %count %two\n"
9722 "OpBranchConditional %above2 %body %exit\n"
9723
9724 "%body = OpLabel\n"
9725 "%fcount = OpConvertSToF %f32 %count\n"
9726 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9727 "OpBranch %continue\n"
9728
9729 "%continue = OpLabel\n"
9730 "%count__ = OpISub %i32 %count %c_i32_1\n"
9731 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9732 "OpBranchConditional %again %loop %exit\n"
9733
9734 "%exit = OpLabel\n"
9735 "%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9736 "%same = OpFSub %f32 %val_post %c_f32_7\n"
9737 "%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9738 "OpReturnValue %result\n"
9739 "OpFunctionEnd\n";
9740 createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9741
9742 // A loop with return.
9743 fragments["testfun"] =
9744 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9745 "%param1 = OpFunctionParameter %v4f32\n"
9746
9747 "%entry = OpLabel\n"
9748 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9749 "%dot = OpDot %f32 %param1 %param1\n"
9750 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9751 "%zero = OpConvertFToU %u32 %div\n"
9752 "%two = OpIAdd %i32 %zero %c_i32_2\n"
9753 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9754 "OpBranch %loop\n"
9755
9756 ";returns early without modifying %param1\n"
9757 "%loop = OpLabel\n"
9758 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9759 "%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9760 "OpLoopMerge %exit %continue None\n"
9761 "OpBranch %if\n"
9762
9763 "%if = OpLabel\n"
9764 ";return if %count==%two\n"
9765 "%above2 = OpSGreaterThan %bool %count %two\n"
9766 "OpSelectionMerge %body DontFlatten\n"
9767 "OpBranchConditional %above2 %body %early_exit\n"
9768
9769 "%early_exit = OpLabel\n"
9770 "OpReturnValue %param1\n"
9771
9772 "%body = OpLabel\n"
9773 "%fcount = OpConvertSToF %f32 %count\n"
9774 "%val2 = OpFAdd %f32 %val1 %fcount\n"
9775 "OpBranch %continue\n"
9776
9777 "%continue = OpLabel\n"
9778 "%count__ = OpISub %i32 %count %c_i32_1\n"
9779 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9780 "OpBranchConditional %again %loop %exit\n"
9781
9782 "%exit = OpLabel\n"
9783 ";should never get here, so return an incorrect result\n"
9784 "%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9785 "OpReturnValue %result\n"
9786 "OpFunctionEnd\n";
9787 createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9788
9789 // Continue inside a switch block to break to enclosing loop's merge block.
9790 // Matches roughly the following GLSL code:
9791 // for (; keep_going; keep_going = false)
9792 // {
9793 // switch (int(param1.x))
9794 // {
9795 // case 0: continue;
9796 // case 1: continue;
9797 // default: continue;
9798 // }
9799 // dead code: modify return value to invalid result.
9800 // }
9801 fragments["pre_main"] =
9802 "%fp_bool = OpTypePointer Function %bool\n"
9803 "%true = OpConstantTrue %bool\n"
9804 "%false = OpConstantFalse %bool\n";
9805
9806 fragments["testfun"] =
9807 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9808 "%param1 = OpFunctionParameter %v4f32\n"
9809
9810 "%entry = OpLabel\n"
9811 "%keep_going = OpVariable %fp_bool Function\n"
9812 "%val_ptr = OpVariable %fp_f32 Function\n"
9813 "%param1_x = OpCompositeExtract %f32 %param1 0\n"
9814 "OpStore %keep_going %true\n"
9815 "OpBranch %forloop_begin\n"
9816
9817 "%forloop_begin = OpLabel\n"
9818 "OpLoopMerge %forloop_merge %forloop_continue None\n"
9819 "OpBranch %forloop\n"
9820
9821 "%forloop = OpLabel\n"
9822 "%for_condition = OpLoad %bool %keep_going\n"
9823 "OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9824
9825 "%forloop_body = OpLabel\n"
9826 "OpStore %val_ptr %param1_x\n"
9827 "%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9828
9829 "OpSelectionMerge %switch_merge None\n"
9830 "OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9831 "%case_0 = OpLabel\n"
9832 "OpBranch %forloop_continue\n"
9833 "%case_1 = OpLabel\n"
9834 "OpBranch %forloop_continue\n"
9835 "%default = OpLabel\n"
9836 "OpBranch %forloop_continue\n"
9837 "%switch_merge = OpLabel\n"
9838 ";should never get here, so change the return value to invalid result\n"
9839 "OpStore %val_ptr %c_f32_1\n"
9840 "OpBranch %forloop_continue\n"
9841
9842 "%forloop_continue = OpLabel\n"
9843 "OpStore %keep_going %false\n"
9844 "OpBranch %forloop_begin\n"
9845 "%forloop_merge = OpLabel\n"
9846
9847 "%val = OpLoad %f32 %val_ptr\n"
9848 "%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9849 "OpReturnValue %result\n"
9850 "OpFunctionEnd\n";
9851 createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9852
9853 return testGroup.release();
9854 }
9855
9856 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)9857 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9858 {
9859 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9860 map<string, string> fragments;
9861
9862 // A barrier inside a function body.
9863 fragments["pre_main"] =
9864 "%Workgroup = OpConstant %i32 2\n"
9865 "%Invocation = OpConstant %i32 4\n"
9866 "%MemorySemanticsNone = OpConstant %i32 0\n";
9867 fragments["testfun"] =
9868 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9869 "%param1 = OpFunctionParameter %v4f32\n"
9870 "%label_testfun = OpLabel\n"
9871 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9872 "OpReturnValue %param1\n"
9873 "OpFunctionEnd\n";
9874 addTessCtrlTest(testGroup.get(), "in_function", fragments);
9875
9876 // Common setup code for the following tests.
9877 fragments["pre_main"] =
9878 "%Workgroup = OpConstant %i32 2\n"
9879 "%Invocation = OpConstant %i32 4\n"
9880 "%MemorySemanticsNone = OpConstant %i32 0\n"
9881 "%c_f32_5 = OpConstant %f32 5.\n";
9882 const string setupPercentZero = // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9883 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9884 "%param1 = OpFunctionParameter %v4f32\n"
9885 "%entry = OpLabel\n"
9886 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9887 "%dot = OpDot %f32 %param1 %param1\n"
9888 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9889 "%zero = OpConvertFToU %u32 %div\n";
9890
9891 // Barriers inside OpSwitch branches.
9892 fragments["testfun"] =
9893 setupPercentZero +
9894 "OpSelectionMerge %switch_exit None\n"
9895 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9896
9897 "%case1 = OpLabel\n"
9898 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9899 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9900 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9901 "OpBranch %switch_exit\n"
9902
9903 "%switch_default = OpLabel\n"
9904 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9905 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9906 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9907 "OpBranch %switch_exit\n"
9908
9909 "%case0 = OpLabel\n"
9910 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9911 "OpBranch %switch_exit\n"
9912
9913 "%switch_exit = OpLabel\n"
9914 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9915 "OpReturnValue %ret\n"
9916 "OpFunctionEnd\n";
9917 addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9918
9919 // Barriers inside if-then-else.
9920 fragments["testfun"] =
9921 setupPercentZero +
9922 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9923 "OpSelectionMerge %exit DontFlatten\n"
9924 "OpBranchConditional %eq0 %then %else\n"
9925
9926 "%else = OpLabel\n"
9927 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9928 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9929 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9930 "OpBranch %exit\n"
9931
9932 "%then = OpLabel\n"
9933 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9934 "OpBranch %exit\n"
9935 "%exit = OpLabel\n"
9936 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9937 "OpReturnValue %ret\n"
9938 "OpFunctionEnd\n";
9939 addTessCtrlTest(testGroup.get(), "in_if", fragments);
9940
9941 // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9942 // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9943 fragments["testfun"] =
9944 setupPercentZero +
9945 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9946 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9947 "OpSelectionMerge %exit DontFlatten\n"
9948 "OpBranchConditional %thread0 %then %else\n"
9949
9950 "%else = OpLabel\n"
9951 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9952 "OpBranch %exit\n"
9953
9954 "%then = OpLabel\n"
9955 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9956 "OpBranch %exit\n"
9957
9958 "%exit = OpLabel\n"
9959 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9960 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9961 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9962 "OpReturnValue %ret\n"
9963 "OpFunctionEnd\n";
9964 addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9965
9966 // A barrier inside a loop.
9967 fragments["pre_main"] =
9968 "%Workgroup = OpConstant %i32 2\n"
9969 "%Invocation = OpConstant %i32 4\n"
9970 "%MemorySemanticsNone = OpConstant %i32 0\n"
9971 "%c_f32_10 = OpConstant %f32 10.\n";
9972 fragments["testfun"] =
9973 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9974 "%param1 = OpFunctionParameter %v4f32\n"
9975 "%entry = OpLabel\n"
9976 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9977 "OpBranch %loop\n"
9978
9979 ";adds 4, 3, 2, and 1 to %val0\n"
9980 "%loop = OpLabel\n"
9981 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9982 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9983 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9984 "%fcount = OpConvertSToF %f32 %count\n"
9985 "%val = OpFAdd %f32 %val1 %fcount\n"
9986 "%count__ = OpISub %i32 %count %c_i32_1\n"
9987 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9988 "OpLoopMerge %exit %loop None\n"
9989 "OpBranchConditional %again %loop %exit\n"
9990
9991 "%exit = OpLabel\n"
9992 "%same = OpFSub %f32 %val %c_f32_10\n"
9993 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9994 "OpReturnValue %ret\n"
9995 "OpFunctionEnd\n";
9996 addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9997
9998 return testGroup.release();
9999 }
10000
10001 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)10002 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
10003 {
10004 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
10005 map<string, string> fragments;
10006 RGBA inputColors[4];
10007 RGBA outputColors[4];
10008
10009 fragments["pre_main"] =
10010 "%c_f32_3 = OpConstant %f32 3.0\n"
10011 "%c_f32_n3 = OpConstant %f32 -3.0\n"
10012 "%c_f32_4 = OpConstant %f32 4.0\n"
10013 "%c_f32_p75 = OpConstant %f32 0.75\n"
10014 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
10015 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
10016 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
10017
10018 // The test does the following.
10019 // vec4 result = (param1 * 8.0) - 4.0;
10020 // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
10021 fragments["testfun"] =
10022 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10023 "%param1 = OpFunctionParameter %v4f32\n"
10024 "%label_testfun = OpLabel\n"
10025 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
10026 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
10027 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
10028 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
10029 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
10030 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
10031 "OpReturnValue %xy_0_1\n"
10032 "OpFunctionEnd\n";
10033
10034
10035 inputColors[0] = RGBA(16, 16, 0, 255);
10036 inputColors[1] = RGBA(232, 232, 0, 255);
10037 inputColors[2] = RGBA(232, 16, 0, 255);
10038 inputColors[3] = RGBA(16, 232, 0, 255);
10039
10040 outputColors[0] = RGBA(64, 64, 0, 255);
10041 outputColors[1] = RGBA(255, 255, 0, 255);
10042 outputColors[2] = RGBA(255, 64, 0, 255);
10043 outputColors[3] = RGBA(64, 255, 0, 255);
10044
10045 createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10046 return testGroup.release();
10047 }
10048
10049 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10050 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10051 {
10052 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
10053 map<string, string> fragments;
10054
10055 fragments["pre_main"] =
10056 "%c_f32_255 = OpConstant %f32 255.0\n"
10057 "%c_i32_128 = OpConstant %i32 128\n"
10058 "%c_i32_255 = OpConstant %i32 255\n"
10059 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10060 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10061 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10062
10063 // The test does the following.
10064 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10065 // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10066 // return float(result + 128) / 255.0;
10067 fragments["testfun"] =
10068 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10069 "%param1 = OpFunctionParameter %v4f32\n"
10070 "%label_testfun = OpLabel\n"
10071 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10072 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10073 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10074 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10075 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10076 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10077 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10078 "%x_out = OpSRem %i32 %x_in %y_in\n"
10079 "%y_out = OpSRem %i32 %y_in %z_in\n"
10080 "%z_out = OpSRem %i32 %z_in %x_in\n"
10081 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10082 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10083 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10084 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10085 "OpReturnValue %float_out\n"
10086 "OpFunctionEnd\n";
10087
10088 const struct CaseParams
10089 {
10090 const char* name;
10091 const char* failMessageTemplate; // customized status message
10092 qpTestResult failResult; // override status on failure
10093 int operands[4][3]; // four (x, y, z) vectors of operands
10094 int results[4][3]; // four (x, y, z) vectors of results
10095 } cases[] =
10096 {
10097 {
10098 "positive",
10099 "${reason}",
10100 QP_TEST_RESULT_FAIL,
10101 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10102 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10103 },
10104 {
10105 "all",
10106 "Inconsistent results, but within specification: ${reason}",
10107 negFailResult, // negative operands, not required by the spec
10108 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10109 { { 5, 12, -2 }, { 0, -5, 2 }, { 3, 8, -6 }, { 25, -60, 0 } }, // results
10110 },
10111 };
10112 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10113
10114 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10115 {
10116 const CaseParams& params = cases[caseNdx];
10117 RGBA inputColors[4];
10118 RGBA outputColors[4];
10119
10120 for (int i = 0; i < 4; ++i)
10121 {
10122 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10123 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10124 }
10125
10126 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10127 }
10128
10129 return testGroup.release();
10130 }
10131
10132 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10133 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10134 {
10135 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10136 map<string, string> fragments;
10137
10138 fragments["pre_main"] =
10139 "%c_f32_255 = OpConstant %f32 255.0\n"
10140 "%c_i32_128 = OpConstant %i32 128\n"
10141 "%c_i32_255 = OpConstant %i32 255\n"
10142 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10143 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10144 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10145
10146 // The test does the following.
10147 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10148 // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10149 // return float(result + 128) / 255.0;
10150 fragments["testfun"] =
10151 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10152 "%param1 = OpFunctionParameter %v4f32\n"
10153 "%label_testfun = OpLabel\n"
10154 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10155 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10156 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10157 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10158 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10159 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10160 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10161 "%x_out = OpSMod %i32 %x_in %y_in\n"
10162 "%y_out = OpSMod %i32 %y_in %z_in\n"
10163 "%z_out = OpSMod %i32 %z_in %x_in\n"
10164 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10165 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10166 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10167 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10168 "OpReturnValue %float_out\n"
10169 "OpFunctionEnd\n";
10170
10171 const struct CaseParams
10172 {
10173 const char* name;
10174 const char* failMessageTemplate; // customized status message
10175 qpTestResult failResult; // override status on failure
10176 int operands[4][3]; // four (x, y, z) vectors of operands
10177 int results[4][3]; // four (x, y, z) vectors of results
10178 } cases[] =
10179 {
10180 {
10181 "positive",
10182 "${reason}",
10183 QP_TEST_RESULT_FAIL,
10184 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10185 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10186 },
10187 {
10188 "all",
10189 "Inconsistent results, but within specification: ${reason}",
10190 negFailResult, // negative operands, not required by the spec
10191 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10192 { { 5, -5, 3 }, { 0, 2, -3 }, { 3, -73, 69 }, { -35, 40, 0 } }, // results
10193 },
10194 };
10195 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10196
10197 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10198 {
10199 const CaseParams& params = cases[caseNdx];
10200 RGBA inputColors[4];
10201 RGBA outputColors[4];
10202
10203 for (int i = 0; i < 4; ++i)
10204 {
10205 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10206 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10207 }
10208
10209 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10210 }
10211 return testGroup.release();
10212 }
10213
// Data types that can appear on either side of a conversion test.
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
10230
getBitWidthStr(ConversionDataType type)10231 const string getBitWidthStr (ConversionDataType type)
10232 {
10233 switch (type)
10234 {
10235 case DATA_TYPE_SIGNED_8:
10236 case DATA_TYPE_UNSIGNED_8:
10237 return "8";
10238
10239 case DATA_TYPE_SIGNED_16:
10240 case DATA_TYPE_UNSIGNED_16:
10241 case DATA_TYPE_FLOAT_16:
10242 return "16";
10243
10244 case DATA_TYPE_SIGNED_32:
10245 case DATA_TYPE_UNSIGNED_32:
10246 case DATA_TYPE_FLOAT_32:
10247 case DATA_TYPE_VEC2_SIGNED_16:
10248 return "32";
10249
10250 case DATA_TYPE_SIGNED_64:
10251 case DATA_TYPE_UNSIGNED_64:
10252 case DATA_TYPE_FLOAT_64:
10253 case DATA_TYPE_VEC2_SIGNED_32:
10254 return "64";
10255
10256 default:
10257 DE_ASSERT(false);
10258 }
10259 return "";
10260 }
10261
getByteWidthStr(ConversionDataType type)10262 const string getByteWidthStr (ConversionDataType type)
10263 {
10264 switch (type)
10265 {
10266 case DATA_TYPE_SIGNED_8:
10267 case DATA_TYPE_UNSIGNED_8:
10268 return "1";
10269
10270 case DATA_TYPE_SIGNED_16:
10271 case DATA_TYPE_UNSIGNED_16:
10272 case DATA_TYPE_FLOAT_16:
10273 return "2";
10274
10275 case DATA_TYPE_SIGNED_32:
10276 case DATA_TYPE_UNSIGNED_32:
10277 case DATA_TYPE_FLOAT_32:
10278 case DATA_TYPE_VEC2_SIGNED_16:
10279 return "4";
10280
10281 case DATA_TYPE_SIGNED_64:
10282 case DATA_TYPE_UNSIGNED_64:
10283 case DATA_TYPE_FLOAT_64:
10284 case DATA_TYPE_VEC2_SIGNED_32:
10285 return "8";
10286
10287 default:
10288 DE_ASSERT(false);
10289 }
10290 return "";
10291 }
10292
isSigned(ConversionDataType type)10293 bool isSigned (ConversionDataType type)
10294 {
10295 switch (type)
10296 {
10297 case DATA_TYPE_SIGNED_8:
10298 case DATA_TYPE_SIGNED_16:
10299 case DATA_TYPE_SIGNED_32:
10300 case DATA_TYPE_SIGNED_64:
10301 case DATA_TYPE_FLOAT_16:
10302 case DATA_TYPE_FLOAT_32:
10303 case DATA_TYPE_FLOAT_64:
10304 case DATA_TYPE_VEC2_SIGNED_16:
10305 case DATA_TYPE_VEC2_SIGNED_32:
10306 return true;
10307
10308 case DATA_TYPE_UNSIGNED_8:
10309 case DATA_TYPE_UNSIGNED_16:
10310 case DATA_TYPE_UNSIGNED_32:
10311 case DATA_TYPE_UNSIGNED_64:
10312 return false;
10313
10314 default:
10315 DE_ASSERT(false);
10316 }
10317 return false;
10318 }
10319
isInt(ConversionDataType type)10320 bool isInt (ConversionDataType type)
10321 {
10322 switch (type)
10323 {
10324 case DATA_TYPE_SIGNED_8:
10325 case DATA_TYPE_SIGNED_16:
10326 case DATA_TYPE_SIGNED_32:
10327 case DATA_TYPE_SIGNED_64:
10328 case DATA_TYPE_UNSIGNED_8:
10329 case DATA_TYPE_UNSIGNED_16:
10330 case DATA_TYPE_UNSIGNED_32:
10331 case DATA_TYPE_UNSIGNED_64:
10332 return true;
10333
10334 case DATA_TYPE_FLOAT_16:
10335 case DATA_TYPE_FLOAT_32:
10336 case DATA_TYPE_FLOAT_64:
10337 case DATA_TYPE_VEC2_SIGNED_16:
10338 case DATA_TYPE_VEC2_SIGNED_32:
10339 return false;
10340
10341 default:
10342 DE_ASSERT(false);
10343 }
10344 return false;
10345 }
10346
isFloat(ConversionDataType type)10347 bool isFloat (ConversionDataType type)
10348 {
10349 switch (type)
10350 {
10351 case DATA_TYPE_SIGNED_8:
10352 case DATA_TYPE_SIGNED_16:
10353 case DATA_TYPE_SIGNED_32:
10354 case DATA_TYPE_SIGNED_64:
10355 case DATA_TYPE_UNSIGNED_8:
10356 case DATA_TYPE_UNSIGNED_16:
10357 case DATA_TYPE_UNSIGNED_32:
10358 case DATA_TYPE_UNSIGNED_64:
10359 case DATA_TYPE_VEC2_SIGNED_16:
10360 case DATA_TYPE_VEC2_SIGNED_32:
10361 return false;
10362
10363 case DATA_TYPE_FLOAT_16:
10364 case DATA_TYPE_FLOAT_32:
10365 case DATA_TYPE_FLOAT_64:
10366 return true;
10367
10368 default:
10369 DE_ASSERT(false);
10370 }
10371 return false;
10372 }
10373
getTypeName(ConversionDataType type)10374 const string getTypeName (ConversionDataType type)
10375 {
10376 string prefix = isSigned(type) ? "" : "u";
10377
10378 if (isInt(type)) return prefix + "int" + getBitWidthStr(type);
10379 else if (isFloat(type)) return prefix + "float" + getBitWidthStr(type);
10380 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10381 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "i32vec2";
10382 else DE_ASSERT(false);
10383
10384 return "";
10385 }
10386
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10387 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10388 {
10389 const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10390
10391 return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10392 }
10393
getAsmTypeName(ConversionDataType type,deUint32 elements=1)10394 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10395 {
10396 string prefix;
10397
10398 if (isInt(type)) prefix = isSigned(type) ? "i" : "u";
10399 else if (isFloat(type)) prefix = "f";
10400 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10401 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "v2i32";
10402 else DE_ASSERT(false);
10403 if ((isInt(type) || isFloat(type)) && elements == 2)
10404 {
10405 prefix = "v2" + prefix;
10406 }
10407
10408 return prefix + getBitWidthStr(type);
10409 }
10410
10411 template<typename T>
getSpecializedBuffer(deInt64 number,deUint32 elements=1)10412 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10413 {
10414 return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10415 }
10416
getBuffer(ConversionDataType type,deInt64 number,deUint32 elements=1)10417 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10418 {
10419 switch (type)
10420 {
10421 case DATA_TYPE_SIGNED_8: return getSpecializedBuffer<deInt8>(number, elements);
10422 case DATA_TYPE_SIGNED_16: return getSpecializedBuffer<deInt16>(number, elements);
10423 case DATA_TYPE_SIGNED_32: return getSpecializedBuffer<deInt32>(number, elements);
10424 case DATA_TYPE_SIGNED_64: return getSpecializedBuffer<deInt64>(number, elements);
10425 case DATA_TYPE_UNSIGNED_8: return getSpecializedBuffer<deUint8>(number, elements);
10426 case DATA_TYPE_UNSIGNED_16: return getSpecializedBuffer<deUint16>(number, elements);
10427 case DATA_TYPE_UNSIGNED_32: return getSpecializedBuffer<deUint32>(number, elements);
10428 case DATA_TYPE_UNSIGNED_64: return getSpecializedBuffer<deUint64>(number, elements);
10429 case DATA_TYPE_FLOAT_16: return getSpecializedBuffer<deUint16>(number, elements);
10430 case DATA_TYPE_FLOAT_32: return getSpecializedBuffer<deUint32>(number, elements);
10431 case DATA_TYPE_FLOAT_64: return getSpecializedBuffer<deUint64>(number, elements);
10432 case DATA_TYPE_VEC2_SIGNED_16: return getSpecializedBuffer<deUint32>(number, elements);
10433 case DATA_TYPE_VEC2_SIGNED_32: return getSpecializedBuffer<deUint64>(number, elements);
10434
10435 default: TCU_THROW(InternalError, "Unimplemented type passed");
10436 }
10437 }
10438
usesInt8(ConversionDataType from,ConversionDataType to)10439 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10440 {
10441 return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10442 from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10443 }
10444
usesInt16(ConversionDataType from,ConversionDataType to)10445 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10446 {
10447 return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10448 from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10449 from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10450 }
10451
usesInt32(ConversionDataType from,ConversionDataType to)10452 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10453 {
10454 return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10455 from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10456 from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10457 }
10458
usesInt64(ConversionDataType from,ConversionDataType to)10459 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10460 {
10461 return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10462 from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10463 }
10464
usesFloat16(ConversionDataType from,ConversionDataType to)10465 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10466 {
10467 return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10468 }
10469
usesFloat32(ConversionDataType from,ConversionDataType to)10470 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10471 {
10472 return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10473 }
10474
usesFloat64(ConversionDataType from,ConversionDataType to)10475 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10476 {
10477 return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10478 }
10479
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10480 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10481 {
10482 if (usesInt16(from, to) && !usesInt32(from, to))
10483 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10484
10485 if (usesInt64(from, to))
10486 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10487
10488 if (usesFloat64(from, to))
10489 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10490
10491 if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10492 {
10493 extensions.push_back("VK_KHR_16bit_storage");
10494 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10495 }
10496
10497 if (usesFloat16(from, to) || usesInt8(from, to))
10498 {
10499 extensions.push_back("VK_KHR_shader_float16_int8");
10500
10501 if (usesFloat16(from, to))
10502 {
10503 vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10504 }
10505
10506 if (usesInt8(from, to))
10507 {
10508 vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10509
10510 extensions.push_back("VK_KHR_8bit_storage");
10511 vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10512 }
10513 }
10514 }
10515
10516 struct ConvertCase
10517 {
ConvertCasevkt::SpirVAssembly::ConvertCase10518 ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10519 : m_fromType (from)
10520 , m_toType (to)
10521 , m_elements (1)
10522 , m_useStorageExt (useStorageExt)
10523 , m_name (getTestName(from, to, suffix))
10524 {
10525 string caps;
10526 string decl;
10527 string exts;
10528
10529 m_asmTypes["inStorageType"] = getAsmTypeName(from);
10530 m_asmTypes["outStorageType"] = getAsmTypeName(to);
10531 m_asmTypes["inCast"] = "OpCopyObject";
10532 m_asmTypes["outCast"] = "OpCopyObject";
10533 // If the storage extensions are being avoided, tests instead uses
10534 // vectors so that they are easily convertible to 32-bit integers.
10535 // |m_elements| indicates the size of the vector. It modifies how many
10536 // items added to the buffers and converted in the tests.
10537 //
10538 // Currently only supports 1 (default) or 2 elements.
10539 if (!m_useStorageExt)
10540 {
10541 bool in_change = false;
10542 bool out_change = false;
10543 if (usesFloat16(from, from) || usesInt16(from, from))
10544 {
10545 m_asmTypes["inStorageType"] = "u32";
10546 m_asmTypes["inCast"] = "OpBitcast";
10547 m_elements = 2;
10548 in_change = true;
10549 }
10550 if (usesFloat16(to, to) || usesInt16(to, to))
10551 {
10552 m_asmTypes["outStorageType"] = "u32";
10553 m_asmTypes["outCast"] = "OpBitcast";
10554 m_elements = 2;
10555 out_change = true;
10556 }
10557 if (in_change && !out_change)
10558 {
10559 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10560 }
10561 if (!in_change && out_change)
10562 {
10563 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10564 }
10565 }
10566
10567 // Safety check for implementation.
10568 if (m_elements < 1 || m_elements > 2)
10569 TCU_THROW(InternalError, "Unsupported number of elements");
10570
10571 m_asmTypes["inputType"] = getAsmTypeName(from, m_elements);
10572 m_asmTypes["outputType"] = getAsmTypeName(to, m_elements);
10573
10574 m_inputBuffer = getBuffer(from, number, m_elements);
10575 if (separateOutput)
10576 m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10577 else
10578 m_outputBuffer = getBuffer(to, number, m_elements);
10579
10580 if (usesInt8(from, to))
10581 {
10582 bool requiresInt8Capability = true;
10583 if (instruction == "OpUConvert" || instruction == "OpSConvert")
10584 {
10585 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10586 if (usesInt32(from, to))
10587 requiresInt8Capability = false;
10588 }
10589
10590 caps += "OpCapability StorageBuffer8BitAccess\n";
10591 if (requiresInt8Capability)
10592 caps += "OpCapability Int8\n";
10593
10594 decl += "%i8 = OpTypeInt 8 1\n"
10595 "%u8 = OpTypeInt 8 0\n";
10596
10597 if (m_elements == 2)
10598 {
10599 decl += "%v2i8 = OpTypeVector %i8 2\n"
10600 "%v2u8 = OpTypeVector %u8 2\n";
10601 }
10602 exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10603 }
10604
10605 if (usesInt16(from, to))
10606 {
10607 bool requiresInt16Capability = true;
10608
10609 if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10610 {
10611 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10612 if (usesInt32(from, to) || usesFloat32(from, to))
10613 requiresInt16Capability = false;
10614 }
10615
10616 decl += "%i16 = OpTypeInt 16 1\n"
10617 "%u16 = OpTypeInt 16 0\n";
10618 if (m_elements == 2)
10619 {
10620 decl += "%v2i16 = OpTypeVector %i16 2\n"
10621 "%v2u16 = OpTypeVector %u16 2\n";
10622 }
10623 else
10624 {
10625 decl += "%i16vec2 = OpTypeVector %i16 2\n";
10626 }
10627
10628 // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10629 if (requiresInt16Capability || !m_useStorageExt)
10630 caps += "OpCapability Int16\n";
10631 }
10632
10633 if (usesFloat16(from, to))
10634 {
10635 decl += "%f16 = OpTypeFloat 16\n";
10636 if (m_elements == 2)
10637 {
10638 decl += "%v2f16 = OpTypeVector %f16 2\n";
10639 }
10640
10641 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10642 if (!usesFloat32(from, to) || !m_useStorageExt)
10643 caps += "OpCapability Float16\n";
10644 }
10645
10646 if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10647 {
10648 caps += "OpCapability StorageUniformBufferBlock16\n";
10649 exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10650 }
10651
10652 if (usesInt64(from, to))
10653 {
10654 caps += "OpCapability Int64\n";
10655 decl += "%i64 = OpTypeInt 64 1\n"
10656 "%u64 = OpTypeInt 64 0\n";
10657 if (m_elements == 2)
10658 {
10659 decl += "%v2i64 = OpTypeVector %i64 2\n"
10660 "%v2u64 = OpTypeVector %u64 2\n";
10661 }
10662 }
10663
10664 if (usesFloat64(from, to))
10665 {
10666 caps += "OpCapability Float64\n";
10667 decl += "%f64 = OpTypeFloat 64\n";
10668 if (m_elements == 2)
10669 {
10670 decl += "%v2f64 = OpTypeVector %f64 2\n";
10671 }
10672 }
10673
10674 m_asmTypes["datatype_capabilities"] = caps;
10675 m_asmTypes["datatype_additional_decl"] = decl;
10676 m_asmTypes["datatype_extensions"] = exts;
10677 }
10678
10679 ConversionDataType m_fromType;
10680 ConversionDataType m_toType;
10681 deUint32 m_elements;
10682 bool m_useStorageExt;
10683 string m_name;
10684 map<string, string> m_asmTypes;
10685 BufferSp m_inputBuffer;
10686 BufferSp m_outputBuffer;
10687 };
10688
// Generates the compute shader for |convertCase|: it loads one value from the input buffer,
// applies |instruction| (with the casts chosen by the case) and stores the result in the
// output buffer. When |addVectors| is set, %v2u32 and %v2f32 are declared as well.
const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
{
	map<string, string> specialization = convertCase.m_asmTypes;

	specialization["instruction"]	= instruction;
	// The two decorator entries are not referenced by the template assembled in this function.
	specialization["inDecorator"]	= getByteWidthStr(convertCase.m_fromType);
	specialization["outDecorator"]	= getByteWidthStr(convertCase.m_toType);

	const std::string preamble =
		"OpCapability Shader\n"
		"${datatype_capabilities}"
		"${datatype_extensions:opt}"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\"\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"
		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		// Decorators
		"OpDecorate %indata DescriptorSet 0\n"
		"OpDecorate %indata Binding 0\n"
		"OpDecorate %outdata DescriptorSet 0\n"
		"OpDecorate %outdata Binding 1\n"
		"OpDecorate %in_buf BufferBlock\n"
		"OpDecorate %out_buf BufferBlock\n"
		"OpMemberDecorate %in_buf 0 Offset 0\n"
		"OpMemberDecorate %out_buf 0 Offset 0\n"
		// Base types
		"%void = OpTypeVoid\n"
		"%voidf = OpTypeFunction %void\n"
		"%u32 = OpTypeInt 32 0\n"
		"%i32 = OpTypeInt 32 1\n"
		"%f32 = OpTypeFloat 32\n"
		"%v2i32 = OpTypeVector %i32 2\n"
		"${datatype_additional_decl}";

	// Extra 32-bit vector types, emitted only on request.
	const std::string optionalVectorTypes =
		"%v2u32 = OpTypeVector %u32 2\n"
		"%v2f32 = OpTypeVector %f32 2\n";

	const std::string body =
		"%uvec3 = OpTypeVector %u32 3\n"
		// Derived types
		"%in_ptr = OpTypePointer Uniform %${inStorageType}\n"
		"%out_ptr = OpTypePointer Uniform %${outStorageType}\n"
		"%in_buf = OpTypeStruct %${inStorageType}\n"
		"%out_buf = OpTypeStruct %${outStorageType}\n"
		"%in_bufptr = OpTypePointer Uniform %in_buf\n"
		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
		"%indata = OpVariable %in_bufptr Uniform\n"
		"%outdata = OpVariable %out_bufptr Uniform\n"
		// Constants
		"%zero = OpConstant %i32 0\n"
		// Main function
		"%main = OpFunction %void None %voidf\n"
		"%label = OpLabel\n"
		"%inloc = OpAccessChain %in_ptr %indata %zero\n"
		"%outloc = OpAccessChain %out_ptr %outdata %zero\n"
		"%inval = OpLoad %${inStorageType} %inloc\n"
		"%in_cast = ${inCast} %${inputType} %inval\n"
		"%conv = ${instruction} %${outputType} %in_cast\n"
		"%out_cast = ${outCast} %${outStorageType} %conv\n"
		" OpStore %outloc %out_cast\n"
		" OpReturn\n"
		" OpFunctionEnd\n";

	std::string shaderTemplate = preamble;
	if (addVectors)
		shaderTemplate += optionalVectorTypes;
	shaderTemplate += body;

	return StringTemplate(shaderTemplate).specialize(specialization);
}
10758
// Appends the table of conversion test vectors for one SPIR-V conversion
// instruction to testCases.
//
// Recognized instruction names: OpUConvert, OpSConvert, OpFConvert,
// OpConvertFToU, OpConvertUToF, OpConvertFToS and OpConvertSToF. Any other
// name ends up in DE_FATAL("Unknown instruction").
//
// Cases are appended with push_back in the order they are listed below;
// existing contents of testCases are left untouched.
//
// NOTE(review): the ConvertCase constructor is not visible from here. Judging
// by the data, the arguments after the two data types appear to be: the input
// value (raw bit pattern for floats), a flag that is always 'true' when an
// explicit expected value follows, the expected output value (raw bit pattern
// for floats), an optional name suffix, and an optional trailing bool that is
// presumably related to the storage-extension usage (it is 'false' on the
// "no_storage" variants). Cases with only four arguments presumably expect the
// output to equal the input. Confirm against the ConvertCase definition.
void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
{
	if (instruction == "OpUConvert")
	{
		// Convert unsigned int to unsigned int
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_16, 42));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_32, 73));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_64, 121));

		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_8, 33));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_32, 60653));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_64, 17991));

		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_64, 904256275));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_16, 6275));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_8, 17));

		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_32, 701256243));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_16, 4741));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_8, 65));

		// Zero extension for int->uint
		// Negative inputs are expected to come out as their source-width two's complement
		// pattern, zero-extended (e.g. int8 -47 -> 209, int16 -3341 -> 62195).
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 209));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 251));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 62195));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));

		// Truncate for int->uint
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
	}
	else if (instruction == "OpSConvert")
	{
		// Sign extension int->int
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_16, -30));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_32, 55));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_64, -3));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_32, 14669));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_64, -3341));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_64, 973610259));

		// Truncate for int->int
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_8, 81));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_8, -93));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_8, 3182748172687672ll, true, 56));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_16, 12382));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_32, -972812359));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_16, -1067742499291926803ll, true, -4371));

		// Sign extension for int->uint
		// Negative inputs are expected as the destination-width two's complement pattern
		// (e.g. int8 -47 -> 4294967249 == 2^32 - 47).
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 4294967249u));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 18446744073709551611ull));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 18446744073709548275ull));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));

		// Truncate for int->uint
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));

		// Sign extension for uint->int
		// Inputs with the top bit of the source width set are expected to come out negative
		// (e.g. uint8 201 -> -55).
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_16, 71));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_32, 201, true, -55));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_64, 188, true, -68));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_32, 14669));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_64, 62195, true, -3341));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_64, 973610259));

		// Truncate for uint->int
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_8, 67));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_8, 133, true, -123));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_8, 836927654193256494ull, true, 46));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_16, 12382));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_32, 18446744072736739257ull, true, -972812359));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_16, 17379001574417624813ull, true, -4371));

		// Convert i16vec2 to i32vec2 and vice versa
		// Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
		// The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
		// Both vector components are packed into a single integer: one component in the
		// high half (shifted left by the component width) and 27593 in the low half.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_16, DATA_TYPE_VEC2_SIGNED_32, (33413u << 16) | 27593, true, (4294935173ull << 32) | 27593));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_32, DATA_TYPE_VEC2_SIGNED_16, (4294935173ull << 32) | 27593, true, (33413u << 16) | 27593));
	}
	else if (instruction == "OpFConvert")
	{
		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_64, 0x449a4000, true, 0x4093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_32, 0x4093480000000000, true, 0x449a4000));

		// Conversion to/from 32-bit floats are supported by both 16-bit
		// storage and Float16. The tests are duplicated to exercise both
		// cases.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2, "no_storage", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000, "no_storage", false));

		// The 16-bit <-> 64-bit conversions are duplicated in the same way.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000, "no_storage", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2, "no_storage", false));

	}
	else if (instruction == "OpConvertFToU")
	{
		// Normal numbers from uint8 range
		// The "rtz" variants use an input with a non-zero fractional part and expect the
		// same integer result, i.e. rounding toward zero.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5020, true, 33, "33", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x503F, true, 33, "33rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x42280000, true, 42, "42"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x422BFFFF, true, 42, "42rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x4067800000000000ull, true, 188, "188"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x40679FFFFFFFFFFFull, true, 188, "188rtz"));

		// Maximum uint8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BF8, true, 255, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BFF, true, 255, "maxrtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437F0000, true, 255, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437FFFFF, true, 255, "maxrtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FE00000000000ull, true, 255, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FFFFFFFFFFFFFull, true, 255, "maxrtz"));

		// +0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x00000000, true, 0, "p0"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x0000000000000000ull, true, 0, "p0"));

		// -0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x80000000, true, 0, "m0"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x8000000000000000ull, true, 0, "m0"));

		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x64D2, true, 1234, "1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x64D2, true, 1234, "1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x64D2, true, 1234, "1234", false));

		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x7BFF, true, 65504, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x7BFF, true, 65504, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x7BFF, true, 65504, "max", false));

		// Show round to zero behaviour
		// Example: see https://float.exposed/0x58ff
		// 0x44FF is just below 5.0 and 0x58FF is 159.875 as 16-bit floats.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x44FF, true, 4, "p4rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x58FF, true, 159, "p159rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x58FF, true, 159, "p159rtz", false));

		// +0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x0000, true, 0, "p0", false));

		// -0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x8000, true, 0, "m0", false));

		// 32-bit and 64-bit float inputs: 1234.0 exactly, and (for "rtz") a value
		// slightly above 1234.0 that must still convert to 1234.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a4000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a5fff, true, 1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a4000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a5fff, true, 1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a4000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a5fff, true, 1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x51b9ad78, true, 99684909056ll, "large"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x4093480000000000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x40934bffffffffff, true, 1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x4093480000000000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x40934bffffffffff, true, 1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x4093480000000000, true, 1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x40934bffffffffff, true, 1234, "rtz"));
	}
	else if (instruction == "OpConvertUToF")
	{
		// Normal numbers from uint8 range
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 116, true, 0x5740, "116", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 232, true, 0x43680000, "232"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 164, true, 0x4064800000000000ull, "164"));

		// Maximum uint8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 255, true, 0x5BF8, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 255, true, 0x437F0000, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 255, true, 0x406FE00000000000ull, "max"));

		// 0x64D2 represents 1234.0 as a 16-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));

		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));

		// 4294967296 == 2^32, the first value that does not fit in 32 bits
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "4294967296", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "4294967296", false));

		// 64-bit input with only its top 24 bits set, paired with a 32-bit float whose mantissa is all ones
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 0xffffff0000000000, true, 0x5f7fffff, "max", false));

		// 1234 converted to 1234.0 as 32/64-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 99684909056ll, true, 0x51b9ad78, "large"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
	}
	else if (instruction == "OpConvertFToS")
	{
		// Normal numbers from int8 range
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC980, true, -11, "m11", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC9e5, /*-11.7890625*/ true, -11, "m11rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2140000, true, -37, "m37"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2178000, /*-37.875*/ true, -37, "m37rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050800000000000ull, true, -66, "m66"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050B80000000000ull, /*-66.875*/ true, -66, "m66rtz"));

		// Minimum int8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD800, true, -128, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD807, true, -128, "minrtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC3000000, true, -128, "min"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC300e003, true, -128, "minrtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC060000000000000ull, true, -128, "min"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC0601E4FE0000001ull, true, -128, "minrtz"));

		// Maximum int8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57F0, true, 127, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57FF, true, 127, "maxrtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FE0000, true, 127, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FFFFFF, true, 127, "maxrtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FC00000000000ull, true, 127, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FFFFFFFFFFFFFull, true, 127, "maxrtz"));

		// +0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x00000000, true, 0, "p0"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x0000000000000000ull, true, 0, "p0"));

		// -0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x80000000, true, 0, "m0"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x8000000000000000ull, true, 0, "m0"));

		// 0xE4D2 represents -1234.0 as a 16-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xE4D2, true, -1234, "m1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xE4D2, true, -1234, "m1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xE4D2, true, -1234, "m1234", false));

		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
		// 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xF800, true, -32768, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xFBFF, true, -65504, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xFBFF, true, -65504, "min", false));

		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x77FF, true, 32752, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x7BFF, true, 65504, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x7BFF, true, 65504, "max", false));

		// Show round to zero behaviour, from negative side.
		// Example: see https://float.exposed/0xd8ff
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xC4FF, true, -4, "m4rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xD8FF, true, -159, "m159rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xD8FF, true, -159, "m159rtz", false));

		// Show round to zero behaviour, from positive side.
		// Example: see https://float.exposed/0x58ff
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x44FF, true, 4, "p4rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x58FF, true, 159, "p159rtz", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x58FF, true, 159, "p159rtz", false));

		// +0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x0000, true, 0, "p0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x0000, true, 0, "p0", false));

		// -0
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x8000, true, 0, "m0", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x8000, true, 0, "m0", false));

		// 32-bit and 64-bit float inputs: -1234.0 exactly, and (for "rtz") a value
		// slightly below -1234.0 that must still convert to -1234.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a4000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a4000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a4000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a5f00, true, -1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a5f00, true, -1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a5f00, true, -1234, "rtz"));
		// NOTE(review): the "largepos"/"largeneg" suffixes look swapped relative to the sign
		// of the values ("largepos" carries the negative value). They are runtime strings,
		// presumably part of the test names, so they are left unchanged - confirm before renaming.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xd1b9ad78, true, -99684909056ll, "largepos"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0x51b9ad78, true, 99684909056ll, "largeneg"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc093480000000000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc093480000000000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc093480000000000, true, -1234));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc0934bff000000ff, true, -1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc0934bff000000ff, true, -1234, "rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc0934bff000000ff, true, -1234, "rtz"));
		// +/-3001.0 exactly, and values with a fractional part that must round toward zero
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9000, true, 3001, "p3001"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9fff, true, 3001, "p3001rtz"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9000, true, -3001, "m3001"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9fff, true, -3001, "m3001rtz"));
	}
	else if (instruction == "OpConvertSToF")
	{
		// Normal numbers from int8 range
		// NOTE(review): the first case uses input -12 (0xCA00 == -12.0 as a 16-bit float) but
		// carries the suffix "m21"; presumably a typo for "m12". The string is left unchanged
		// because it is runtime data - confirm before renaming.
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -12, true, 0xCA00, "m21", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -21, true, 0xC1A80000, "m21"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -99, true, 0xC058C00000000000ull, "m99"));

		// Minimum int8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -128, true, 0xD800, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -128, true, 0xC3000000, "min"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -128, true, 0xC060000000000000ull, "min"));

		// Maximum int8 value
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, 127, true, 0x57F0, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, 127, true, 0x42FE0000, "max"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, 127, true, 0x405FC00000000000ull, "max"));

		// 0xE4D2 represents -1234.0 as a 16-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));

		// 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));

		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));

		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
		// 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));

		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, 32752, true, 0x77FF, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));

		// +/-4294967296 == +/-2^32, magnitudes that do not fit in 32 bits
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "p4294967296", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "p4294967296", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -4294967296ll, true, 0xcf800000, "m4294967296", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -4294967296ll, true, 0xc1f0000000000000, "m4294967296", false));

		// 64-bit inputs with only 24 significant bits, paired with 32-bit floats whose mantissa is all ones
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 0x7fffff8000000000, true, 0x5effffff, "max", false));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -0x7fffff8000000000, true, 0xdeffffff, "min", false));

		// -1234 converted to -1234.0 as 32/64-bit IEEE 754 float
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -99684909056ll, true, 0xd1b9ad78, "large"));
		testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
	}
	else
		DE_FATAL("Unknown instruction");
}
11129
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11130 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
11131 {
11132 map<string, string> params = convertCase.m_asmTypes;
11133 map<string, string> fragments;
11134
11135 params["instruction"] = instruction;
11136 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11137
11138 const StringTemplate decoration (
11139 " OpDecorate %SSBOi DescriptorSet 0\n"
11140 " OpDecorate %SSBOo DescriptorSet 0\n"
11141 " OpDecorate %SSBOi Binding 0\n"
11142 " OpDecorate %SSBOo Binding 1\n"
11143 " OpDecorate %s_SSBOi Block\n"
11144 " OpDecorate %s_SSBOo Block\n"
11145 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11146 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11147
11148 const StringTemplate pre_main (
11149 "${datatype_additional_decl:opt}"
11150 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11151 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11152 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11153 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11154 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11155 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11156 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11157 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11158
11159 const StringTemplate testfun (
11160 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11161 "%param = OpFunctionParameter %v4f32\n"
11162 "%label = OpLabel\n"
11163 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11164 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11165 "%valIn = OpLoad %${inStorageType} %iLoc\n"
11166 "%valInCast = ${inCast} %${inputType} %valIn\n"
11167 "%conv = ${instruction} %${outputType} %valInCast\n"
11168 "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11169 " OpStore %oLoc %valOutCast\n"
11170 " OpReturnValue %param\n"
11171 " OpFunctionEnd\n");
11172
11173 params["datatype_extensions"] =
11174 params["datatype_extensions"] +
11175 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11176
11177 fragments["capability"] = params["datatype_capabilities"];
11178 fragments["extension"] = params["datatype_extensions"];
11179 fragments["decoration"] = decoration.specialize(params);
11180 fragments["pre_main"] = pre_main.specialize(params);
11181 fragments["testfun"] = testfun.specialize(params);
11182
11183 return fragments;
11184 }
11185
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11186 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
11187 {
11188 map<string, string> params = convertCase.m_asmTypes;
11189 map<string, string> fragments;
11190
11191 params["instruction"] = instruction;
11192 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11193
11194 const StringTemplate decoration(
11195 " OpDecorate %SSBOi DescriptorSet 0\n"
11196 " OpDecorate %SSBOo DescriptorSet 0\n"
11197 " OpDecorate %SSBOi Binding 0\n"
11198 " OpDecorate %SSBOo Binding 1\n"
11199 " OpDecorate %s_SSBOi Block\n"
11200 " OpDecorate %s_SSBOo Block\n"
11201 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11202 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11203
11204 const StringTemplate pre_main(
11205 "${datatype_additional_decl:opt}"
11206 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11207 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11208 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11209 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11210 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11211 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11212 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11213 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11214
11215 const StringTemplate testfun(
11216 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11217 "%param = OpFunctionParameter %v4f32\n"
11218 "%label = OpLabel\n"
11219 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11220 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11221 "%inval = OpLoad %${inStorageType} %iLoc\n"
11222 "%in_cast = ${inCast} %${inputType} %inval\n"
11223 "%conv = ${instruction} %${outputType} %in_cast\n"
11224 "%out_cast = ${outCast} %${outStorageType} %conv\n"
11225 " OpStore %oLoc %out_cast\n"
11226 " OpReturnValue %param\n"
11227 " OpFunctionEnd\n");
11228
11229 params["datatype_extensions"] =
11230 params["datatype_extensions"] +
11231 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11232
11233 fragments["capability"] = params["datatype_capabilities"];
11234 fragments["extension"] = params["datatype_extensions"];
11235 fragments["decoration"] = decoration.specialize(params);
11236 fragments["pre_main"] = pre_main.specialize(params);
11237 fragments["testfun"] = testfun.specialize(params);
11238 return fragments;
11239 }
11240
11241 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11242 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11243 {
11244 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11245 vector<ConvertCase> testCases;
11246 createConvertCases(testCases, instruction);
11247
11248 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11249 {
11250 ComputeShaderSpec spec;
11251 spec.assembly = getConvertCaseShaderStr(instruction, *test, true);
11252 spec.numWorkGroups = IVec3(1, 1, 1);
11253 spec.inputs.push_back (test->m_inputBuffer);
11254 spec.outputs.push_back (test->m_outputBuffer);
11255
11256 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11257
11258 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), spec));
11259 }
11260 return group.release();
11261 }
11262
11263 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11264 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11265 {
11266 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11267 vector<ConvertCase> testCases;
11268 createConvertCases(testCases, instruction);
11269
11270 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11271 {
11272 map<string, string> fragments = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11273 VulkanFeatures vulkanFeatures;
11274 GraphicsResources resources;
11275 vector<string> extensions;
11276 SpecConstants noSpecConstants;
11277 PushConstants noPushConstants;
11278 GraphicsInterfaces noInterfaces;
11279 tcu::RGBA defaultColors[4];
11280
11281 getDefaultColors (defaultColors);
11282 resources.inputs.push_back (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11283 resources.outputs.push_back (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11284 extensions.push_back ("VK_KHR_storage_buffer_storage_class");
11285
11286 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
11287
11288 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
11289 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
11290
11291 createTestsForAllStages(
11292 test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11293 noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11294 }
11295 return group.release();
11296 }
11297
11298 // Constant-Creation Instructions: OpConstant, OpConstantComposite
createOpConstantFloat16Tests(tcu::TestContext & testCtx)11299 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11300 {
11301 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11302 RGBA inputColors[4];
11303 RGBA outputColors[4];
11304 vector<string> extensions;
11305 GraphicsResources resources;
11306 VulkanFeatures features;
11307
11308 const char functionStart[] =
11309 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11310 "%param1 = OpFunctionParameter %v4f32\n"
11311 "%lbl = OpLabel\n";
11312
11313 const char functionEnd[] =
11314 "%transformed_param_32 = OpFConvert %v4f32 %transformed_param\n"
11315 " OpReturnValue %transformed_param_32\n"
11316 " OpFunctionEnd\n";
11317
11318 struct NameConstantsCode
11319 {
11320 string name;
11321 string constants;
11322 string code;
11323 };
11324
11325 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11326 "%f16 = OpTypeFloat 16\n" \
11327 "%c_f16_0 = OpConstant %f16 0.0\n" \
11328 "%c_f16_0_5 = OpConstant %f16 0.5\n" \
11329 "%c_f16_1 = OpConstant %f16 1.0\n" \
11330 "%v4f16 = OpTypeVector %f16 4\n" \
11331 "%fp_f16 = OpTypePointer Function %f16\n" \
11332 "%fp_v4f16 = OpTypePointer Function %v4f16\n" \
11333 "%c_v4f16_1_1_1_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11334 "%a4f16 = OpTypeArray %f16 %c_u32_4\n" \
11335
11336 NameConstantsCode tests[] =
11337 {
11338 {
11339 "vec4",
11340
11341 FLOAT_16_COMMON_TYPES_AND_CONSTS
11342 "%cval = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11343 "%param1_16 = OpFConvert %v4f16 %param1\n"
11344 "%transformed_param = OpFAdd %v4f16 %param1_16 %cval\n"
11345 },
11346 {
11347 "struct",
11348
11349 FLOAT_16_COMMON_TYPES_AND_CONSTS
11350 "%stype = OpTypeStruct %v4f16 %f16\n"
11351 "%fp_stype = OpTypePointer Function %stype\n"
11352 "%f16_n_1 = OpConstant %f16 -1.0\n"
11353 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11354 "%cvec = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11355 "%cval = OpConstantComposite %stype %cvec %f16_n_1\n",
11356
11357 "%v = OpVariable %fp_stype Function %cval\n"
11358 "%vec_ptr = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11359 "%f16_ptr = OpAccessChain %fp_f16 %v %c_u32_1\n"
11360 "%vec_val = OpLoad %v4f16 %vec_ptr\n"
11361 "%f16_val = OpLoad %f16 %f16_ptr\n"
11362 "%tmp1 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11363 "%param1_16 = OpFConvert %v4f16 %param1\n"
11364 "%tmp2 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11365 "%transformed_param = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11366 },
11367 {
11368 // [1|0|0|0.5] [x] = x + 0.5
11369 // [0|1|0|0.5] [y] = y + 0.5
11370 // [0|0|1|0.5] [z] = z + 0.5
11371 // [0|0|0|1 ] [1] = 1
11372 "matrix",
11373
11374 FLOAT_16_COMMON_TYPES_AND_CONSTS
11375 "%mat4x4_f16 = OpTypeMatrix %v4f16 4\n"
11376 "%v4f16_1_0_0_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11377 "%v4f16_0_1_0_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11378 "%v4f16_0_0_1_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11379 "%v4f16_0_5_0_5_0_5_1 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11380 "%cval = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11381
11382 "%param1_16 = OpFConvert %v4f16 %param1\n"
11383 "%transformed_param = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
11384 },
11385 {
11386 "array",
11387
11388 FLOAT_16_COMMON_TYPES_AND_CONSTS
11389 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11390 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11391 "%f16_n_1 = OpConstant %f16 -1.0\n"
11392 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11393 "%carr = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11394
11395 "%v = OpVariable %fp_a4f16 Function %carr\n"
11396 "%f = OpAccessChain %fp_f16 %v %c_u32_0\n"
11397 "%f1 = OpAccessChain %fp_f16 %v %c_u32_1\n"
11398 "%f2 = OpAccessChain %fp_f16 %v %c_u32_2\n"
11399 "%f3 = OpAccessChain %fp_f16 %v %c_u32_3\n"
11400 "%f_val = OpLoad %f16 %f\n"
11401 "%f1_val = OpLoad %f16 %f1\n"
11402 "%f2_val = OpLoad %f16 %f2\n"
11403 "%f3_val = OpLoad %f16 %f3\n"
11404 "%ftot1 = OpFAdd %f16 %f_val %f1_val\n"
11405 "%ftot2 = OpFAdd %f16 %ftot1 %f2_val\n"
11406 "%ftot3 = OpFAdd %f16 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
11407 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11408 "%param1_16 = OpFConvert %v4f16 %param1\n"
11409 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
11410 },
11411 {
11412 //
11413 // [
11414 // {
11415 // 0.0,
11416 // [ 1.0, 1.0, 1.0, 1.0]
11417 // },
11418 // {
11419 // 1.0,
11420 // [ 0.0, 0.5, 0.0, 0.0]
11421 // }, // ^^^
11422 // {
11423 // 0.0,
11424 // [ 1.0, 1.0, 1.0, 1.0]
11425 // }
11426 // ]
11427 "array_of_struct_of_array",
11428
11429 FLOAT_16_COMMON_TYPES_AND_CONSTS
11430 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11431 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11432 "%stype = OpTypeStruct %f16 %a4f16\n"
11433 "%a3stype = OpTypeArray %stype %c_u32_3\n"
11434 "%fp_a3stype = OpTypePointer Function %a3stype\n"
11435 "%ca4f16_0 = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11436 "%ca4f16_1 = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11437 "%cstype1 = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11438 "%cstype2 = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11439 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11440
11441 "%v = OpVariable %fp_a3stype Function %carr\n"
11442 "%f = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11443 "%f_l = OpLoad %f16 %f\n"
11444 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11445 "%param1_16 = OpFConvert %v4f16 %param1\n"
11446 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
11447 }
11448 };
11449
11450 getHalfColorsFullAlpha(inputColors);
11451 outputColors[0] = RGBA(255, 255, 255, 255);
11452 outputColors[1] = RGBA(255, 127, 127, 255);
11453 outputColors[2] = RGBA(127, 255, 127, 255);
11454 outputColors[3] = RGBA(127, 127, 255, 255);
11455
11456 extensions.push_back("VK_KHR_shader_float16_int8");
11457 features.extFloat16Int8.shaderFloat16 = true;
11458
11459 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11460 {
11461 map<string, string> fragments;
11462
11463 fragments["capability"] = "OpCapability Float16\n";
11464 fragments["pre_main"] = tests[testNdx].constants;
11465 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
11466
11467 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11468 }
11469 return opConstantCompositeTests.release();
11470 }
11471
11472 template<typename T>
11473 void finalizeTestsCreation (T& specResource,
11474 const map<string, string>& fragments,
11475 tcu::TestContext& testCtx,
11476 tcu::TestCaseGroup& testGroup,
11477 const std::string& testName,
11478 const VulkanFeatures& vulkanFeatures,
11479 const vector<string>& extensions,
11480 const IVec3& numWorkGroups,
11481 const bool splitRenderArea = false);
11482
11483 template<>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11484 void finalizeTestsCreation (GraphicsResources& specResource,
11485 const map<string, string>& fragments,
11486 tcu::TestContext& ,
11487 tcu::TestCaseGroup& testGroup,
11488 const std::string& testName,
11489 const VulkanFeatures& vulkanFeatures,
11490 const vector<string>& extensions,
11491 const IVec3& ,
11492 const bool splitRenderArea)
11493 {
11494 RGBA defaultColors[4];
11495 getDefaultColors(defaultColors);
11496
11497 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11498 }
11499
11500 template<>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11501 void finalizeTestsCreation (ComputeShaderSpec& specResource,
11502 const map<string, string>& fragments,
11503 tcu::TestContext& testCtx,
11504 tcu::TestCaseGroup& testGroup,
11505 const std::string& testName,
11506 const VulkanFeatures& vulkanFeatures,
11507 const vector<string>& extensions,
11508 const IVec3& numWorkGroups,
11509 bool)
11510 {
11511 specResource.numWorkGroups = numWorkGroups;
11512 specResource.requestedVulkanFeatures = vulkanFeatures;
11513 specResource.extensions = extensions;
11514
11515 specResource.assembly = makeComputeShaderAssembly(fragments);
11516
11517 testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), specResource));
11518 }
11519
11520 template<class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11521 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11522 {
11523 const string nan = nanSupported ? "_nan" : "";
11524 const string groupName = "logical" + nan;
11525 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11526
11527 de::Random rnd (deStringHash(testGroup->getName()));
11528 const string spvCapabilities = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11529 const string spvExtensions = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11530 const string spvExecutionMode = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11531 const deUint32 numDataPointsScalar = 16;
11532 const deUint32 numDataPointsVector = 14;
11533 const vector<deFloat16> float16DataScalar = getFloat16s(rnd, numDataPointsScalar);
11534 const vector<deFloat16> float16DataVector = getFloat16s(rnd, numDataPointsVector);
11535 const vector<deFloat16> float16Data1 = squarize(float16DataScalar, 0); // Total Size: square(sizeof(float16DataScalar))
11536 const vector<deFloat16> float16Data2 = squarize(float16DataScalar, 1);
11537 const vector<deFloat16> float16DataVec1 = squarizeVector(float16DataVector, 0); // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11538 const vector<deFloat16> float16DataVec2 = squarizeVector(float16DataVector, 1);
11539 const vector<deFloat16> float16OutUnused (float16Data1.size(), 0);
11540 const vector<deFloat16> float16OutVecUnused (float16DataVec1.size(), 0);
11541
11542 struct TestOp
11543 {
11544 const char* opCode;
11545 VerifyIOFunc verifyFuncNan;
11546 VerifyIOFunc verifyFuncNonNan;
11547 const deUint32 argCount;
11548 };
11549
11550 const TestOp testOps[] =
11551 {
11552 { "OpIsNan" , compareFP16Logical<fp16isNan, true, false, true>, compareFP16Logical<fp16isNan, true, false, false>, 1 },
11553 { "OpIsInf" , compareFP16Logical<fp16isInf, true, false, true>, compareFP16Logical<fp16isInf, true, false, false>, 1 },
11554 { "OpFOrdEqual" , compareFP16Logical<fp16isEqual, false, true, true>, compareFP16Logical<fp16isEqual, false, true, false>, 2 },
11555 { "OpFUnordEqual" , compareFP16Logical<fp16isEqual, false, false, true>, compareFP16Logical<fp16isEqual, false, false, false>, 2 },
11556 { "OpFOrdNotEqual" , compareFP16Logical<fp16isUnequal, false, true, true>, compareFP16Logical<fp16isUnequal, false, true, false>, 2 },
11557 { "OpFUnordNotEqual" , compareFP16Logical<fp16isUnequal, false, false, true>, compareFP16Logical<fp16isUnequal, false, false, false>, 2 },
11558 { "OpFOrdLessThan" , compareFP16Logical<fp16isLess, false, true, true>, compareFP16Logical<fp16isLess, false, true, false>, 2 },
11559 { "OpFUnordLessThan" , compareFP16Logical<fp16isLess, false, false, true>, compareFP16Logical<fp16isLess, false, false, false>, 2 },
11560 { "OpFOrdGreaterThan" , compareFP16Logical<fp16isGreater, false, true, true>, compareFP16Logical<fp16isGreater, false, true, false>, 2 },
11561 { "OpFUnordGreaterThan" , compareFP16Logical<fp16isGreater, false, false, true>, compareFP16Logical<fp16isGreater, false, false, false>, 2 },
11562 { "OpFOrdLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, true, true>, compareFP16Logical<fp16isLessOrEqual, false, true, false>, 2 },
11563 { "OpFUnordLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, false, true>, compareFP16Logical<fp16isLessOrEqual, false, false, false>, 2 },
11564 { "OpFOrdGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, true, true>, compareFP16Logical<fp16isGreaterOrEqual, false, true, false>, 2 },
11565 { "OpFUnordGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, false, true>, compareFP16Logical<fp16isGreaterOrEqual, false, false, false>, 2 },
11566 };
11567
11568 { // scalar cases
11569 const StringTemplate preMain
11570 (
11571 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11572 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11573 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11574 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11575 " %f16 = OpTypeFloat 16\n"
11576 " %v2f16 = OpTypeVector %f16 2\n"
11577 " %c_f16_0 = OpConstant %f16 0.0\n"
11578 " %c_f16_1 = OpConstant %f16 1.0\n"
11579 " %up_u32 = OpTypePointer Uniform %u32\n"
11580 " %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11581 " %SSBO16 = OpTypeStruct %ra_u32\n"
11582 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11583 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11584 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11585 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11586 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11587 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11588 );
11589
11590 const StringTemplate decoration
11591 (
11592 "OpDecorate %ra_u32 ArrayStride 4\n"
11593 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11594 "OpDecorate %SSBO16 BufferBlock\n"
11595 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11596 "OpDecorate %ssbo_src0 Binding 0\n"
11597 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11598 "OpDecorate %ssbo_src1 Binding 1\n"
11599 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11600 "OpDecorate %ssbo_dst Binding 2\n"
11601 );
11602
11603 const StringTemplate testFun
11604 (
11605 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11606 " %param = OpFunctionParameter %v4f32\n"
11607
11608 " %entry = OpLabel\n"
11609 " %i = OpVariable %fp_i32 Function\n"
11610 " OpStore %i %c_i32_0\n"
11611 " OpBranch %loop\n"
11612
11613 " %loop = OpLabel\n"
11614 " %i_cmp = OpLoad %i32 %i\n"
11615 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11616 " OpLoopMerge %merge %next None\n"
11617 " OpBranchConditional %lt %write %merge\n"
11618
11619 " %write = OpLabel\n"
11620 " %ndx = OpLoad %i32 %i\n"
11621
11622 " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11623
11624 "${op_arg1_calc}"
11625
11626 " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11627 " %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11628 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11629 " OpBranch %next\n"
11630
11631 " %next = OpLabel\n"
11632 " %i_cur = OpLoad %i32 %i\n"
11633 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11634 " OpStore %i %i_new\n"
11635 " OpBranch %loop\n"
11636
11637 " %merge = OpLabel\n"
11638 " OpReturnValue %param\n"
11639
11640 " OpFunctionEnd\n"
11641 );
11642
11643 const StringTemplate arg1Calc
11644 (
11645 " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11646 );
11647
11648 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11649 {
11650 const size_t iterations = float16Data1.size();
11651 const TestOp& testOp = testOps[testOpsIdx];
11652 const string testName = de::toLower(string(testOp.opCode)) + "_scalar";
11653 SpecResource specResource;
11654 map<string, string> specs;
11655 VulkanFeatures features;
11656 map<string, string> fragments;
11657 vector<string> extensions;
11658
11659 specs["num_data_points"] = de::toString(iterations);
11660 specs["op_code"] = testOp.opCode;
11661 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11662 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11663
11664 fragments["extension"] = spvExtensions;
11665 fragments["capability"] = spvCapabilities;
11666 fragments["execution_mode"] = spvExecutionMode;
11667 fragments["decoration"] = decoration.specialize(specs);
11668 fragments["pre_main"] = preMain.specialize(specs);
11669 fragments["testfun"] = testFun.specialize(specs);
11670 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11671 if (testOp.argCount > 1)
11672 {
11673 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11674 }
11675 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11676
11677 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11678 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11679 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11680 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11681
11682 extensions.push_back("VK_KHR_shader_float16_int8");
11683
11684 if (nanSupported)
11685 {
11686 extensions.push_back("VK_KHR_shader_float_controls");
11687
11688 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11689 }
11690
11691 features.extFloat16Int8.shaderFloat16 = true;
11692
11693 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11694 }
11695 }
11696 { // vector cases
11697 const StringTemplate preMain
11698 (
11699 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11700 " %v2bool = OpTypeVector %bool 2\n"
11701 " %f16 = OpTypeFloat 16\n"
11702 " %c_f16_0 = OpConstant %f16 0.0\n"
11703 " %c_f16_1 = OpConstant %f16 1.0\n"
11704 " %v2f16 = OpTypeVector %f16 2\n"
11705 " %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11706 " %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11707 " %up_u32 = OpTypePointer Uniform %u32\n"
11708 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11709 " %SSBO16 = OpTypeStruct %ra_u32\n"
11710 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11711 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11712 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11713 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11714 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11715 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11716 );
11717
11718 const StringTemplate decoration
11719 (
11720 "OpDecorate %ra_u32 ArrayStride 4\n"
11721 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11722 "OpDecorate %SSBO16 BufferBlock\n"
11723 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11724 "OpDecorate %ssbo_src0 Binding 0\n"
11725 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11726 "OpDecorate %ssbo_src1 Binding 1\n"
11727 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11728 "OpDecorate %ssbo_dst Binding 2\n"
11729 );
11730
11731 const StringTemplate testFun
11732 (
11733 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11734 " %param = OpFunctionParameter %v4f32\n"
11735
11736 " %entry = OpLabel\n"
11737 " %i = OpVariable %fp_i32 Function\n"
11738 " OpStore %i %c_i32_0\n"
11739 " OpBranch %loop\n"
11740
11741 " %loop = OpLabel\n"
11742 " %i_cmp = OpLoad %i32 %i\n"
11743 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11744 " OpLoopMerge %merge %next None\n"
11745 " OpBranchConditional %lt %write %merge\n"
11746
11747 " %write = OpLabel\n"
11748 " %ndx = OpLoad %i32 %i\n"
11749
11750 " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11751
11752 "${op_arg1_calc}"
11753
11754 " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11755 " %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11756 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11757 " OpBranch %next\n"
11758
11759 " %next = OpLabel\n"
11760 " %i_cur = OpLoad %i32 %i\n"
11761 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11762 " OpStore %i %i_new\n"
11763 " OpBranch %loop\n"
11764
11765 " %merge = OpLabel\n"
11766 " OpReturnValue %param\n"
11767
11768 " OpFunctionEnd\n"
11769 );
11770
11771 const StringTemplate arg1Calc
11772 (
11773 " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11774 );
11775
11776 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11777 {
11778 const deUint32 itemsPerVec = 2;
11779 const size_t iterations = float16DataVec1.size() / itemsPerVec;
11780 const TestOp& testOp = testOps[testOpsIdx];
11781 const string testName = de::toLower(string(testOp.opCode)) + "_vector";
11782 SpecResource specResource;
11783 map<string, string> specs;
11784 vector<string> extensions;
11785 VulkanFeatures features;
11786 map<string, string> fragments;
11787
11788 specs["num_data_points"] = de::toString(iterations);
11789 specs["op_code"] = testOp.opCode;
11790 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11791 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11792
11793 fragments["extension"] = spvExtensions;
11794 fragments["capability"] = spvCapabilities;
11795 fragments["execution_mode"] = spvExecutionMode;
11796 fragments["decoration"] = decoration.specialize(specs);
11797 fragments["pre_main"] = preMain.specialize(specs);
11798 fragments["testfun"] = testFun.specialize(specs);
11799 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11800 if (testOp.argCount > 1)
11801 {
11802 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11803 }
11804 fragments["testfun"] += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11805
11806 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11807 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11808 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11809 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11810
11811 extensions.push_back("VK_KHR_shader_float16_int8");
11812
11813 if (nanSupported)
11814 {
11815 extensions.push_back("VK_KHR_shader_float_controls");
11816
11817 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11818 }
11819
11820 features.extFloat16Int8.shaderFloat16 = true;
11821
11822 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11823 }
11824 }
11825
11826 return testGroup.release();
11827 }
11828
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11829 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11830 {
11831 if (inputs.size() != 1 || outputAllocs.size() != 1)
11832 return false;
11833
11834 vector<deUint8> input1Bytes;
11835
11836 inputs[0].getBytes(input1Bytes);
11837
11838 const deUint16* const input1AsFP16 = (const deUint16*)&input1Bytes[0];
11839 const deUint16* const outputAsFP16 = (const deUint16*)outputAllocs[0]->getHostPtr();
11840 std::string error;
11841
11842 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11843 {
11844 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11845 {
11846 log << TestLog::Message << error << TestLog::EndMessage;
11847
11848 return false;
11849 }
11850 }
11851
11852 return true;
11853 }
11854
11855 template<class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)11856 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11857 {
11858 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11859
11860 de::Random rnd (deStringHash(testGroup->getName()));
11861 const StringTemplate capabilities ("OpCapability Float16\n");
11862 const deUint32 numDataPoints = 256;
11863 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
11864 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
11865 map<string, string> fragments;
11866
11867 struct TestType
11868 {
11869 const deUint32 typeComponents;
11870 const char* typeName;
11871 const char* typeDecls;
11872 const char* typeStorage;
11873 const string loadFunc;
11874 const string storeFunc;
11875 };
11876
11877 const TestType testTypes[] =
11878 {
11879 {
11880 1,
11881 "f16",
11882 " %v2f16 = OpTypeVector %f16 2\n"
11883 "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11884 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11885 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11886 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11887 "u32_hndp",
11888 loadScalarF16FromUint,
11889 storeScalarF16AsUint
11890 },
11891 {
11892 2,
11893 "v2f16",
11894 " %v2f16 = OpTypeVector %f16 2\n"
11895 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11896 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11897 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11898 "u32_ndp",
11899 loadV2F16FromUint,
11900 storeV2F16AsUint
11901 },
11902 {
11903 4,
11904 "v4f16",
11905 " %v2f16 = OpTypeVector %f16 2\n"
11906 " %v4f16 = OpTypeVector %f16 4\n"
11907 " %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11908 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11909 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11910 "ra_u32_2",
11911 loadV4F16FromUints,
11912 storeV4F16AsUints
11913 },
11914 };
11915
11916 const StringTemplate preMain
11917 (
11918 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11919 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11920 " %v2bool = OpTypeVector %bool 2\n"
11921 " %f16 = OpTypeFloat 16\n"
11922 " %c_f16_0 = OpConstant %f16 0.0\n"
11923
11924 "${type_decls}"
11925
11926 " %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11927 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11928 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11929 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11930 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11931 " %up_u32 = OpTypePointer Uniform %u32\n"
11932 " %SSBO16 = OpTypeStruct %ra_${ts}\n"
11933 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11934 " %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11935 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11936 );
11937
11938 const StringTemplate decoration
11939 (
11940 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11941 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11942 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11943 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11944 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11945 "OpDecorate %SSBO16 BufferBlock\n"
11946 "OpDecorate %ssbo_src DescriptorSet 0\n"
11947 "OpDecorate %ssbo_src Binding 0\n"
11948 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11949 "OpDecorate %ssbo_dst Binding 1\n"
11950 );
11951
11952 const StringTemplate testFun
11953 (
11954 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11955 " %param = OpFunctionParameter %v4f32\n"
11956 " %entry = OpLabel\n"
11957
11958 " %i = OpVariable %fp_i32 Function\n"
11959 " OpStore %i %c_i32_0\n"
11960 " OpBranch %loop\n"
11961
11962 " %loop = OpLabel\n"
11963 " %i_cmp = OpLoad %i32 %i\n"
11964 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11965 " OpLoopMerge %merge %next None\n"
11966 " OpBranchConditional %lt %write %merge\n"
11967
11968 " %write = OpLabel\n"
11969 " %ndx = OpLoad %i32 %i\n"
11970
11971 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11972 " %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11973 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11974 " OpBranch %next\n"
11975
11976 " %next = OpLabel\n"
11977 " %i_cur = OpLoad %i32 %i\n"
11978 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11979 " OpStore %i %i_new\n"
11980 " OpBranch %loop\n"
11981
11982 " %merge = OpLabel\n"
11983 " OpReturnValue %param\n"
11984
11985 " OpFunctionEnd\n"
11986
11987 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11988 " %param0 = OpFunctionParameter %${tt}\n"
11989 " %entry_pf = OpLabel\n"
11990 " %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11991 " OpReturnValue %res0\n"
11992 " OpFunctionEnd\n"
11993 );
11994
11995 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11996 {
11997 const TestType& testType = testTypes[testTypeIdx];
11998 const string testName = testType.typeName;
11999 const deUint32 itemsPerType = testType.typeComponents;
12000 const size_t iterations = float16InputData.size() / itemsPerType;
12001 const size_t typeStride = itemsPerType * sizeof(deFloat16);
12002 SpecResource specResource;
12003 map<string, string> specs;
12004 VulkanFeatures features;
12005 vector<string> extensions;
12006
12007 specs["num_data_points"] = de::toString(iterations);
12008 specs["tt"] = testType.typeName;
12009 specs["ts"] = testType.typeStorage;
12010 specs["tt_stride"] = de::toString(typeStride);
12011 specs["type_decls"] = testType.typeDecls;
12012
12013 fragments["capability"] = capabilities.specialize(specs);
12014 fragments["decoration"] = decoration.specialize(specs);
12015 fragments["pre_main"] = preMain.specialize(specs);
12016 fragments["testfun"] = testFun.specialize(specs);
12017 fragments["testfun"] += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
12018 fragments["testfun"] += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
12019
12020 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12021 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12022 specResource.verifyIO = compareFP16FunctionSetFunc;
12023
12024 extensions.push_back("VK_KHR_shader_float16_int8");
12025
12026 features.extFloat16Int8.shaderFloat16 = true;
12027
12028 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12029 }
12030
12031 return testGroup.release();
12032 }
12033
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12034 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12035 {
12036 if (inputs.size() != 2 || outputAllocs.size() != 1)
12037 return false;
12038
12039 vector<deUint8> input1Bytes;
12040 vector<deUint8> input2Bytes;
12041
12042 inputs[0].getBytes(input1Bytes);
12043 inputs[1].getBytes(input2Bytes);
12044
12045 DE_ASSERT(input1Bytes.size() > 0);
12046 DE_ASSERT(input2Bytes.size() > 0);
12047 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12048
12049 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
12050 const size_t components = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12051 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12052 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
12053 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12054 std::string error;
12055
12056 DE_ASSERT(components == 2 || components == 4);
12057 DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
12058
12059 for (size_t idx = 0; idx < iterations; ++idx)
12060 {
12061 const deUint32 componentNdx = inputIndices[idx];
12062
12063 DE_ASSERT(componentNdx < components);
12064
12065 const deFloat16 expected = input1AsFP16[components * idx + componentNdx];
12066
12067 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
12068 {
12069 log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
12070
12071 return false;
12072 }
12073 }
12074
12075 return true;
12076 }
12077
12078 template<class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)12079 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
12080 {
12081 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
12082
12083 de::Random rnd (deStringHash(testGroup->getName()));
12084 const deUint32 numDataPoints = 256;
12085 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12086 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
12087
12088 struct TestType
12089 {
12090 const deUint32 typeComponents;
12091 const size_t typeStride;
12092 const char* typeName;
12093 const char* typeDecls;
12094 const char* typeStorage;
12095 const string loadFunction;
12096 const string storeFunction;
12097 };
12098
12099 const TestType testTypes[] =
12100 {
12101 {
12102 2,
12103 2 * sizeof(deFloat16),
12104 "v2f16",
12105 " %v2f16 = OpTypeVector %f16 2\n"
12106 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12107 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12108 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12109 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12110 "u32",
12111 loadV2F16FromUint,
12112 storeScalarF16AsUint
12113 },
12114 {
12115 3,
12116 4 * sizeof(deFloat16),
12117 "v3f16",
12118 " %v2f16 = OpTypeVector %f16 2\n"
12119 " %v3f16 = OpTypeVector %f16 3\n"
12120 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12121 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12122 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12123 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12124 "ra_u32_2",
12125 loadV3F16FromUints,
12126 storeScalarF16AsUint
12127 },
12128 {
12129 4,
12130 4 * sizeof(deFloat16),
12131 "v4f16",
12132 " %v2f16 = OpTypeVector %f16 2\n"
12133 " %v4f16 = OpTypeVector %f16 4\n"
12134 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12135 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12136 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12137 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12138 "ra_u32_2",
12139 loadV4F16FromUints,
12140 storeScalarF16AsUint
12141 },
12142 };
12143
12144 const StringTemplate preMain
12145 (
12146 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12147 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12148 " %f16 = OpTypeFloat 16\n"
12149
12150 "${type_decl}"
12151
12152 " %up_u32 = OpTypePointer Uniform %u32\n"
12153 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12154 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12155 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12156
12157 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12158 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12159 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12160 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12161 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12162
12163 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12164 " %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12165 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12166
12167 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12168 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12169 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12170 );
12171
12172 const StringTemplate decoration
12173 (
12174 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12175 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12176 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12177 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12178 "OpDecorate %SSBO_SRC BufferBlock\n"
12179 "OpDecorate %ssbo_src DescriptorSet 0\n"
12180 "OpDecorate %ssbo_src Binding 0\n"
12181
12182 "OpDecorate %ra_u32 ArrayStride 4\n"
12183 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12184 "OpDecorate %SSBO_IDX BufferBlock\n"
12185 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12186 "OpDecorate %ssbo_idx Binding 1\n"
12187
12188 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12189 "OpDecorate %SSBO_DST BufferBlock\n"
12190 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12191 "OpDecorate %ssbo_dst Binding 2\n"
12192 );
12193
12194 const StringTemplate testFun
12195 (
12196 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12197 " %param = OpFunctionParameter %v4f32\n"
12198 " %entry = OpLabel\n"
12199
12200 " %i = OpVariable %fp_i32 Function\n"
12201 " OpStore %i %c_i32_0\n"
12202
12203 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12204 " OpSelectionMerge %end_if None\n"
12205 " OpBranchConditional %will_run %run_test %end_if\n"
12206
12207 " %run_test = OpLabel\n"
12208 " OpBranch %loop\n"
12209
12210 " %loop = OpLabel\n"
12211 " %i_cmp = OpLoad %i32 %i\n"
12212 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12213 " OpLoopMerge %merge %next None\n"
12214 " OpBranchConditional %lt %write %merge\n"
12215
12216 " %write = OpLabel\n"
12217 " %ndx = OpLoad %i32 %i\n"
12218
12219 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12220
12221 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12222 " %val_idx = OpLoad %u32 %src_idx\n"
12223
12224 " %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12225 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12226
12227 " OpBranch %next\n"
12228
12229 " %next = OpLabel\n"
12230 " %i_cur = OpLoad %i32 %i\n"
12231 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12232 " OpStore %i %i_new\n"
12233 " OpBranch %loop\n"
12234
12235 " %merge = OpLabel\n"
12236 " OpBranch %end_if\n"
12237 " %end_if = OpLabel\n"
12238 " OpReturnValue %param\n"
12239
12240 " OpFunctionEnd\n"
12241 );
12242
12243 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12244 {
12245 const TestType& testType = testTypes[testTypeIdx];
12246 const string testName = testType.typeName;
12247 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12248 const size_t iterations = float16InputData.size() / itemsPerType;
12249 SpecResource specResource;
12250 map<string, string> specs;
12251 VulkanFeatures features;
12252 vector<deUint32> inputDataNdx;
12253 map<string, string> fragments;
12254 vector<string> extensions;
12255
12256 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12257 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12258
12259 specs["num_data_points"] = de::toString(iterations);
12260 specs["tt"] = testType.typeName;
12261 specs["ts"] = testType.typeStorage;
12262 specs["tt_stride"] = de::toString(testType.typeStride);
12263 specs["type_decl"] = testType.typeDecls;
12264
12265 fragments["capability"] = "OpCapability Float16\n";
12266 fragments["decoration"] = decoration.specialize(specs);
12267 fragments["pre_main"] = preMain.specialize(specs);
12268 fragments["testfun"] = testFun.specialize(specs);
12269 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12270 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12271
12272 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12273 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12274 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12275 specResource.verifyIO = compareFP16VectorExtractFunc;
12276
12277 extensions.push_back("VK_KHR_shader_float16_int8");
12278
12279 features.extFloat16Int8.shaderFloat16 = true;
12280
12281 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12282 }
12283
12284 return testGroup.release();
12285 }
12286
12287 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12288 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12289 {
12290 if (inputs.size() != 2 || outputAllocs.size() != 1)
12291 return false;
12292
12293 vector<deUint8> input1Bytes;
12294 vector<deUint8> input2Bytes;
12295
12296 inputs[0].getBytes(input1Bytes);
12297 inputs[1].getBytes(input2Bytes);
12298
12299 DE_ASSERT(input1Bytes.size() > 0);
12300 DE_ASSERT(input2Bytes.size() > 0);
12301 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12302
12303 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
12304 const size_t componentsStride = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12305 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12306 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
12307 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12308 const deFloat16 magic = tcu::Float16(float(REPLACEMENT)).bits();
12309 std::string error;
12310
12311 DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12312 DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12313
12314 for (size_t idx = 0; idx < iterations; ++idx)
12315 {
12316 const deFloat16* inputVec = &input1AsFP16[componentsStride * idx];
12317 const deFloat16* outputVec = &outputAsFP16[componentsStride * idx];
12318 const deUint32 replacedCompNdx = inputIndices[idx];
12319
12320 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12321
12322 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12323 {
12324 const deFloat16 expected = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12325
12326 if (!compare16BitFloat(expected, outputVec[compNdx], error))
12327 {
12328 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12329
12330 return false;
12331 }
12332 }
12333 }
12334
12335 return true;
12336 }
12337
12338 template<class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12339 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12340 {
12341 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12342
12343 de::Random rnd (deStringHash(testGroup->getName()));
12344 const deUint32 replacement = 42;
12345 const deUint32 numDataPoints = 256;
12346 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12347 const vector<deFloat16> float16OutputUnused (float16InputData.size(), 0);
12348
12349 struct TestType
12350 {
12351 const deUint32 typeComponents;
12352 const size_t typeStride;
12353 const char* typeName;
12354 const char* typeDecls;
12355 VerifyIOFunc verifyIOFunc;
12356 const char* typeStorage;
12357 const string loadFunction;
12358 const string storeFunction;
12359 };
12360
12361 const TestType testTypes[] =
12362 {
12363 {
12364 2,
12365 2 * sizeof(deFloat16),
12366 "v2f16",
12367 " %v2f16 = OpTypeVector %f16 2\n"
12368 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12369 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12370 compareFP16VectorInsertFunc<2, replacement>,
12371 "u32",
12372 loadV2F16FromUint,
12373 storeV2F16AsUint
12374 },
12375 {
12376 3,
12377 4 * sizeof(deFloat16),
12378 "v3f16",
12379 " %v2f16 = OpTypeVector %f16 2\n"
12380 " %v3f16 = OpTypeVector %f16 3\n"
12381 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12382 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12383 compareFP16VectorInsertFunc<3, replacement>,
12384 "ra_u32_2",
12385 loadV3F16FromUints,
12386 storeV3F16AsUints
12387 },
12388 {
12389 4,
12390 4 * sizeof(deFloat16),
12391 "v4f16",
12392 " %v2f16 = OpTypeVector %f16 2\n"
12393 " %v4f16 = OpTypeVector %f16 4\n"
12394 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12395 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12396 compareFP16VectorInsertFunc<4, replacement>,
12397 "ra_u32_2",
12398 loadV4F16FromUints,
12399 storeV4F16AsUints
12400 },
12401 };
12402
12403 const StringTemplate preMain
12404 (
12405 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12406 " %f16 = OpTypeFloat 16\n"
12407 " %c_f16_ins = OpConstant %f16 ${replacement}\n"
12408
12409 "${type_decl}"
12410
12411 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12412 " %up_u32 = OpTypePointer Uniform %u32\n"
12413 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12414 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12415
12416 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12417 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12418 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12419 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12420
12421 " %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12422 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12423
12424 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12425 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12426 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12427 );
12428
12429 const StringTemplate decoration
12430 (
12431 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12432 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12433 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12434 "OpDecorate %SSBO_SRC BufferBlock\n"
12435 "OpDecorate %ssbo_src DescriptorSet 0\n"
12436 "OpDecorate %ssbo_src Binding 0\n"
12437
12438 "OpDecorate %ra_u32 ArrayStride 4\n"
12439 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12440 "OpDecorate %SSBO_IDX BufferBlock\n"
12441 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12442 "OpDecorate %ssbo_idx Binding 1\n"
12443
12444 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12445 "OpDecorate %SSBO_DST BufferBlock\n"
12446 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12447 "OpDecorate %ssbo_dst Binding 2\n"
12448 );
12449
12450 const StringTemplate testFun
12451 (
12452 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12453 " %param = OpFunctionParameter %v4f32\n"
12454 " %entry = OpLabel\n"
12455
12456 " %i = OpVariable %fp_i32 Function\n"
12457 " OpStore %i %c_i32_0\n"
12458
12459 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12460 " OpSelectionMerge %end_if None\n"
12461 " OpBranchConditional %will_run %run_test %end_if\n"
12462
12463 " %run_test = OpLabel\n"
12464 " OpBranch %loop\n"
12465
12466 " %loop = OpLabel\n"
12467 " %i_cmp = OpLoad %i32 %i\n"
12468 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12469 " OpLoopMerge %merge %next None\n"
12470 " OpBranchConditional %lt %write %merge\n"
12471
12472 " %write = OpLabel\n"
12473 " %ndx = OpLoad %i32 %i\n"
12474
12475 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12476
12477 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12478 " %val_idx = OpLoad %u32 %src_idx\n"
12479
12480 " %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12481 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12482
12483 " OpBranch %next\n"
12484
12485 " %next = OpLabel\n"
12486 " %i_cur = OpLoad %i32 %i\n"
12487 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12488 " OpStore %i %i_new\n"
12489 " OpBranch %loop\n"
12490
12491 " %merge = OpLabel\n"
12492 " OpBranch %end_if\n"
12493 " %end_if = OpLabel\n"
12494 " OpReturnValue %param\n"
12495
12496 " OpFunctionEnd\n"
12497 );
12498
12499 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12500 {
12501 const TestType& testType = testTypes[testTypeIdx];
12502 const string testName = testType.typeName;
12503 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12504 const size_t iterations = float16InputData.size() / itemsPerType;
12505 SpecResource specResource;
12506 map<string, string> specs;
12507 VulkanFeatures features;
12508 vector<deUint32> inputDataNdx;
12509 map<string, string> fragments;
12510 vector<string> extensions;
12511
12512 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12513 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12514
12515 specs["num_data_points"] = de::toString(iterations);
12516 specs["tt"] = testType.typeName;
12517 specs["ts"] = testType.typeStorage;
12518 specs["tt_stride"] = de::toString(testType.typeStride);
12519 specs["type_decl"] = testType.typeDecls;
12520 specs["replacement"] = de::toString(replacement);
12521
12522 fragments["capability"] = "OpCapability Float16\n";
12523 fragments["decoration"] = decoration.specialize(specs);
12524 fragments["pre_main"] = preMain.specialize(specs);
12525 fragments["testfun"] = testFun.specialize(specs);
12526 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12527 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12528
12529 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12530 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12531 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12532 specResource.verifyIO = testType.verifyIOFunc;
12533
12534 extensions.push_back("VK_KHR_shader_float16_int8");
12535
12536 features.extFloat16Int8.shaderFloat16 = true;
12537
12538 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12539 }
12540
12541 return testGroup.release();
12542 }
12543
// Returns the value expected in result component componentNdx of a shuffle of
// input1Vec (vec1Len components) and input2Vec (vec2Len components) for the given
// iteration.
//
// The selector for components 0 and 1 is derived from the iteration number and
// ranges over [0, vec1Len + vec2Len]; components 2 and 3 always select 0 and 1.
// A selector equal to vec1Len + vec2Len does not address any source component:
// in that case validate is set to false and 0 is returned. Otherwise validate is
// set to true and the selected component of the concatenated inputs is returned.
//
// Throws InternalError (via TCU_THROW) when componentNdx is greater than 3.
inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
{
	const size_t	totalLen		= vec1Len + vec2Len;
	const size_t	selectorCount	= totalLen + 1;
	const size_t	pairCode		= iteration % (selectorCount * selectorCount);
	size_t			selector		= 0;

	if (componentNdx == 0)
	{
		selector = pairCode / selectorCount;
	}
	else if (componentNdx == 1)
	{
		selector = pairCode % selectorCount;
	}
	else if (componentNdx == 2)
	{
		selector = 0;
	}
	else if (componentNdx == 3)
	{
		selector = 1;
	}
	else
	{
		TCU_THROW(InternalError, "Impossible");
	}

	// Selector past both inputs: nothing to validate for this component.
	if (selector >= totalLen)
	{
		validate = false;
		return 0;
	}

	validate = true;

	if (selector < vec1Len)
		return input1Vec[selector];

	return input2Vec[selector - vec1Len];
}
12570
12571 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12572 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12573 {
12574 DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12575 DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12576 DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12577
12578 if (inputs.size() != 2 || outputAllocs.size() != 1)
12579 return false;
12580
12581 vector<deUint8> input1Bytes;
12582 vector<deUint8> input2Bytes;
12583
12584 inputs[0].getBytes(input1Bytes);
12585 inputs[1].getBytes(input2Bytes);
12586
12587 DE_ASSERT(input1Bytes.size() > 0);
12588 DE_ASSERT(input2Bytes.size() > 0);
12589 DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12590
12591 const size_t componentsStrideDst = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12592 const size_t componentsStrideSrc0 = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12593 const size_t componentsStrideSrc1 = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12594 const size_t iterations = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12595 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12596 const deFloat16* const input2AsFP16 = (const deFloat16*)&input2Bytes[0];
12597 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12598 std::string error;
12599
12600 DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12601 DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12602
12603 for (size_t idx = 0; idx < iterations; ++idx)
12604 {
12605 const deFloat16* input1Vec = &input1AsFP16[componentsStrideSrc0 * idx];
12606 const deFloat16* input2Vec = &input2AsFP16[componentsStrideSrc1 * idx];
12607 const deFloat16* outputVec = &outputAsFP16[componentsStrideDst * idx];
12608
12609 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12610 {
12611 bool validate = true;
12612 deFloat16 expected = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
12613
12614 if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12615 {
12616 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12617
12618 return false;
12619 }
12620 }
12621 }
12622
12623 return true;
12624 }
12625
getFloat16VectorShuffleVerifyIOFunc(deUint32 dstComponentsCount,deUint32 src0ComponentsCount,deUint32 src1ComponentsCount)12626 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
12627 {
12628 DE_ASSERT(dstComponentsCount <= 4);
12629 DE_ASSERT(src0ComponentsCount <= 4);
12630 DE_ASSERT(src1ComponentsCount <= 4);
12631 deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12632
12633 switch (funcCode)
12634 {
12635 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12636 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12637 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12638 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12639 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12640 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12641 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12642 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12643 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12644 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12645 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12646 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12647 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12648 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12649 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12650 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12651 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12652 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12653 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12654 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12655 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12656 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12657 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12658 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12659 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12660 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12661 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12662 default: TCU_THROW(InternalError, "Invalid number of components specified.");
12663 }
12664 }
12665
12666 template<class SpecResource>
createFloat16VectorShuffleSet(tcu::TestContext & testCtx)12667 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12668 {
12669 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12670 const int testSpecificSeed = deStringHash(testGroup->getName());
12671 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12672 de::Random rnd (seed);
12673 const deUint32 numDataPoints = 128;
12674 map<string, string> fragments;
12675
12676 struct TestType
12677 {
12678 const deUint32 typeComponents;
12679 const char* typeName;
12680 const string loadFunction;
12681 const string storeFunction;
12682 };
12683
12684 const TestType testTypes[] =
12685 {
12686 {
12687 2,
12688 "v2f16",
12689 loadV2F16FromUint,
12690 storeV2F16AsUint
12691 },
12692 {
12693 3,
12694 "v3f16",
12695 loadV3F16FromUints,
12696 storeV3F16AsUints
12697 },
12698 {
12699 4,
12700 "v4f16",
12701 loadV4F16FromUints,
12702 storeV4F16AsUints
12703 },
12704 };
12705
12706 const StringTemplate preMain
12707 (
12708 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12709 " %c_i32_cc = OpConstant %i32 ${case_count}\n"
12710 " %f16 = OpTypeFloat 16\n"
12711 " %v2f16 = OpTypeVector %f16 2\n"
12712 " %v3f16 = OpTypeVector %f16 3\n"
12713 " %v4f16 = OpTypeVector %f16 4\n"
12714
12715 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12716 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12717 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12718 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12719 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12720 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12721
12722 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12723 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12724 " %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12725 " %up_u32 = OpTypePointer Uniform %u32\n"
12726 " %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12727 " %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12728 " %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12729
12730 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12731 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12732 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12733
12734 " %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12735
12736 " %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12737 " %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12738 " %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12739 );
12740
12741 const StringTemplate decoration
12742 (
12743 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12744 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12745 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12746
12747 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12748 "OpDecorate %SSBO_v2f16 BufferBlock\n"
12749
12750 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12751 "OpDecorate %SSBO_v3f16 BufferBlock\n"
12752
12753 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12754 "OpDecorate %SSBO_v4f16 BufferBlock\n"
12755
12756 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12757 "OpDecorate %ssbo_src0 Binding 0\n"
12758 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12759 "OpDecorate %ssbo_src1 Binding 1\n"
12760 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12761 "OpDecorate %ssbo_dst Binding 2\n"
12762 );
12763
12764 const StringTemplate testFun
12765 (
12766 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12767 " %param = OpFunctionParameter %v4f32\n"
12768 " %entry = OpLabel\n"
12769
12770 " %i = OpVariable %fp_i32 Function\n"
12771 " OpStore %i %c_i32_0\n"
12772
12773 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12774 " OpSelectionMerge %end_if None\n"
12775 " OpBranchConditional %will_run %run_test %end_if\n"
12776
12777 " %run_test = OpLabel\n"
12778 " OpBranch %loop\n"
12779
12780 " %loop = OpLabel\n"
12781 " %i_cmp = OpLoad %i32 %i\n"
12782 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12783 " OpLoopMerge %merge %next None\n"
12784 " OpBranchConditional %lt %write %merge\n"
12785
12786 " %write = OpLabel\n"
12787 " %ndx = OpLoad %i32 %i\n"
12788 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12789 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12790 " %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12791 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12792 " OpBranch %next\n"
12793
12794 " %next = OpLabel\n"
12795 " %i_cur = OpLoad %i32 %i\n"
12796 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12797 " OpStore %i %i_new\n"
12798 " OpBranch %loop\n"
12799
12800 " %merge = OpLabel\n"
12801 " OpBranch %end_if\n"
12802 " %end_if = OpLabel\n"
12803 " OpReturnValue %param\n"
12804 " OpFunctionEnd\n"
12805 "\n"
12806
12807 " %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12808 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12809 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12810 "%sw_paramn = OpFunctionParameter %i32\n"
12811 " %sw_entry = OpLabel\n"
12812 " %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12813 " OpSelectionMerge %switch_e None\n"
12814 " OpSwitch %modulo %default ${case_list}\n"
12815 "${case_bodies}"
12816 "%default = OpLabel\n"
12817 " OpUnreachable\n" // Unreachable default case for switch statement
12818 "%switch_e = OpLabel\n"
12819 " OpUnreachable\n" // Unreachable merge block for switch statement
12820 " OpFunctionEnd\n"
12821 );
12822
12823 const StringTemplate testCaseBody
12824 (
12825 "%case_${case_ndx} = OpLabel\n"
12826 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12827 " OpReturnValue %val_dst_${case_ndx}\n"
12828 );
12829
12830 for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12831 {
12832 const TestType& dstType = testTypes[dstTypeIdx];
12833
12834 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12835 {
12836 const TestType& src0Type = testTypes[comp0Idx];
12837
12838 for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12839 {
12840 const TestType& src1Type = testTypes[comp1Idx];
12841 const deUint32 input0Stride = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12842 const deUint32 input1Stride = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12843 const deUint32 outputStride = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12844 const vector<deFloat16> float16Input0Data = getFloat16s(rnd, input0Stride * numDataPoints);
12845 const vector<deFloat16> float16Input1Data = getFloat16s(rnd, input1Stride * numDataPoints);
12846 const vector<deFloat16> float16OutputUnused (outputStride * numDataPoints, 0);
12847 const string testName = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12848 deUint32 caseCount = 0;
12849 SpecResource specResource;
12850 map<string, string> specs;
12851 vector<string> extensions;
12852 VulkanFeatures features;
12853 string caseBodies;
12854 string caseList;
12855
12856 // Generate case
12857 {
12858 vector<string> componentList;
12859
12860 // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12861 {
12862 deUint32 caseNo = 0;
12863
12864 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12865 componentList.push_back(de::toString(caseNo++));
12866 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12867 componentList.push_back(de::toString(caseNo++));
12868 componentList.push_back("0xFFFFFFFF");
12869 }
12870
12871 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12872 {
12873 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12874 {
12875 map<string, string> specCase;
12876 string shuffle = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12877
12878 for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12879 shuffle += " " + de::toString(compIdx - 2);
12880
12881 specCase["case_ndx"] = de::toString(caseCount);
12882 specCase["shuffle"] = shuffle;
12883 specCase["tt_dst"] = dstType.typeName;
12884
12885 caseBodies += testCaseBody.specialize(specCase);
12886 caseList += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12887
12888 caseCount++;
12889 }
12890 }
12891 }
12892
12893 specs["num_data_points"] = de::toString(numDataPoints);
12894 specs["tt_dst"] = dstType.typeName;
12895 specs["tt_src0"] = src0Type.typeName;
12896 specs["tt_src1"] = src1Type.typeName;
12897 specs["case_bodies"] = caseBodies;
12898 specs["case_list"] = caseList;
12899 specs["case_count"] = de::toString(caseCount);
12900
12901 fragments["capability"] = "OpCapability Float16\n";
12902 fragments["decoration"] = decoration.specialize(specs);
12903 fragments["pre_main"] = preMain.specialize(specs);
12904 fragments["testfun"] = testFun.specialize(specs);
12905 fragments["testfun"] += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12906 fragments["testfun"] += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12907 fragments["testfun"] += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12908
12909 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12910 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12911 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12912 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12913
12914 extensions.push_back("VK_KHR_shader_float16_int8");
12915
12916 features.extFloat16Int8.shaderFloat16 = true;
12917
12918 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12919 }
12920 }
12921 }
12922
12923 return testGroup.release();
12924 }
12925
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12926 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12927 {
12928 if (inputs.size() != 1 || outputAllocs.size() != 1)
12929 return false;
12930
12931 vector<deUint8> input1Bytes;
12932
12933 inputs[0].getBytes(input1Bytes);
12934
12935 DE_ASSERT(input1Bytes.size() > 0);
12936 DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12937
12938 const size_t iterations = input1Bytes.size() / sizeof(deFloat16);
12939 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12940 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12941 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12942 std::string error;
12943
12944 for (size_t idx = 0; idx < iterations; ++idx)
12945 {
12946 if (input1AsFP16[idx] == exceptionValue)
12947 continue;
12948
12949 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12950 {
12951 log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12952
12953 return false;
12954 }
12955 }
12956
12957 return true;
12958 }
12959
12960 template<class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)12961 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12962 {
12963 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12964 const deUint32 numElements = 8;
12965 const string testName = "struct";
12966 const deUint32 structItemsCount = 88;
12967 const deUint32 exceptionIndices[] = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12968 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12969 const deUint32 fieldModifier = 2;
12970 const deUint32 fieldModifiedMulIndex = 60;
12971 const deUint32 fieldModifiedAddIndex = 66;
12972
12973 const StringTemplate preMain
12974 (
12975 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12976 " %f16 = OpTypeFloat 16\n"
12977 " %v2f16 = OpTypeVector %f16 2\n"
12978 " %v3f16 = OpTypeVector %f16 3\n"
12979 " %v4f16 = OpTypeVector %f16 4\n"
12980 " %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12981
12982 "${consts}"
12983
12984 " %c_f16_n1 = OpConstant %f16 -1.0\n"
12985 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12986 " %c_u32_5 = OpConstant %u32 5\n"
12987 " %c_u32_6 = OpConstant %u32 6\n"
12988 " %c_u32_7 = OpConstant %u32 7\n"
12989 " %c_u32_8 = OpConstant %u32 8\n"
12990 " %c_u32_9 = OpConstant %u32 9\n"
12991 " %c_u32_10 = OpConstant %u32 10\n"
12992 " %c_u32_11 = OpConstant %u32 11\n"
12993 " %c_u32_12 = OpConstant %u32 12\n"
12994 " %c_u32_13 = OpConstant %u32 13\n"
12995 " %c_u32_14 = OpConstant %u32 14\n"
12996 " %c_u32_15 = OpConstant %u32 15\n"
12997 " %c_u32_16 = OpConstant %u32 16\n"
12998 " %c_u32_17 = OpConstant %u32 17\n"
12999 " %c_u32_18 = OpConstant %u32 18\n"
13000 " %c_u32_19 = OpConstant %u32 19\n"
13001 " %c_u32_20 = OpConstant %u32 20\n"
13002 " %c_u32_21 = OpConstant %u32 21\n"
13003 " %c_u32_22 = OpConstant %u32 22\n"
13004 " %c_u32_23 = OpConstant %u32 23\n"
13005 " %c_u32_24 = OpConstant %u32 24\n"
13006 " %c_u32_25 = OpConstant %u32 25\n"
13007 " %c_u32_26 = OpConstant %u32 26\n"
13008 " %c_u32_27 = OpConstant %u32 27\n"
13009 " %c_u32_28 = OpConstant %u32 28\n"
13010 " %c_u32_29 = OpConstant %u32 29\n"
13011 " %c_u32_30 = OpConstant %u32 30\n"
13012 " %c_u32_31 = OpConstant %u32 31\n"
13013 " %c_u32_33 = OpConstant %u32 33\n"
13014 " %c_u32_34 = OpConstant %u32 34\n"
13015 " %c_u32_35 = OpConstant %u32 35\n"
13016 " %c_u32_36 = OpConstant %u32 36\n"
13017 " %c_u32_37 = OpConstant %u32 37\n"
13018 " %c_u32_38 = OpConstant %u32 38\n"
13019 " %c_u32_39 = OpConstant %u32 39\n"
13020 " %c_u32_40 = OpConstant %u32 40\n"
13021 " %c_u32_41 = OpConstant %u32 41\n"
13022 " %c_u32_44 = OpConstant %u32 44\n"
13023
13024 " %f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13025 " %v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13026 " %v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13027 " %v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13028 " %v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13029 " %struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13030 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13031 " %st_test = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
13032
13033 " %up_u32 = OpTypePointer Uniform %u32\n"
13034 " %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
13035 " %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
13036 " %SSBO_st = OpTypeStruct %ra_ra_u32\n"
13037 " %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
13038
13039 " %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
13040 );
13041
13042 const StringTemplate decoration
13043 (
13044 "OpDecorate %SSBO_st BufferBlock\n"
13045 "OpDecorate %ra_u32_44 ArrayStride 4\n"
13046 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
13047 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13048 "OpDecorate %ssbo_dst Binding 1\n"
13049
13050 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
13051
13052 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13053 "OpMemberDecorate %struct16 0 Offset 0\n"
13054 "OpMemberDecorate %struct16 1 Offset 4\n"
13055 "OpDecorate %struct16arr3 ArrayStride 16\n"
13056 "OpDecorate %f16arr3 ArrayStride 2\n"
13057 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13058 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13059 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13060
13061 "OpMemberDecorate %st_test 0 Offset 0\n"
13062 "OpMemberDecorate %st_test 1 Offset 4\n"
13063 "OpMemberDecorate %st_test 2 Offset 8\n"
13064 "OpMemberDecorate %st_test 3 Offset 16\n"
13065 "OpMemberDecorate %st_test 4 Offset 24\n"
13066 "OpMemberDecorate %st_test 5 Offset 32\n"
13067 "OpMemberDecorate %st_test 6 Offset 80\n"
13068 "OpMemberDecorate %st_test 7 Offset 100\n"
13069 "OpMemberDecorate %st_test 8 Offset 104\n"
13070 "OpMemberDecorate %st_test 9 Offset 144\n"
13071 );
13072
13073 const StringTemplate testFun
13074 (
13075 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13076 " %param = OpFunctionParameter %v4f32\n"
13077 " %entry = OpLabel\n"
13078
13079 " %i = OpVariable %fp_i32 Function\n"
13080 " OpStore %i %c_i32_0\n"
13081
13082 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13083 " OpSelectionMerge %end_if None\n"
13084 " OpBranchConditional %will_run %run_test %end_if\n"
13085
13086 " %run_test = OpLabel\n"
13087 " OpBranch %loop\n"
13088
13089 " %loop = OpLabel\n"
13090 " %i_cmp = OpLoad %i32 %i\n"
13091 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13092 " OpLoopMerge %merge %next None\n"
13093 " OpBranchConditional %lt %write %merge\n"
13094
13095 " %write = OpLabel\n"
13096 " %ndx = OpLoad %i32 %i\n"
13097
13098 " %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13099 " %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13100 " %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13101
13102 " %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13103
13104 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13105 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13106 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13107 " %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13108 " %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13109
13110 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13111 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13112 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13113 " %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13114 " %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13115
13116 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13117 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13118 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13119 " %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13120 " %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13121
13122 " %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13123
13124 " %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13125 " %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13126 " %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13127 " %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13128 " %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13129 " %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13130
13131 " %fndx = OpConvertSToF %f16 %ndx\n"
13132 " %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13133 " %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13134
13135 " %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13136 " %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13137 " %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13138 " %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13139 " %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13140 " %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13141 " %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13142 " %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13143
13144 " %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13145 " %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13146 " %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13147 " %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13148
13149 " %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13150
13151 // Storage section: all elements that are not directly accessed should
13152 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13153 // is constructed with one element from a constant -1.0.
13154 // half offset 0
13155 " %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13156 " %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13157 " %bc_0 = OpBitcast %u32 %vec_0\n"
13158 " %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13159 " OpStore %gep_0 %bc_0\n"
13160
13161 // <2 x half> offset 4
13162 " %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13163 " %bc_1 = OpBitcast %u32 %ex_1\n"
13164 " %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13165 " OpStore %gep_1 %bc_1\n"
13166
13167 // <3 x half> offset 8
13168 " %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13169 " %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13170 " %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13171 " %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13172 " %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13173 " %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13174 " %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13175 " OpStore %gep_2_0 %bc_2_0\n"
13176 " OpStore %gep_2_1 %bc_2_1\n"
13177
13178 // <4 x half> offset 16
13179 " %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13180 " %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13181 " %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13182 " %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13183 " %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13184 " %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13185 " %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13186 " OpStore %gep_3_0 %bc_3_0\n"
13187 " OpStore %gep_3_1 %bc_3_1\n"
13188
13189 // [3 x half] offset 24
13190 " %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13191 " %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13192 " %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13193 " %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13194 " %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13195 " %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13196 " %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13197 " %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13198 " %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13199 " OpStore %gep_4_0 %bc_4_0\n"
13200 " OpStore %gep_4_1 %bc_4_1\n"
13201
13202 // [3 x {half, [3 x <2 x half>]}] offset 32
13203 " %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13204 " %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13205 " %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13206 " %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13207 " %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13208 " %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13209 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13210 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13211 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13212 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13213 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13214 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13215 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13216 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13217 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13218 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13219 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13220 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13221 " %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13222 " %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13223 " %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13224 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13225 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13226 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13227 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13228 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13229 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13230 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13231 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13232 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13233 " %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13234 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13235 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13236 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13237 " %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13238 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13239 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13240 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13241 " %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13242 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13243 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13244 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13245 " OpStore %gep_5_0_0 %bc_5_0_0\n"
13246 " OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13247 " OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13248 " OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13249 " OpStore %gep_5_1_0 %bc_5_1_0\n"
13250 " OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13251 " OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13252 " OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13253 " OpStore %gep_5_2_0 %bc_5_2_0\n"
13254 " OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13255 " OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13256 " OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13257
13258 // [5 x <2 x half>] offset 80
13259 " %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13260 " %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13261 " %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13262 " %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13263 " %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13264 " %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13265 " %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13266 " %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13267 " %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13268 " %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13269 " %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13270 " %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13271 " %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13272 " %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13273 " %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13274 " OpStore %gep_6_0 %bc_6_0\n"
13275 " OpStore %gep_6_1 %bc_6_1\n"
13276 " OpStore %gep_6_2 %bc_6_2\n"
13277 " OpStore %gep_6_3 %bc_6_3\n"
13278 " OpStore %gep_6_4 %bc_6_4\n"
13279
13280 // half offset 100
13281 " %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13282 " %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13283 " %bc_7 = OpBitcast %u32 %vec_7\n"
13284 " %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13285 " OpStore %gep_7 %bc_7\n"
13286
13287 // [5 x <3 x half>] offset 104
13288 " %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13289 " %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13290 " %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13291 " %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13292 " %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13293 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13294 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13295 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13296 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13297 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13298 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13299 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13300 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13301 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13302 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13303 " %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13304 " %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13305 " %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13306 " %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13307 " %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13308 " %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13309 " %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13310 " %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13311 " %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13312 " %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13313 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13314 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13315 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13316 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13317 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13318 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13319 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13320 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13321 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13322 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13323 " OpStore %gep_8_0_0 %bc_8_0_0\n"
13324 " OpStore %gep_8_0_1 %bc_8_0_1\n"
13325 " OpStore %gep_8_1_0 %bc_8_1_0\n"
13326 " OpStore %gep_8_1_1 %bc_8_1_1\n"
13327 " OpStore %gep_8_2_0 %bc_8_2_0\n"
13328 " OpStore %gep_8_2_1 %bc_8_2_1\n"
13329 " OpStore %gep_8_3_0 %bc_8_3_0\n"
13330 " OpStore %gep_8_3_1 %bc_8_3_1\n"
13331 " OpStore %gep_8_4_0 %bc_8_4_0\n"
13332 " OpStore %gep_8_4_1 %bc_8_4_1\n"
13333
13334 // [3 x <4 x half>] offset 144
13335 " %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13336 " %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13337 " %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13338 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13339 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13340 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13341 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13342 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13343 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13344 " %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13345 " %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13346 " %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13347 " %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13348 " %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13349 " %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13350 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13351 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13352 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13353 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13354 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13355 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13356 " OpStore %gep_9_0_0 %bc_9_0_0\n"
13357 " OpStore %gep_9_0_1 %bc_9_0_1\n"
13358 " OpStore %gep_9_1_0 %bc_9_1_0\n"
13359 " OpStore %gep_9_1_1 %bc_9_1_1\n"
13360 " OpStore %gep_9_2_0 %bc_9_2_0\n"
13361 " OpStore %gep_9_2_1 %bc_9_2_1\n"
13362
13363 " OpBranch %next\n"
13364
13365 " %next = OpLabel\n"
13366 " %i_cur = OpLoad %i32 %i\n"
13367 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13368 " OpStore %i %i_new\n"
13369 " OpBranch %loop\n"
13370
13371 " %merge = OpLabel\n"
13372 " OpBranch %end_if\n"
13373 " %end_if = OpLabel\n"
13374 " OpReturnValue %param\n"
13375 " OpFunctionEnd\n"
13376 );
13377
13378 {
13379 SpecResource specResource;
13380 map<string, string> specs;
13381 VulkanFeatures features;
13382 map<string, string> fragments;
13383 vector<string> extensions;
13384 vector<deFloat16> expectedOutput;
13385 string consts;
13386
13387 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13388 {
13389 vector<deFloat16> expectedIterationOutput;
13390
13391 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13392 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13393
13394 for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13395 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13396
13397 expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13398 expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13399
13400 expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13401 }
13402
13403 for (deUint32 i = 0; i < structItemsCount; ++i)
13404 consts += " %c_f16_" + de::toString(i) + " = OpConstant %f16 " + de::toString(i) + "\n";
13405
13406 specs["num_elements"] = de::toString(numElements);
13407 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
13408 specs["field_modifier"] = de::toString(fieldModifier);
13409 specs["consts"] = consts;
13410
13411 fragments["capability"] = "OpCapability Float16\n";
13412 fragments["decoration"] = decoration.specialize(specs);
13413 fragments["pre_main"] = preMain.specialize(specs);
13414 fragments["testfun"] = testFun.specialize(specs);
13415
13416 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13417 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13418 specResource.verifyIO = compareFP16CompositeFunc;
13419
13420 extensions.push_back("VK_KHR_shader_float16_int8");
13421
13422 features.extFloat16Int8.shaderFloat16 = true;
13423
13424 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13425 }
13426
13427 return testGroup.release();
13428 }
13429
13430 template<class SpecResource>
createFloat16CompositeInsertExtractSet(tcu::TestContext & testCtx,const char * op)13431 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13432 {
13433 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13434 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13435 const string opName (op);
13436 const deUint32 opIndex = (opName == "OpCompositeInsert") ? 0
13437 : (opName == "OpCompositeExtract") ? 1
13438 : std::numeric_limits<deUint32>::max();
13439
13440 const StringTemplate preMain
13441 (
13442 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13443 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13444 " %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13445 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13446 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13447 " %f16 = OpTypeFloat 16\n"
13448 " %v2f16 = OpTypeVector %f16 2\n"
13449 " %v3f16 = OpTypeVector %f16 3\n"
13450 " %v4f16 = OpTypeVector %f16 4\n"
13451 " %c_f16_na = OpConstant %f16 -1.0\n"
13452 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13453 " %c_u32_5 = OpConstant %u32 5\n"
13454 " %c_i32_5 = OpConstant %i32 5\n"
13455 " %c_i32_6 = OpConstant %i32 6\n"
13456 " %c_i32_7 = OpConstant %i32 7\n"
13457 " %c_i32_8 = OpConstant %i32 8\n"
13458 " %c_i32_9 = OpConstant %i32 9\n"
13459 " %c_i32_10 = OpConstant %i32 10\n"
13460 " %c_i32_11 = OpConstant %i32 11\n"
13461
13462 "%f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13463 "%v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13464 "%v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13465 "%v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13466 "%v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13467 "%struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13468 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13469 "%st_test = OpTypeStruct %${field_type}\n"
13470
13471 " %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13472 " %ra_st = OpTypeArray %u32 %c_i32_size\n"
13473 " %up_u32 = OpTypePointer Uniform %u32\n"
13474 " %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13475 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13476 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13477 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13478 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13479 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13480
13481 "${op_premain_decls}"
13482
13483 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13484 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13485
13486 " %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13487 " %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13488 );
13489
13490 const StringTemplate decoration
13491 (
13492 "OpDecorate %SSBO_src BufferBlock\n"
13493 "OpDecorate %SSBO_dst BufferBlock\n"
13494 "OpDecorate %ra_f16 ArrayStride 4\n"
13495 "OpDecorate %ra_st ArrayStride 4\n"
13496 "OpDecorate %ssbo_src DescriptorSet 0\n"
13497 "OpDecorate %ssbo_src Binding 0\n"
13498 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13499 "OpDecorate %ssbo_dst Binding 1\n"
13500
13501 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13502 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13503
13504 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13505 "OpMemberDecorate %struct16 0 Offset 0\n"
13506 "OpMemberDecorate %struct16 1 Offset 4\n"
13507 "OpDecorate %struct16arr3 ArrayStride 16\n"
13508 "OpDecorate %f16arr3 ArrayStride 2\n"
13509 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13510 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13511 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13512
13513 "OpMemberDecorate %st_test 0 Offset 0\n"
13514 );
13515
13516 const StringTemplate testFun
13517 (
13518 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13519 " %param = OpFunctionParameter %v4f32\n"
13520 " %entry = OpLabel\n"
13521
13522 " %i = OpVariable %fp_i32 Function\n"
13523 " OpStore %i %c_i32_0\n"
13524
13525 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13526 " OpSelectionMerge %end_if None\n"
13527 " OpBranchConditional %will_run %run_test %end_if\n"
13528
13529 " %run_test = OpLabel\n"
13530 " OpBranch %loop\n"
13531
13532 " %loop = OpLabel\n"
13533 " %i_cmp = OpLoad %i32 %i\n"
13534 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13535 " OpLoopMerge %merge %next None\n"
13536 " OpBranchConditional %lt %write %merge\n"
13537
13538 " %write = OpLabel\n"
13539 " %ndx = OpLoad %i32 %i\n"
13540
13541 "${op_sw_fun_call}"
13542
13543 " %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13544 " OpBranch %next\n"
13545
13546 " %next = OpLabel\n"
13547 " %i_cur = OpLoad %i32 %i\n"
13548 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13549 " OpStore %i %i_new\n"
13550 " OpBranch %loop\n"
13551
13552 " %merge = OpLabel\n"
13553 " OpBranch %end_if\n"
13554 " %end_if = OpLabel\n"
13555 " OpReturnValue %param\n"
13556 " OpFunctionEnd\n"
13557
13558 "${op_sw_fun_header}"
13559 " %sw_param = OpFunctionParameter %st_test\n"
13560 "%sw_paramn = OpFunctionParameter %i32\n"
13561 " %sw_entry = OpLabel\n"
13562 " OpSelectionMerge %switch_e None\n"
13563 " OpSwitch %sw_paramn %default ${case_list}\n"
13564
13565 "${case_bodies}"
13566
13567 "%default = OpLabel\n"
13568 " OpReturnValue ${op_case_default_value}\n"
13569 "%switch_e = OpLabel\n"
13570 " OpUnreachable\n" // Unreachable merge block for switch statement
13571 " OpFunctionEnd\n"
13572 );
13573
13574 const StringTemplate testCaseBody
13575 (
13576 "%case_${case_ndx} = OpLabel\n"
13577 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13578 " OpReturnValue %val_ret_${case_ndx}\n"
13579 );
13580
13581 const string loadF16
13582 (
13583 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13584 " %ld_${var}_param = OpFunctionParameter %i32\n"
13585 " %ld_${var}_entry = OpLabel\n"
13586 " %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13587 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13588 " OpReturnValue %ld_${var}_st_test\n"
13589 " OpFunctionEnd\n" +
13590 loadScalarF16FromUint
13591 );
13592
13593 const string loadV2F16
13594 (
13595 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13596 " %ld_${var}_param = OpFunctionParameter %i32\n"
13597 " %ld_${var}_entry = OpLabel\n"
13598 " %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13599 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13600 " OpReturnValue %ld_${var}_st_test\n"
13601 " OpFunctionEnd\n" +
13602 loadV2F16FromUint
13603 );
13604
13605 const string loadV3F16
13606 (
13607 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13608 " %ld_${var}_param = OpFunctionParameter %i32\n"
13609 " %ld_${var}_entry = OpLabel\n"
13610 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13611 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13612 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13613 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13614 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13615 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13616 " %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13617 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13618 " OpReturnValue %ld_${var}_st_test\n"
13619 " OpFunctionEnd\n"
13620 );
13621
13622 const string loadV4F16
13623 (
13624 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13625 " %ld_${var}_param = OpFunctionParameter %i32\n"
13626 " %ld_${var}_entry = OpLabel\n"
13627 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13628 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13629 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13630 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13631 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13632 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13633 " %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13634 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13635 " OpReturnValue %ld_${var}_st_test\n"
13636 " OpFunctionEnd\n"
13637 );
13638
13639 const string loadF16Arr3
13640 (
13641 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13642 " %ld_${var}_param = OpFunctionParameter %i32\n"
13643 " %ld_${var}_entry = OpLabel\n"
13644 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13645 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13646 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13647 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13648 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13649 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13650 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13651 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13652 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13653 " %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13654 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13655 " OpReturnValue %ld_${var}_st_test\n"
13656 " OpFunctionEnd\n"
13657 );
13658
13659 const string loadV2F16Arr5
13660 (
13661 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13662 " %ld_${var}_param = OpFunctionParameter %i32\n"
13663 " %ld_${var}_label = OpLabel\n"
13664 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13665 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13666 " %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13667 " %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13668 " %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13669 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13670 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13671 " %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13672 " %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13673 " %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13674 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13675 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13676 " %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13677 " %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13678 " %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13679 " %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13680 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13681 " OpReturnValue %ld_${var}_st_test\n"
13682 " OpFunctionEnd\n"
13683 );
13684
13685 const string loadV3F16Arr5
13686 (
13687 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13688 " %ld_${var}_param = OpFunctionParameter %i32\n"
13689 " %ld_${var}_entry = OpLabel\n"
13690 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13691 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13692 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13693 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13694 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13695 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13696 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13697 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13698 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13699 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13700 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13701 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13702 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13703 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13704 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13705 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13706 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13707 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13708 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13709 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13710 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13711 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13712 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13713 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13714 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13715 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13716 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13717 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13718 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13719 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13720 " %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13721 " %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13722 " %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13723 " %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13724 " %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13725 " %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13726 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13727 " OpReturnValue %ld_${var}_st_test\n"
13728 " OpFunctionEnd\n"
13729 );
13730
13731 const string loadV4F16Arr3
13732 (
13733 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13734 " %ld_${var}_param = OpFunctionParameter %i32\n"
13735 " %ld_${var}_entry = OpLabel\n"
13736 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13737 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13738 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13739 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13740 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13741 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13742 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13743 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13744 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13745 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13746 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13747 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13748 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13749 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13750 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13751 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13752 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13753 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13754 " %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13755 " %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13756 " %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13757 " %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13758 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13759 " OpReturnValue %ld_${var}_st_test\n"
13760 " OpFunctionEnd\n"
13761 );
13762
13763 const string loadStruct16Arr3
13764 (
13765 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13766 " %ld_${var}_param = OpFunctionParameter %i32\n"
13767 " %ld_${var}_entry = OpLabel\n"
13768 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13769 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13770 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13771 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13772 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13773 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13774 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13775 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13776 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13777 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13778 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13779 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13780 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13781 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13782 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13783 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13784 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13785 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13786 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13787 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13788 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13789 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13790 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13791 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13792 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13793 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13794 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13795 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13796 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13797 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13798 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13799 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13800 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13801 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13802 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13803 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13804 " %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13805 " %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13806 " %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13807 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13808 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13809 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13810 " %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13811 " %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13812 " %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13813 " %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13814 " %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13815 " OpReturnValue %ld_${var}_st_test\n"
13816 " OpFunctionEnd\n"
13817 );
13818
13819 const string storeF16
13820 (
13821 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13822 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13823 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13824 " %st_${var}_entry = OpLabel\n"
13825 " %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13826 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13827 " OpReturn\n"
13828 " OpFunctionEnd\n" +
13829 storeScalarF16AsUint
13830 );
13831
13832 const string storeV2F16
13833 (
13834 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13835 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13836 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13837 " %st_${var}_entry = OpLabel\n"
13838 " %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13839 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13840 " OpReturn\n"
13841 " OpFunctionEnd\n" +
13842 storeV2F16AsUint
13843 );
13844
13845 const string storeV3F16
13846 (
13847 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13848 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13849 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13850 " %st_${var}_entry = OpLabel\n"
13851 " %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13852 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13853 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13854 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13855 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13856 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13857 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13858 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13859 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13860 " OpReturn\n"
13861 " OpFunctionEnd\n"
13862 );
13863
13864 const string storeV4F16
13865 (
13866 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13867 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13868 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13869 " %st_${var}_entry = OpLabel\n"
13870 " %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13871 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13872 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13873 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13874 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13875 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13876 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13877 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13878 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13879 " OpReturn\n"
13880 " OpFunctionEnd\n"
13881 );
13882
13883 const string storeF16Arr3
13884 (
13885 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13886 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13887 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13888 " %st_${var}_entry = OpLabel\n"
13889 " %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13890 " %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13891 " %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13892 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13893 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13894 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13895 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13896 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13897 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13898 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13899 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13900 " OpReturn\n"
13901 " OpFunctionEnd\n"
13902 );
13903
13904 const string storeV2F16Arr5
13905 (
13906 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13907 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13908 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13909 " %st_${var}_entry = OpLabel\n"
13910 " %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13911 " %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13912 " %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13913 " %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13914 " %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13915 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13916 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13917 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13918 " %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13919 " %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13920 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13921 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13922 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13923 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13924 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13925 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13926 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13927 " OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13928 " OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13929 " OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13930 " OpReturn\n"
13931 " OpFunctionEnd\n"
13932 );
13933
13934 const string storeV3F16Arr5
13935 (
13936 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13937 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13938 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13939 " %st_${var}_entry = OpLabel\n"
13940 " %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13941 " %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13942 " %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13943 " %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13944 " %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13945 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13946 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13947 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13948 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13949 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13950 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13951 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13952 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13953 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13954 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13955 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13956 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13957 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13958 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13959 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13960 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13961 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13962 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13963 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13964 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13965 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13966 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13967 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13968 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13969 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13970 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13971 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13972 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13973 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13974 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13975 " OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13976 " OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13977 " OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13978 " OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13979 " OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13980 " OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13981 " OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13982 " OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13983 " OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13984 " OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13985 " OpReturn\n"
13986 " OpFunctionEnd\n"
13987 );
13988
13989 const string storeV4F16Arr3
13990 (
13991 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13992 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13993 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13994 " %st_${var}_entry = OpLabel\n"
13995 " %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13996 " %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13997 " %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13998 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13999 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
14000 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
14001 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
14002 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
14003 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
14004 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
14005 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
14006 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
14007 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
14008 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
14009 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
14010 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14011 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14012 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14013 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14014 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14015 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14016 " OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
14017 " OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
14018 " OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
14019 " OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
14020 " OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
14021 " OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
14022 " OpReturn\n"
14023 " OpFunctionEnd\n"
14024 );
14025
14026 const string storeStruct16Arr3
14027 (
14028 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
14029 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
14030 " %st_${var}_param2 = OpFunctionParameter %i32\n"
14031 " %st_${var}_entry = OpLabel\n"
14032 " %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
14033 " %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
14034 " %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
14035 " %st_${var}_el_0 = OpCompositeExtract %f16 %st_${var}_st_0 0\n"
14036 " %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
14037 " %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
14038 " %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
14039 " %st_${var}_el_1 = OpCompositeExtract %f16 %st_${var}_st_1 0\n"
14040 " %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
14041 " %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
14042 " %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
14043 " %st_${var}_el_2 = OpCompositeExtract %f16 %st_${var}_st_2 0\n"
14044 " %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
14045 " %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
14046 " %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
14047 " %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
14048 " %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
14049 " %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
14050 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_v2_0\n"
14051 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
14052 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
14053 " %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
14054 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_v2_1\n"
14055 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
14056 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
14057 " %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
14058 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_v2_2\n"
14059 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
14060 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
14061 " %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
14062 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
14063 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
14064 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
14065 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
14066 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
14067 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
14068 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
14069 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
14070 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
14071 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
14072 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
14073 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
14074 " OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
14075 " OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
14076 " OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
14077 " OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
14078 " OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
14079 " OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
14080 " OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
14081 " OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
14082 " OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
14083 " OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
14084 " OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
14085 " OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
14086 " OpReturn\n"
14087 " OpFunctionEnd\n"
14088 );
14089
14090 struct OpParts
14091 {
14092 const char* premainDecls;
14093 const char* swFunCall;
14094 const char* swFunHeader;
14095 const char* caseDefaultValue;
14096 const char* argsPartial;
14097 };
14098
14099 OpParts opPartsArray[] =
14100 {
14101 // OpCompositeInsert
14102 {
14103 " %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14104 " %SSBO_src = OpTypeStruct %ra_f16\n"
14105 " %SSBO_dst = OpTypeStruct %ra_st\n",
14106
14107 " %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14108 " %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14109 " %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14110
14111 " %sw_fun = OpFunction %st_test None %fun_t\n"
14112 "%sw_paramv = OpFunctionParameter %f16\n",
14113
14114 "%sw_param",
14115
14116 "%st_test %sw_paramv %sw_param",
14117 },
14118 // OpCompositeExtract
14119 {
14120 " %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14121 " %SSBO_src = OpTypeStruct %ra_st\n"
14122 " %SSBO_dst = OpTypeStruct %ra_f16\n",
14123
14124 " %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14125 " %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14126
14127 " %sw_fun = OpFunction %f16 None %fun_t\n",
14128
14129 "%c_f16_na",
14130
14131 "%f16 %sw_param",
14132 },
14133 };
14134
14135 DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14136
14137 const char* accessPathF16[] =
14138 {
14139 "0", // %f16
14140 DE_NULL,
14141 };
14142 const char* accessPathV2F16[] =
14143 {
14144 "0 0", // %v2f16
14145 "0 1",
14146 };
14147 const char* accessPathV3F16[] =
14148 {
14149 "0 0", // %v3f16
14150 "0 1",
14151 "0 2",
14152 DE_NULL,
14153 };
14154 const char* accessPathV4F16[] =
14155 {
14156 "0 0", // %v4f16"
14157 "0 1",
14158 "0 2",
14159 "0 3",
14160 };
14161 const char* accessPathF16Arr3[] =
14162 {
14163 "0 0", // %f16arr3
14164 "0 1",
14165 "0 2",
14166 DE_NULL,
14167 };
14168 const char* accessPathStruct16Arr3[] =
14169 {
14170 "0 0 0", // %struct16arr3
14171 DE_NULL,
14172 "0 0 1 0 0",
14173 "0 0 1 0 1",
14174 "0 0 1 1 0",
14175 "0 0 1 1 1",
14176 "0 0 1 2 0",
14177 "0 0 1 2 1",
14178 "0 1 0",
14179 DE_NULL,
14180 "0 1 1 0 0",
14181 "0 1 1 0 1",
14182 "0 1 1 1 0",
14183 "0 1 1 1 1",
14184 "0 1 1 2 0",
14185 "0 1 1 2 1",
14186 "0 2 0",
14187 DE_NULL,
14188 "0 2 1 0 0",
14189 "0 2 1 0 1",
14190 "0 2 1 1 0",
14191 "0 2 1 1 1",
14192 "0 2 1 2 0",
14193 "0 2 1 2 1",
14194 };
14195 const char* accessPathV2F16Arr5[] =
14196 {
14197 "0 0 0", // %v2f16arr5
14198 "0 0 1",
14199 "0 1 0",
14200 "0 1 1",
14201 "0 2 0",
14202 "0 2 1",
14203 "0 3 0",
14204 "0 3 1",
14205 "0 4 0",
14206 "0 4 1",
14207 };
14208 const char* accessPathV3F16Arr5[] =
14209 {
14210 "0 0 0", // %v3f16arr5
14211 "0 0 1",
14212 "0 0 2",
14213 DE_NULL,
14214 "0 1 0",
14215 "0 1 1",
14216 "0 1 2",
14217 DE_NULL,
14218 "0 2 0",
14219 "0 2 1",
14220 "0 2 2",
14221 DE_NULL,
14222 "0 3 0",
14223 "0 3 1",
14224 "0 3 2",
14225 DE_NULL,
14226 "0 4 0",
14227 "0 4 1",
14228 "0 4 2",
14229 DE_NULL,
14230 };
14231 const char* accessPathV4F16Arr3[] =
14232 {
14233 "0 0 0", // %v4f16arr3
14234 "0 0 1",
14235 "0 0 2",
14236 "0 0 3",
14237 "0 1 0",
14238 "0 1 1",
14239 "0 1 2",
14240 "0 1 3",
14241 "0 2 0",
14242 "0 2 1",
14243 "0 2 2",
14244 "0 2 3",
14245 DE_NULL,
14246 DE_NULL,
14247 DE_NULL,
14248 DE_NULL,
14249 };
14250
14251 struct TypeTestParameters
14252 {
14253 const char* name;
14254 size_t accessPathLength;
14255 const char** accessPath;
14256 const string loadFunction;
14257 const string storeFunction;
14258 };
14259
14260 const TypeTestParameters typeTestParameters[] =
14261 {
14262 { "f16", DE_LENGTH_OF_ARRAY(accessPathF16), accessPathF16, loadF16, storeF16 },
14263 { "v2f16", DE_LENGTH_OF_ARRAY(accessPathV2F16), accessPathV2F16, loadV2F16, storeV2F16 },
14264 { "v3f16", DE_LENGTH_OF_ARRAY(accessPathV3F16), accessPathV3F16, loadV3F16, storeV3F16 },
14265 { "v4f16", DE_LENGTH_OF_ARRAY(accessPathV4F16), accessPathV4F16, loadV4F16, storeV4F16 },
14266 { "f16arr3", DE_LENGTH_OF_ARRAY(accessPathF16Arr3), accessPathF16Arr3, loadF16Arr3, storeF16Arr3 },
14267 { "v2f16arr5", DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5), accessPathV2F16Arr5, loadV2F16Arr5, storeV2F16Arr5 },
14268 { "v3f16arr5", DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5), accessPathV3F16Arr5, loadV3F16Arr5, storeV3F16Arr5 },
14269 { "v4f16arr3", DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3), accessPathV4F16Arr3, loadV4F16Arr3, storeV4F16Arr3 },
14270 { "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3), accessPathStruct16Arr3, loadStruct16Arr3, storeStruct16Arr3},
14271 };
14272
14273 for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14274 {
14275 const OpParts opParts = opPartsArray[opIndex];
14276 const string testName = typeTestParameters[typeTestNdx].name;
14277 const size_t structItemsCount = typeTestParameters[typeTestNdx].accessPathLength;
14278 const char** accessPath = typeTestParameters[typeTestNdx].accessPath;
14279 SpecResource specResource;
14280 map<string, string> specs;
14281 VulkanFeatures features;
14282 map<string, string> fragments;
14283 vector<string> extensions;
14284 vector<deFloat16> inputFP16;
14285 vector<deFloat16> unusedFP16Output;
14286
14287 // Generate values for input
14288 inputFP16.reserve(structItemsCount);
14289 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14290 inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
14291
14292 unusedFP16Output.resize(structItemsCount);
14293
14294 // Generate cases for OpSwitch
14295 {
14296 string caseBodies;
14297 string caseList;
14298
14299 for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14300 if (accessPath[caseNdx] != DE_NULL)
14301 {
14302 map<string, string> specCase;
14303
14304 specCase["case_ndx"] = de::toString(caseNdx);
14305 specCase["access_path"] = accessPath[caseNdx];
14306 specCase["op_args_part"] = opParts.argsPartial;
14307 specCase["op_name"] = opName;
14308
14309 caseBodies += testCaseBody.specialize(specCase);
14310 caseList += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14311 }
14312
14313 specs["case_bodies"] = caseBodies;
14314 specs["case_list"] = caseList;
14315 }
14316
14317 specs["num_elements"] = de::toString(structItemsCount);
14318 specs["field_type"] = typeTestParameters[typeTestNdx].name;
14319 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
14320 specs["struct_u32s"] = de::toString(structItemsCount / 2);
14321 specs["op_premain_decls"] = opParts.premainDecls;
14322 specs["op_sw_fun_call"] = opParts.swFunCall;
14323 specs["op_sw_fun_header"] = opParts.swFunHeader;
14324 specs["op_case_default_value"] = opParts.caseDefaultValue;
14325 if (opIndex == 0) {
14326 specs["st_call"] = "st_ssbo_dst";
14327 specs["st_ndx"] = "c_i32_0";
14328 } else {
14329 specs["st_call"] = "st_fn_ssbo_dst";
14330 specs["st_ndx"] = "ndx";
14331 }
14332
14333 fragments["capability"] = "OpCapability Float16\n";
14334 fragments["decoration"] = decoration.specialize(specs);
14335 fragments["pre_main"] = preMain.specialize(specs);
14336 fragments["testfun"] = testFun.specialize(specs);
14337 if (opIndex == 0) {
14338 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14339 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14340 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14341 } else {
14342 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14343 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14344 }
14345
14346 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14347 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14348 specResource.verifyIO = compareFP16CompositeFunc;
14349
14350 extensions.push_back("VK_KHR_shader_float16_int8");
14351
14352 features.extFloat16Int8.shaderFloat16 = true;
14353
14354 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
14355 }
14356
14357 return testGroup.release();
14358 }
14359
14360 struct fp16PerComponent
14361 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14362 fp16PerComponent()
14363 : flavor(0)
14364 , floatFormat16 (-14, 15, 10, true)
14365 , outCompCount(0)
14366 , argCompCount(3, 0)
14367 {
14368 }
14369
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14370 bool callOncePerComponent () { return true; }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14371 deUint32 getComponentValidity () { return static_cast<deUint32>(-1); }
14372
getULPsvkt::SpirVAssembly::fp16PerComponent14373 virtual double getULPs (vector<const deFloat16*>&) { return 1.0; }
getMinvkt::SpirVAssembly::fp16PerComponent14374 virtual double getMin (double value, double ulps) { return value - floatFormat16.ulp(deAbs(value), ulps); }
getMaxvkt::SpirVAssembly::fp16PerComponent14375 virtual double getMax (double value, double ulps) { return value + floatFormat16.ulp(deAbs(value), ulps); }
14376
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14377 virtual size_t getFlavorCount () { return flavorNames.empty() ? 1 : flavorNames.size(); }
setFlavorvkt::SpirVAssembly::fp16PerComponent14378 virtual void setFlavor (size_t flavorNo) { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
getFlavorvkt::SpirVAssembly::fp16PerComponent14379 virtual size_t getFlavor () { return flavor; }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14380 virtual string getCurrentFlavorName () { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14381
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14382 virtual void setOutCompCount (size_t compCount) { outCompCount = compCount; }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14383 virtual size_t getOutCompCount () { return outCompCount; }
14384
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14385 virtual void setArgCompCount (size_t argNo, size_t compCount) { argCompCount[argNo] = compCount; }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14386 virtual size_t getArgCompCount (size_t argNo) { return argCompCount[argNo]; }
14387
14388 protected:
14389 size_t flavor;
14390 tcu::FloatFormat floatFormat16;
14391 size_t outCompCount;
14392 vector<size_t> argCompCount;
14393 vector<string> flavorNames;
14394 };
14395
14396 struct fp16OpFNegate : public fp16PerComponent
14397 {
14398 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14399 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14400 {
14401 const fp16type x (*in[0]);
14402 const double d (x.asDouble());
14403 const double result (0.0 - d);
14404
14405 out[0] = fp16type(result).bits();
14406 min[0] = getMin(result, getULPs(in));
14407 max[0] = getMax(result, getULPs(in));
14408
14409 return true;
14410 }
14411 };
14412
14413 struct fp16Round : public fp16PerComponent
14414 {
fp16Roundvkt::SpirVAssembly::fp16Round14415 fp16Round() : fp16PerComponent()
14416 {
14417 flavorNames.push_back("Floor(x+0.5)");
14418 flavorNames.push_back("Floor(x-0.5)");
14419 flavorNames.push_back("RoundEven");
14420 }
14421
14422 template<class fp16type>
calcvkt::SpirVAssembly::fp16Round14423 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14424 {
14425 const fp16type x (*in[0]);
14426 const double d (x.asDouble());
14427 double result (0.0);
14428
14429 switch (flavor)
14430 {
14431 case 0: result = deRound(d); break;
14432 case 1: result = deFloor(d - 0.5); break;
14433 case 2: result = deRoundEven(d); break;
14434 default: TCU_THROW(InternalError, "Invalid flavor specified");
14435 }
14436
14437 out[0] = fp16type(result).bits();
14438 min[0] = getMin(result, getULPs(in));
14439 max[0] = getMax(result, getULPs(in));
14440
14441 return true;
14442 }
14443 };
14444
14445 struct fp16RoundEven : public fp16PerComponent
14446 {
14447 template<class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14448 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14449 {
14450 const fp16type x (*in[0]);
14451 const double d (x.asDouble());
14452 const double result (deRoundEven(d));
14453
14454 out[0] = fp16type(result).bits();
14455 min[0] = getMin(result, getULPs(in));
14456 max[0] = getMax(result, getULPs(in));
14457
14458 return true;
14459 }
14460 };
14461
14462 struct fp16Trunc : public fp16PerComponent
14463 {
14464 template<class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14465 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14466 {
14467 const fp16type x (*in[0]);
14468 const double d (x.asDouble());
14469 const double result (deTrunc(d));
14470
14471 out[0] = fp16type(result).bits();
14472 min[0] = getMin(result, getULPs(in));
14473 max[0] = getMax(result, getULPs(in));
14474
14475 return true;
14476 }
14477 };
14478
14479 struct fp16FAbs : public fp16PerComponent
14480 {
14481 template<class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14482 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14483 {
14484 const fp16type x (*in[0]);
14485 const double d (x.asDouble());
14486 const double result (deAbs(d));
14487
14488 out[0] = fp16type(result).bits();
14489 min[0] = getMin(result, getULPs(in));
14490 max[0] = getMax(result, getULPs(in));
14491
14492 return true;
14493 }
14494 };
14495
14496 struct fp16FSign : public fp16PerComponent
14497 {
14498 template<class fp16type>
calcvkt::SpirVAssembly::fp16FSign14499 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14500 {
14501 const fp16type x (*in[0]);
14502 const double d (x.asDouble());
14503 const double result (deSign(d));
14504
14505 if (x.isNaN())
14506 return false;
14507
14508 out[0] = fp16type(result).bits();
14509 min[0] = getMin(result, getULPs(in));
14510 max[0] = getMax(result, getULPs(in));
14511
14512 return true;
14513 }
14514 };
14515
14516 struct fp16Floor : public fp16PerComponent
14517 {
14518 template<class fp16type>
calcvkt::SpirVAssembly::fp16Floor14519 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14520 {
14521 const fp16type x (*in[0]);
14522 const double d (x.asDouble());
14523 const double result (deFloor(d));
14524
14525 out[0] = fp16type(result).bits();
14526 min[0] = getMin(result, getULPs(in));
14527 max[0] = getMax(result, getULPs(in));
14528
14529 return true;
14530 }
14531 };
14532
14533 struct fp16Ceil : public fp16PerComponent
14534 {
14535 template<class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14536 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14537 {
14538 const fp16type x (*in[0]);
14539 const double d (x.asDouble());
14540 const double result (deCeil(d));
14541
14542 out[0] = fp16type(result).bits();
14543 min[0] = getMin(result, getULPs(in));
14544 max[0] = getMax(result, getULPs(in));
14545
14546 return true;
14547 }
14548 };
14549
14550 struct fp16Fract : public fp16PerComponent
14551 {
14552 template<class fp16type>
calcvkt::SpirVAssembly::fp16Fract14553 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14554 {
14555 const fp16type x (*in[0]);
14556 const double d (x.asDouble());
14557 const double result (deFrac(d));
14558
14559 out[0] = fp16type(result).bits();
14560 min[0] = getMin(result, getULPs(in));
14561 max[0] = getMax(result, getULPs(in));
14562
14563 return true;
14564 }
14565 };
14566
14567 struct fp16Radians : public fp16PerComponent
14568 {
getULPsvkt::SpirVAssembly::fp16Radians14569 virtual double getULPs (vector<const deFloat16*>& in)
14570 {
14571 DE_UNREF(in);
14572
14573 return 2.5;
14574 }
14575
14576 template<class fp16type>
calcvkt::SpirVAssembly::fp16Radians14577 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14578 {
14579 const fp16type x (*in[0]);
14580 const float d (x.asFloat());
14581 const float result (deFloatRadians(d));
14582
14583 out[0] = fp16type(result).bits();
14584 min[0] = getMin(result, getULPs(in));
14585 max[0] = getMax(result, getULPs(in));
14586
14587 return true;
14588 }
14589 };
14590
14591 struct fp16Degrees : public fp16PerComponent
14592 {
getULPsvkt::SpirVAssembly::fp16Degrees14593 virtual double getULPs (vector<const deFloat16*>& in)
14594 {
14595 DE_UNREF(in);
14596
14597 return 2.5;
14598 }
14599
14600 template<class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14601 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14602 {
14603 const fp16type x (*in[0]);
14604 const float d (x.asFloat());
14605 const float result (deFloatDegrees(d));
14606
14607 out[0] = fp16type(result).bits();
14608 min[0] = getMin(result, getULPs(in));
14609 max[0] = getMax(result, getULPs(in));
14610
14611 return true;
14612 }
14613 };
14614
14615 struct fp16Sin : public fp16PerComponent
14616 {
14617 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sin14618 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14619 {
14620 const fp16type x (*in[0]);
14621 const double d (x.asDouble());
14622 const double result (deSin(d));
14623 const double unspecUlp (16.0);
14624 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14625
14626 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14627 return false;
14628
14629 out[0] = fp16type(result).bits();
14630 min[0] = result - err;
14631 max[0] = result + err;
14632
14633 return true;
14634 }
14635 };
14636
14637 struct fp16Cos : public fp16PerComponent
14638 {
14639 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cos14640 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14641 {
14642 const fp16type x (*in[0]);
14643 const double d (x.asDouble());
14644 const double result (deCos(d));
14645 const double unspecUlp (16.0);
14646 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14647
14648 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14649 return false;
14650
14651 out[0] = fp16type(result).bits();
14652 min[0] = result - err;
14653 max[0] = result + err;
14654
14655 return true;
14656 }
14657 };
14658
14659 struct fp16Tan : public fp16PerComponent
14660 {
14661 template<class fp16type>
calcvkt::SpirVAssembly::fp16Tan14662 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14663 {
14664 const fp16type x (*in[0]);
14665 const double d (x.asDouble());
14666 const double result (deTan(d));
14667
14668 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14669 return false;
14670
14671 out[0] = fp16type(result).bits();
14672 {
14673 const double err = deLdExp(1.0, -7);
14674 const double s1 = deSin(d) + err;
14675 const double s2 = deSin(d) - err;
14676 const double c1 = deCos(d) + err;
14677 const double c2 = deCos(d) - err;
14678 const double edgeVals[] = {s1/c1, s1/c2, s2/c1, s2/c2};
14679 double edgeLeft = out[0];
14680 double edgeRight = out[0];
14681
14682 if (deSign(c1 * c2) < 0.0)
14683 {
14684 edgeLeft = -std::numeric_limits<double>::infinity();
14685 edgeRight = +std::numeric_limits<double>::infinity();
14686 }
14687 else
14688 {
14689 edgeLeft = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14690 edgeRight = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14691 }
14692
14693 min[0] = edgeLeft;
14694 max[0] = edgeRight;
14695 }
14696
14697 return true;
14698 }
14699 };
14700
14701 struct fp16Asin : public fp16PerComponent
14702 {
14703 template<class fp16type>
calcvkt::SpirVAssembly::fp16Asin14704 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14705 {
14706 const fp16type x (*in[0]);
14707 const double d (x.asDouble());
14708 const double result (deAsin(d));
14709 const double error (deAtan2(d, sqrt(1.0 - d * d)));
14710
14711 if (!x.isNaN() && deAbs(d) > 1.0)
14712 return false;
14713
14714 out[0] = fp16type(result).bits();
14715 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14716 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14717
14718 return true;
14719 }
14720 };
14721
14722 struct fp16Acos : public fp16PerComponent
14723 {
14724 template<class fp16type>
calcvkt::SpirVAssembly::fp16Acos14725 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14726 {
14727 const fp16type x (*in[0]);
14728 const double d (x.asDouble());
14729 const double result (deAcos(d));
14730 const double error (deAtan2(sqrt(1.0 - d * d), d));
14731
14732 if (!x.isNaN() && deAbs(d) > 1.0)
14733 return false;
14734
14735 out[0] = fp16type(result).bits();
14736 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14737 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14738
14739 return true;
14740 }
14741 };
14742
14743 struct fp16Atan : public fp16PerComponent
14744 {
getULPsvkt::SpirVAssembly::fp16Atan14745 virtual double getULPs(vector<const deFloat16*>& in)
14746 {
14747 DE_UNREF(in);
14748
14749 return 2 * 5.0; // This is not a precision test. Value is not from spec
14750 }
14751
14752 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan14753 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14754 {
14755 const fp16type x (*in[0]);
14756 const double d (x.asDouble());
14757 const double result (deAtanOver(d));
14758
14759 out[0] = fp16type(result).bits();
14760 min[0] = getMin(result, getULPs(in));
14761 max[0] = getMax(result, getULPs(in));
14762
14763 return true;
14764 }
14765 };
14766
14767 struct fp16Sinh : public fp16PerComponent
14768 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh14769 fp16Sinh() : fp16PerComponent()
14770 {
14771 flavorNames.push_back("Double");
14772 flavorNames.push_back("ExpFP16");
14773 }
14774
14775 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sinh14776 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14777 {
14778 const fp16type x (*in[0]);
14779 const double d (x.asDouble());
14780 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14781 double result (0.0);
14782 double error (0.0);
14783
14784 if (getFlavor() == 0)
14785 {
14786 result = deSinh(d);
14787 error = floatFormat16.ulp(deAbs(result), ulps);
14788 }
14789 else if (getFlavor() == 1)
14790 {
14791 const fp16type epx (deExp(d));
14792 const fp16type enx (deExp(-d));
14793 const fp16type esx (epx.asDouble() - enx.asDouble());
14794 const fp16type sx2 (esx.asDouble() / 2.0);
14795
14796 result = sx2.asDouble();
14797 error = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14798 }
14799 else
14800 {
14801 TCU_THROW(InternalError, "Unknown flavor");
14802 }
14803
14804 out[0] = fp16type(result).bits();
14805 min[0] = result - error;
14806 max[0] = result + error;
14807
14808 return true;
14809 }
14810 };
14811
14812 struct fp16Cosh : public fp16PerComponent
14813 {
fp16Coshvkt::SpirVAssembly::fp16Cosh14814 fp16Cosh() : fp16PerComponent()
14815 {
14816 flavorNames.push_back("Double");
14817 flavorNames.push_back("ExpFP16");
14818 }
14819
14820 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cosh14821 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14822 {
14823 const fp16type x (*in[0]);
14824 const double d (x.asDouble());
14825 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14826 double result (0.0);
14827
14828 if (getFlavor() == 0)
14829 {
14830 result = deCosh(d);
14831 }
14832 else if (getFlavor() == 1)
14833 {
14834 const fp16type epx (deExp(d));
14835 const fp16type enx (deExp(-d));
14836 const fp16type esx (epx.asDouble() + enx.asDouble());
14837 const fp16type sx2 (esx.asDouble() / 2.0);
14838
14839 result = sx2.asDouble();
14840 }
14841 else
14842 {
14843 TCU_THROW(InternalError, "Unknown flavor");
14844 }
14845
14846 out[0] = fp16type(result).bits();
14847 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14848 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14849
14850 return true;
14851 }
14852 };
14853
14854 struct fp16Tanh : public fp16PerComponent
14855 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh14856 fp16Tanh() : fp16PerComponent()
14857 {
14858 flavorNames.push_back("Tanh");
14859 flavorNames.push_back("SinhCosh");
14860 flavorNames.push_back("SinhCoshFP16");
14861 flavorNames.push_back("PolyFP16");
14862 }
14863
getULPsvkt::SpirVAssembly::fp16Tanh14864 virtual double getULPs (vector<const deFloat16*>& in)
14865 {
14866 const tcu::Float16 x (*in[0]);
14867 const double d (x.asDouble());
14868
14869 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14870 }
14871
14872 template<class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh14873 inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14874 {
14875 const fp16type esx (espx.asDouble() - esnx.asDouble());
14876 const fp16type sx2 (esx.asDouble() / 2.0);
14877 const fp16type ecx (ecpx.asDouble() + ecnx.asDouble());
14878 const fp16type cx2 (ecx.asDouble() / 2.0);
14879 const fp16type tg (sx2.asDouble() / cx2.asDouble());
14880 const double rez (tg.asDouble());
14881
14882 return rez;
14883 }
14884
14885 template<class fp16type>
calcvkt::SpirVAssembly::fp16Tanh14886 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14887 {
14888 const fp16type x (*in[0]);
14889 const double d (x.asDouble());
14890 double result (0.0);
14891
14892 if (getFlavor() == 0)
14893 {
14894 result = deTanh(d);
14895 min[0] = getMin(result, getULPs(in));
14896 max[0] = getMax(result, getULPs(in));
14897 }
14898 else if (getFlavor() == 1)
14899 {
14900 result = deSinh(d) / deCosh(d);
14901 min[0] = getMin(result, getULPs(in));
14902 max[0] = getMax(result, getULPs(in));
14903 }
14904 else if (getFlavor() == 2)
14905 {
14906 const fp16type s (deSinh(d));
14907 const fp16type c (deCosh(d));
14908
14909 result = s.asDouble() / c.asDouble();
14910 min[0] = getMin(result, getULPs(in));
14911 max[0] = getMax(result, getULPs(in));
14912 }
14913 else if (getFlavor() == 3)
14914 {
14915 const double ulps (getULPs(in));
14916 const double epxm (deExp( d));
14917 const double enxm (deExp(-d));
14918 const double epxmerr = floatFormat16.ulp(epxm, ulps);
14919 const double enxmerr = floatFormat16.ulp(enxm, ulps);
14920 const fp16type epx[] = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14921 const fp16type enx[] = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14922 const fp16type epxm16 (epxm);
14923 const fp16type enxm16 (enxm);
14924 vector<double> tgs;
14925
14926 for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14927 for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14928 for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14929 for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14930 {
14931 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14932
14933 tgs.push_back(tgh);
14934 }
14935
14936 result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14937 min[0] = *std::min_element(tgs.begin(), tgs.end());
14938 max[0] = *std::max_element(tgs.begin(), tgs.end());
14939 }
14940 else
14941 {
14942 TCU_THROW(InternalError, "Unknown flavor");
14943 }
14944
14945 out[0] = fp16type(result).bits();
14946
14947 return true;
14948 }
14949 };
14950
14951 struct fp16Asinh : public fp16PerComponent
14952 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh14953 fp16Asinh() : fp16PerComponent()
14954 {
14955 flavorNames.push_back("Double");
14956 flavorNames.push_back("PolyFP16Wiki");
14957 flavorNames.push_back("PolyFP16Abs");
14958 }
14959
getULPsvkt::SpirVAssembly::fp16Asinh14960 virtual double getULPs (vector<const deFloat16*>& in)
14961 {
14962 DE_UNREF(in);
14963
14964 return 256.0; // This is not a precision test. Value is not from spec
14965 }
14966
14967 template<class fp16type>
calcvkt::SpirVAssembly::fp16Asinh14968 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14969 {
14970 const fp16type x (*in[0]);
14971 const double d (x.asDouble());
14972 double result (0.0);
14973
14974 if (getFlavor() == 0)
14975 {
14976 result = deAsinh(d);
14977 }
14978 else if (getFlavor() == 1)
14979 {
14980 const fp16type x2 (d * d);
14981 const fp16type x2p1 (x2.asDouble() + 1.0);
14982 const fp16type sq (deSqrt(x2p1.asDouble()));
14983 const fp16type sxsq (d + sq.asDouble());
14984 const fp16type lsxsq (deLog(sxsq.asDouble()));
14985
14986 if (lsxsq.isInf())
14987 return false;
14988
14989 result = lsxsq.asDouble();
14990 }
14991 else if (getFlavor() == 2)
14992 {
14993 const fp16type x2 (d * d);
14994 const fp16type x2p1 (x2.asDouble() + 1.0);
14995 const fp16type sq (deSqrt(x2p1.asDouble()));
14996 const fp16type sxsq (deAbs(d) + sq.asDouble());
14997 const fp16type lsxsq (deLog(sxsq.asDouble()));
14998
14999 result = deSign(d) * lsxsq.asDouble();
15000 }
15001 else
15002 {
15003 TCU_THROW(InternalError, "Unknown flavor");
15004 }
15005
15006 out[0] = fp16type(result).bits();
15007 min[0] = getMin(result, getULPs(in));
15008 max[0] = getMax(result, getULPs(in));
15009
15010 return true;
15011 }
15012 };
15013
15014 struct fp16Acosh : public fp16PerComponent
15015 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh15016 fp16Acosh() : fp16PerComponent()
15017 {
15018 flavorNames.push_back("Double");
15019 flavorNames.push_back("PolyFP16");
15020 }
15021
getULPsvkt::SpirVAssembly::fp16Acosh15022 virtual double getULPs (vector<const deFloat16*>& in)
15023 {
15024 DE_UNREF(in);
15025
15026 return 16.0; // This is not a precision test. Value is not from spec
15027 }
15028
15029 template<class fp16type>
calcvkt::SpirVAssembly::fp16Acosh15030 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15031 {
15032 const fp16type x (*in[0]);
15033 const double d (x.asDouble());
15034 double result (0.0);
15035
15036 if (!x.isNaN() && d < 1.0)
15037 return false;
15038
15039 if (getFlavor() == 0)
15040 {
15041 result = deAcosh(d);
15042 }
15043 else if (getFlavor() == 1)
15044 {
15045 const fp16type x2 (d * d);
15046 const fp16type x2m1 (x2.asDouble() - 1.0);
15047 const fp16type sq (deSqrt(x2m1.asDouble()));
15048 const fp16type sxsq (d + sq.asDouble());
15049 const fp16type lsxsq (deLog(sxsq.asDouble()));
15050
15051 result = lsxsq.asDouble();
15052 }
15053 else
15054 {
15055 TCU_THROW(InternalError, "Unknown flavor");
15056 }
15057
15058 out[0] = fp16type(result).bits();
15059 min[0] = getMin(result, getULPs(in));
15060 max[0] = getMax(result, getULPs(in));
15061
15062 return true;
15063 }
15064 };
15065
15066 struct fp16Atanh : public fp16PerComponent
15067 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh15068 fp16Atanh() : fp16PerComponent()
15069 {
15070 flavorNames.push_back("Double");
15071 flavorNames.push_back("PolyFP16");
15072 }
15073
15074 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atanh15075 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15076 {
15077 const fp16type x (*in[0]);
15078 const double d (x.asDouble());
15079 double result (0.0);
15080
15081 if (deAbs(d) >= 1.0)
15082 return false;
15083
15084 if (getFlavor() == 0)
15085 {
15086 const double ulps (16.0); // This is not a precision test. Value is not from spec
15087
15088 result = deAtanh(d);
15089 min[0] = getMin(result, ulps);
15090 max[0] = getMax(result, ulps);
15091 }
15092 else if (getFlavor() == 1)
15093 {
15094 const fp16type x1a (1.0 + d);
15095 const fp16type x1b (1.0 - d);
15096 const fp16type x1d (x1a.asDouble() / x1b.asDouble());
15097 const fp16type lx1d (deLog(x1d.asDouble()));
15098 const fp16type lx1d2 (0.5 * lx1d.asDouble());
15099 const double error (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15100
15101 result = lx1d2.asDouble();
15102 min[0] = result - error;
15103 max[0] = result + error;
15104 }
15105 else
15106 {
15107 TCU_THROW(InternalError, "Unknown flavor");
15108 }
15109
15110 out[0] = fp16type(result).bits();
15111
15112 return true;
15113 }
15114 };
15115
15116 struct fp16Exp : public fp16PerComponent
15117 {
15118 template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp15119 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15120 {
15121 const fp16type x (*in[0]);
15122 const double d (x.asDouble());
15123 const double ulps (10.0 * (1.0 + 2.0 * deAbs(d)));
15124 const double result (deExp(d));
15125
15126 out[0] = fp16type(result).bits();
15127 min[0] = getMin(result, ulps);
15128 max[0] = getMax(result, ulps);
15129
15130 return true;
15131 }
15132 };
15133
15134 struct fp16Log : public fp16PerComponent
15135 {
15136 template<class fp16type>
calcvkt::SpirVAssembly::fp16Log15137 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15138 {
15139 const fp16type x (*in[0]);
15140 const double d (x.asDouble());
15141 const double result (deLog(d));
15142 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15143
15144 if (d <= 0.0)
15145 return false;
15146
15147 out[0] = fp16type(result).bits();
15148 min[0] = result - error;
15149 max[0] = result + error;
15150
15151 return true;
15152 }
15153 };
15154
15155 struct fp16Exp2 : public fp16PerComponent
15156 {
15157 template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp215158 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15159 {
15160 const fp16type x (*in[0]);
15161 const double d (x.asDouble());
15162 const double result (deExp2(d));
15163 const double ulps (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15164
15165 out[0] = fp16type(result).bits();
15166 min[0] = getMin(result, ulps);
15167 max[0] = getMax(result, ulps);
15168
15169 return true;
15170 }
15171 };
15172
15173 struct fp16Log2 : public fp16PerComponent
15174 {
15175 template<class fp16type>
calcvkt::SpirVAssembly::fp16Log215176 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15177 {
15178 const fp16type x (*in[0]);
15179 const double d (x.asDouble());
15180 const double result (deLog2(d));
15181 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15182
15183 if (d <= 0.0)
15184 return false;
15185
15186 out[0] = fp16type(result).bits();
15187 min[0] = result - error;
15188 max[0] = result + error;
15189
15190 return true;
15191 }
15192 };
15193
15194 struct fp16Sqrt : public fp16PerComponent
15195 {
getULPsvkt::SpirVAssembly::fp16Sqrt15196 virtual double getULPs (vector<const deFloat16*>& in)
15197 {
15198 DE_UNREF(in);
15199
15200 return 6.0;
15201 }
15202
15203 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15204 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15205 {
15206 const fp16type x (*in[0]);
15207 const double d (x.asDouble());
15208 const double result (deSqrt(d));
15209
15210 if (!x.isNaN() && d < 0.0)
15211 return false;
15212
15213 out[0] = fp16type(result).bits();
15214 min[0] = getMin(result, getULPs(in));
15215 max[0] = getMax(result, getULPs(in));
15216
15217 return true;
15218 }
15219 };
15220
15221 struct fp16InverseSqrt : public fp16PerComponent
15222 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15223 virtual double getULPs (vector<const deFloat16*>& in)
15224 {
15225 DE_UNREF(in);
15226
15227 return 2.0;
15228 }
15229
15230 template<class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15231 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15232 {
15233 const fp16type x (*in[0]);
15234 const double d (x.asDouble());
15235 const double result (1.0/deSqrt(d));
15236
15237 if (!x.isNaN() && d <= 0.0)
15238 return false;
15239
15240 out[0] = fp16type(result).bits();
15241 min[0] = getMin(result, getULPs(in));
15242 max[0] = getMax(result, getULPs(in));
15243
15244 return true;
15245 }
15246 };
15247
15248 struct fp16ModfFrac : public fp16PerComponent
15249 {
15250 template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15251 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15252 {
15253 const fp16type x (*in[0]);
15254 const double d (x.asDouble());
15255 double i (0.0);
15256 const double result (deModf(d, &i));
15257
15258 if (x.isInf() || x.isNaN())
15259 return false;
15260
15261 out[0] = fp16type(result).bits();
15262 min[0] = getMin(result, getULPs(in));
15263 max[0] = getMax(result, getULPs(in));
15264
15265 return true;
15266 }
15267 };
15268
15269 struct fp16ModfInt : public fp16PerComponent
15270 {
15271 template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15272 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15273 {
15274 const fp16type x (*in[0]);
15275 const double d (x.asDouble());
15276 double i (0.0);
15277 const double unused (deModf(d, &i));
15278 const double result (i);
15279
15280 DE_UNREF(unused);
15281
15282 if (x.isInf() || x.isNaN())
15283 return false;
15284
15285 out[0] = fp16type(result).bits();
15286 min[0] = getMin(result, getULPs(in));
15287 max[0] = getMax(result, getULPs(in));
15288
15289 return true;
15290 }
15291 };
15292
15293 struct fp16FrexpS : public fp16PerComponent
15294 {
15295 template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15296 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15297 {
15298 const fp16type x (*in[0]);
15299 const double d (x.asDouble());
15300 int e (0);
15301 const double result (deFrExp(d, &e));
15302
15303 if (x.isNaN() || x.isInf())
15304 return false;
15305
15306 out[0] = fp16type(result).bits();
15307 min[0] = getMin(result, getULPs(in));
15308 max[0] = getMax(result, getULPs(in));
15309
15310 return true;
15311 }
15312 };
15313
15314 struct fp16FrexpE : public fp16PerComponent
15315 {
15316 template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15317 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15318 {
15319 const fp16type x (*in[0]);
15320 const double d (x.asDouble());
15321 int e (0);
15322 const double unused (deFrExp(d, &e));
15323 const double result (static_cast<double>(e));
15324
15325 DE_UNREF(unused);
15326
15327 if (x.isNaN() || x.isInf())
15328 return false;
15329
15330 out[0] = fp16type(result).bits();
15331 min[0] = getMin(result, getULPs(in));
15332 max[0] = getMax(result, getULPs(in));
15333
15334 return true;
15335 }
15336 };
15337
15338 struct fp16OpFAdd : public fp16PerComponent
15339 {
15340 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15341 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15342 {
15343 const fp16type x (*in[0]);
15344 const fp16type y (*in[1]);
15345 const double xd (x.asDouble());
15346 const double yd (y.asDouble());
15347 const double result (xd + yd);
15348
15349 out[0] = fp16type(result).bits();
15350 min[0] = getMin(result, getULPs(in));
15351 max[0] = getMax(result, getULPs(in));
15352
15353 return true;
15354 }
15355 };
15356
15357 struct fp16OpFSub : public fp16PerComponent
15358 {
15359 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15360 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15361 {
15362 const fp16type x (*in[0]);
15363 const fp16type y (*in[1]);
15364 const double xd (x.asDouble());
15365 const double yd (y.asDouble());
15366 const double result (xd - yd);
15367
15368 out[0] = fp16type(result).bits();
15369 min[0] = getMin(result, getULPs(in));
15370 max[0] = getMax(result, getULPs(in));
15371
15372 return true;
15373 }
15374 };
15375
15376 struct fp16OpFMul : public fp16PerComponent
15377 {
15378 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15379 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15380 {
15381 const fp16type x (*in[0]);
15382 const fp16type y (*in[1]);
15383 const double xd (x.asDouble());
15384 const double yd (y.asDouble());
15385 const double result (xd * yd);
15386
15387 out[0] = fp16type(result).bits();
15388 min[0] = getMin(result, getULPs(in));
15389 max[0] = getMax(result, getULPs(in));
15390
15391 return true;
15392 }
15393 };
15394
15395 struct fp16OpFDiv : public fp16PerComponent
15396 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15397 fp16OpFDiv() : fp16PerComponent()
15398 {
15399 flavorNames.push_back("DirectDiv");
15400 flavorNames.push_back("InverseDiv");
15401 }
15402
15403 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15404 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15405 {
15406 const fp16type x (*in[0]);
15407 const fp16type y (*in[1]);
15408 const double xd (x.asDouble());
15409 const double yd (y.asDouble());
15410 const double unspecUlp (16.0);
15411 const double ulpCnt (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15412 double result (0.0);
15413
15414 if (y.isZero())
15415 return false;
15416
15417 if (getFlavor() == 0)
15418 {
15419 result = (xd / yd);
15420 }
15421 else if (getFlavor() == 1)
15422 {
15423 const double invyd (1.0 / yd);
15424 const fp16type invy (invyd);
15425
15426 result = (xd * invy.asDouble());
15427 }
15428 else
15429 {
15430 TCU_THROW(InternalError, "Unknown flavor");
15431 }
15432
15433 out[0] = fp16type(result).bits();
15434 min[0] = getMin(result, ulpCnt);
15435 max[0] = getMax(result, ulpCnt);
15436
15437 return true;
15438 }
15439 };
15440
15441 struct fp16Atan2 : public fp16PerComponent
15442 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215443 fp16Atan2() : fp16PerComponent()
15444 {
15445 flavorNames.push_back("DoubleCalc");
15446 flavorNames.push_back("DoubleCalc_PI");
15447 }
15448
getULPsvkt::SpirVAssembly::fp16Atan215449 virtual double getULPs(vector<const deFloat16*>& in)
15450 {
15451 DE_UNREF(in);
15452
15453 return 2 * 5.0; // This is not a precision test. Value is not from spec
15454 }
15455
15456 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan215457 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15458 {
15459 const fp16type x (*in[0]);
15460 const fp16type y (*in[1]);
15461 const double xd (x.asDouble());
15462 const double yd (y.asDouble());
15463 double result (0.0);
15464
15465 if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15466 return false;
15467
15468 if (getFlavor() == 0)
15469 {
15470 result = deAtan2(xd, yd);
15471 }
15472 else if (getFlavor() == 1)
15473 {
15474 const double ulps (2.0 * 5.0); // This is not a precision test. Value is not from spec
15475 const double eps (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15476
15477 result = deAtan2(xd, yd);
15478
15479 if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15480 result = -result;
15481 }
15482 else
15483 {
15484 TCU_THROW(InternalError, "Unknown flavor");
15485 }
15486
15487 out[0] = fp16type(result).bits();
15488 min[0] = getMin(result, getULPs(in));
15489 max[0] = getMax(result, getULPs(in));
15490
15491 return true;
15492 }
15493 };
15494
15495 struct fp16Pow : public fp16PerComponent
15496 {
fp16Powvkt::SpirVAssembly::fp16Pow15497 fp16Pow() : fp16PerComponent()
15498 {
15499 flavorNames.push_back("Pow");
15500 flavorNames.push_back("PowLog2");
15501 flavorNames.push_back("PowLog2FP16");
15502 }
15503
15504 template<class fp16type>
calcvkt::SpirVAssembly::fp16Pow15505 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15506 {
15507 const fp16type x (*in[0]);
15508 const fp16type y (*in[1]);
15509 const double xd (x.asDouble());
15510 const double yd (y.asDouble());
15511 const double logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15512 const double ulps1 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15513 const double ulps2 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15514 const double ulps (deMax(deAbs(ulps1), deAbs(ulps2)));
15515 double result (0.0);
15516
15517 if (xd < 0.0)
15518 return false;
15519
15520 if (x.isZero() && yd <= 0.0)
15521 return false;
15522
15523 if (getFlavor() == 0)
15524 {
15525 result = dePow(xd, yd);
15526 }
15527 else if (getFlavor() == 1)
15528 {
15529 const double l2d (deLog2(xd));
15530 const double e2d (deExp2(yd * l2d));
15531
15532 result = e2d;
15533 }
15534 else if (getFlavor() == 2)
15535 {
15536 const double l2d (deLog2(xd));
15537 const fp16type l2 (l2d);
15538 const double e2d (deExp2(yd * l2.asDouble()));
15539 const fp16type e2 (e2d);
15540
15541 result = e2.asDouble();
15542 }
15543 else
15544 {
15545 TCU_THROW(InternalError, "Unknown flavor");
15546 }
15547
15548 out[0] = fp16type(result).bits();
15549 min[0] = getMin(result, ulps);
15550 max[0] = getMax(result, ulps);
15551
15552 return true;
15553 }
15554 };
15555
15556 struct fp16FMin : public fp16PerComponent
15557 {
15558 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMin15559 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15560 {
15561 const fp16type x (*in[0]);
15562 const fp16type y (*in[1]);
15563 const double xd (x.asDouble());
15564 const double yd (y.asDouble());
15565 const double result (deMin(xd, yd));
15566
15567 if (x.isNaN() || y.isNaN())
15568 return false;
15569
15570 out[0] = fp16type(result).bits();
15571 min[0] = getMin(result, getULPs(in));
15572 max[0] = getMax(result, getULPs(in));
15573
15574 return true;
15575 }
15576 };
15577
15578 struct fp16FMax : public fp16PerComponent
15579 {
15580 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMax15581 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15582 {
15583 const fp16type x (*in[0]);
15584 const fp16type y (*in[1]);
15585 const double xd (x.asDouble());
15586 const double yd (y.asDouble());
15587 const double result (deMax(xd, yd));
15588
15589 if (x.isNaN() || y.isNaN())
15590 return false;
15591
15592 out[0] = fp16type(result).bits();
15593 min[0] = getMin(result, getULPs(in));
15594 max[0] = getMax(result, getULPs(in));
15595
15596 return true;
15597 }
15598 };
15599
15600 struct fp16Step : public fp16PerComponent
15601 {
15602 template<class fp16type>
calcvkt::SpirVAssembly::fp16Step15603 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15604 {
15605 const fp16type edge (*in[0]);
15606 const fp16type x (*in[1]);
15607 const double edged (edge.asDouble());
15608 const double xd (x.asDouble());
15609 const double result (deStep(edged, xd));
15610
15611 out[0] = fp16type(result).bits();
15612 min[0] = getMin(result, getULPs(in));
15613 max[0] = getMax(result, getULPs(in));
15614
15615 return true;
15616 }
15617 };
15618
15619 struct fp16Ldexp : public fp16PerComponent
15620 {
15621 template<class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15622 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15623 {
15624 const fp16type x (*in[0]);
15625 const fp16type y (*in[1]);
15626 const double xd (x.asDouble());
15627 const int yd (static_cast<int>(deTrunc(y.asDouble())));
15628 const double result (deLdExp(xd, yd));
15629
15630 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15631 return false;
15632
15633 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15634 if (fp16type(result).isInf())
15635 return false;
15636
15637 out[0] = fp16type(result).bits();
15638 min[0] = getMin(result, getULPs(in));
15639 max[0] = getMax(result, getULPs(in));
15640
15641 return true;
15642 }
15643 };
15644
15645 struct fp16FClamp : public fp16PerComponent
15646 {
15647 template<class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15648 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15649 {
15650 const fp16type x (*in[0]);
15651 const fp16type minVal (*in[1]);
15652 const fp16type maxVal (*in[2]);
15653 const double xd (x.asDouble());
15654 const double minVald (minVal.asDouble());
15655 const double maxVald (maxVal.asDouble());
15656 const double result (deClamp(xd, minVald, maxVald));
15657
15658 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15659 return false;
15660
15661 out[0] = fp16type(result).bits();
15662 min[0] = getMin(result, getULPs(in));
15663 max[0] = getMax(result, getULPs(in));
15664
15665 return true;
15666 }
15667 };
15668
15669 struct fp16FMix : public fp16PerComponent
15670 {
fp16FMixvkt::SpirVAssembly::fp16FMix15671 fp16FMix() : fp16PerComponent()
15672 {
15673 flavorNames.push_back("DoubleCalc");
15674 flavorNames.push_back("EmulatingFP16");
15675 flavorNames.push_back("EmulatingFP16YminusX");
15676 }
15677
15678 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMix15679 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15680 {
15681 const fp16type x (*in[0]);
15682 const fp16type y (*in[1]);
15683 const fp16type a (*in[2]);
15684 const double ulps (8.0); // This is not a precision test. Value is not from spec
15685 double result (0.0);
15686
15687 if (getFlavor() == 0)
15688 {
15689 const double xd (x.asDouble());
15690 const double yd (y.asDouble());
15691 const double ad (a.asDouble());
15692 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15693 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15694 const double eps (xeps + yeps);
15695
15696 result = deMix(xd, yd, ad);
15697 min[0] = result - eps;
15698 max[0] = result + eps;
15699 }
15700 else if (getFlavor() == 1)
15701 {
15702 const double xd (x.asDouble());
15703 const double yd (y.asDouble());
15704 const double ad (a.asDouble());
15705 const fp16type am (1.0 - ad);
15706 const double amd (am.asDouble());
15707 const fp16type xam (xd * amd);
15708 const double xamd (xam.asDouble());
15709 const fp16type ya (yd * ad);
15710 const double yad (ya.asDouble());
15711 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15712 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15713 const double eps (xeps + yeps);
15714
15715 result = xamd + yad;
15716 min[0] = result - eps;
15717 max[0] = result + eps;
15718 }
15719 else if (getFlavor() == 2)
15720 {
15721 const double xd (x.asDouble());
15722 const double yd (y.asDouble());
15723 const double ad (a.asDouble());
15724 const fp16type ymx (yd - xd);
15725 const double ymxd (ymx.asDouble());
15726 const fp16type ymxa (ymxd * ad);
15727 const double ymxad (ymxa.asDouble());
15728 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15729 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15730 const double eps (xeps + yeps);
15731
15732 result = xd + ymxad;
15733 min[0] = result - eps;
15734 max[0] = result + eps;
15735 }
15736 else
15737 {
15738 TCU_THROW(InternalError, "Unknown flavor");
15739 }
15740
15741 out[0] = fp16type(result).bits();
15742
15743 return true;
15744 }
15745 };
15746
15747 struct fp16SmoothStep : public fp16PerComponent
15748 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep15749 fp16SmoothStep() : fp16PerComponent()
15750 {
15751 flavorNames.push_back("FloatCalc");
15752 flavorNames.push_back("EmulatingFP16");
15753 flavorNames.push_back("EmulatingFP16WClamp");
15754 }
15755
getULPsvkt::SpirVAssembly::fp16SmoothStep15756 virtual double getULPs(vector<const deFloat16*>& in)
15757 {
15758 DE_UNREF(in);
15759
15760 return 4.0; // This is not a precision test. Value is not from spec
15761 }
15762
15763 template<class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep15764 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15765 {
15766 const fp16type edge0 (*in[0]);
15767 const fp16type edge1 (*in[1]);
15768 const fp16type x (*in[2]);
15769 double result (0.0);
15770
15771 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15772 return false;
15773
15774 if (edge0.isInf() || edge1.isInf() || x.isInf())
15775 return false;
15776
15777 if (getFlavor() == 0)
15778 {
15779 const float edge0d (edge0.asFloat());
15780 const float edge1d (edge1.asFloat());
15781 const float xd (x.asFloat());
15782 const float sstep (deFloatSmoothStep(edge0d, edge1d, xd));
15783
15784 result = sstep;
15785 }
15786 else if (getFlavor() == 1)
15787 {
15788 const double edge0d (edge0.asDouble());
15789 const double edge1d (edge1.asDouble());
15790 const double xd (x.asDouble());
15791
15792 if (xd <= edge0d)
15793 result = 0.0;
15794 else if (xd >= edge1d)
15795 result = 1.0;
15796 else
15797 {
15798 const fp16type a (xd - edge0d);
15799 const fp16type b (edge1d - edge0d);
15800 const fp16type t (a.asDouble() / b.asDouble());
15801 const fp16type t2 (2.0 * t.asDouble());
15802 const fp16type t3 (3.0 - t2.asDouble());
15803 const fp16type t4 (t.asDouble() * t3.asDouble());
15804 const fp16type t5 (t.asDouble() * t4.asDouble());
15805
15806 result = t5.asDouble();
15807 }
15808 }
15809 else if (getFlavor() == 2)
15810 {
15811 const double edge0d (edge0.asDouble());
15812 const double edge1d (edge1.asDouble());
15813 const double xd (x.asDouble());
15814 const fp16type a (xd - edge0d);
15815 const fp16type b (edge1d - edge0d);
15816 const fp16type bi (1.0 / b.asDouble());
15817 const fp16type t0 (a.asDouble() * bi.asDouble());
15818 const double tc (deClamp(t0.asDouble(), 0.0, 1.0));
15819 const fp16type t (tc);
15820 const fp16type t2 (2.0 * t.asDouble());
15821 const fp16type t3 (3.0 - t2.asDouble());
15822 const fp16type t4 (t.asDouble() * t3.asDouble());
15823 const fp16type t5 (t.asDouble() * t4.asDouble());
15824
15825 result = t5.asDouble();
15826 }
15827 else
15828 {
15829 TCU_THROW(InternalError, "Unknown flavor");
15830 }
15831
15832 out[0] = fp16type(result).bits();
15833 min[0] = getMin(result, getULPs(in));
15834 max[0] = getMax(result, getULPs(in));
15835
15836 return true;
15837 }
15838 };
15839
15840 struct fp16Fma : public fp16PerComponent
15841 {
fp16Fmavkt::SpirVAssembly::fp16Fma15842 fp16Fma()
15843 {
15844 flavorNames.push_back("DoubleCalc");
15845 flavorNames.push_back("EmulatingFP16");
15846 }
15847
getULPsvkt::SpirVAssembly::fp16Fma15848 virtual double getULPs(vector<const deFloat16*>& in)
15849 {
15850 DE_UNREF(in);
15851
15852 return 16.0;
15853 }
15854
15855 template<class fp16type>
calcvkt::SpirVAssembly::fp16Fma15856 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15857 {
15858 DE_ASSERT(in.size() == 3);
15859 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15860 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15861 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15862 DE_ASSERT(getOutCompCount() > 0);
15863
15864 const fp16type a (*in[0]);
15865 const fp16type b (*in[1]);
15866 const fp16type c (*in[2]);
15867 double result (0.0);
15868
15869 if (getFlavor() == 0)
15870 {
15871 const double ad (a.asDouble());
15872 const double bd (b.asDouble());
15873 const double cd (c.asDouble());
15874
15875 result = deMadd(ad, bd, cd);
15876 }
15877 else if (getFlavor() == 1)
15878 {
15879 const double ad (a.asDouble());
15880 const double bd (b.asDouble());
15881 const double cd (c.asDouble());
15882 const fp16type ab (ad * bd);
15883 const fp16type r (ab.asDouble() + cd);
15884
15885 result = r.asDouble();
15886 }
15887 else
15888 {
15889 TCU_THROW(InternalError, "Unknown flavor");
15890 }
15891
15892 out[0] = fp16type(result).bits();
15893 min[0] = getMin(result, getULPs(in));
15894 max[0] = getMax(result, getULPs(in));
15895
15896 return true;
15897 }
15898 };
15899
15900
15901 struct fp16AllComponents : public fp16PerComponent
15902 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents15903 bool callOncePerComponent () { return false; }
15904 };
15905
15906 struct fp16Length : public fp16AllComponents
15907 {
fp16Lengthvkt::SpirVAssembly::fp16Length15908 fp16Length() : fp16AllComponents()
15909 {
15910 flavorNames.push_back("EmulatingFP16");
15911 flavorNames.push_back("DoubleCalc");
15912 }
15913
getULPsvkt::SpirVAssembly::fp16Length15914 virtual double getULPs(vector<const deFloat16*>& in)
15915 {
15916 DE_UNREF(in);
15917
15918 return 4.0;
15919 }
15920
15921 template<class fp16type>
calcvkt::SpirVAssembly::fp16Length15922 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15923 {
15924 DE_ASSERT(getOutCompCount() == 1);
15925 DE_ASSERT(in.size() == 1);
15926
15927 double result (0.0);
15928
15929 if (getFlavor() == 0)
15930 {
15931 fp16type r (0.0);
15932
15933 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15934 {
15935 const fp16type x (in[0][componentNdx]);
15936 const fp16type q (x.asDouble() * x.asDouble());
15937
15938 r = fp16type(r.asDouble() + q.asDouble());
15939 }
15940
15941 result = deSqrt(r.asDouble());
15942
15943 out[0] = fp16type(result).bits();
15944 }
15945 else if (getFlavor() == 1)
15946 {
15947 double r (0.0);
15948
15949 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15950 {
15951 const fp16type x (in[0][componentNdx]);
15952 const double q (x.asDouble() * x.asDouble());
15953
15954 r += q;
15955 }
15956
15957 result = deSqrt(r);
15958
15959 out[0] = fp16type(result).bits();
15960 }
15961 else
15962 {
15963 TCU_THROW(InternalError, "Unknown flavor");
15964 }
15965
15966 min[0] = getMin(result, getULPs(in));
15967 max[0] = getMax(result, getULPs(in));
15968
15969 return true;
15970 }
15971 };
15972
15973 struct fp16Distance : public fp16AllComponents
15974 {
fp16Distancevkt::SpirVAssembly::fp16Distance15975 fp16Distance() : fp16AllComponents()
15976 {
15977 flavorNames.push_back("EmulatingFP16");
15978 flavorNames.push_back("DoubleCalc");
15979 }
15980
getULPsvkt::SpirVAssembly::fp16Distance15981 virtual double getULPs(vector<const deFloat16*>& in)
15982 {
15983 DE_UNREF(in);
15984
15985 return 4.0;
15986 }
15987
15988 template<class fp16type>
calcvkt::SpirVAssembly::fp16Distance15989 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15990 {
15991 DE_ASSERT(getOutCompCount() == 1);
15992 DE_ASSERT(in.size() == 2);
15993 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15994
15995 double result (0.0);
15996
15997 if (getFlavor() == 0)
15998 {
15999 fp16type r (0.0);
16000
16001 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16002 {
16003 const fp16type x (in[0][componentNdx]);
16004 const fp16type y (in[1][componentNdx]);
16005 const fp16type d (x.asDouble() - y.asDouble());
16006 const fp16type q (d.asDouble() * d.asDouble());
16007
16008 r = fp16type(r.asDouble() + q.asDouble());
16009 }
16010
16011 result = deSqrt(r.asDouble());
16012 }
16013 else if (getFlavor() == 1)
16014 {
16015 double r (0.0);
16016
16017 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16018 {
16019 const fp16type x (in[0][componentNdx]);
16020 const fp16type y (in[1][componentNdx]);
16021 const double d (x.asDouble() - y.asDouble());
16022 const double q (d * d);
16023
16024 r += q;
16025 }
16026
16027 result = deSqrt(r);
16028 }
16029 else
16030 {
16031 TCU_THROW(InternalError, "Unknown flavor");
16032 }
16033
16034 out[0] = fp16type(result).bits();
16035 min[0] = getMin(result, getULPs(in));
16036 max[0] = getMax(result, getULPs(in));
16037
16038 return true;
16039 }
16040 };
16041
16042 struct fp16Cross : public fp16AllComponents
16043 {
fp16Crossvkt::SpirVAssembly::fp16Cross16044 fp16Cross() : fp16AllComponents()
16045 {
16046 flavorNames.push_back("EmulatingFP16");
16047 flavorNames.push_back("DoubleCalc");
16048 }
16049
getULPsvkt::SpirVAssembly::fp16Cross16050 virtual double getULPs(vector<const deFloat16*>& in)
16051 {
16052 DE_UNREF(in);
16053
16054 return 4.0;
16055 }
16056
16057 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cross16058 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16059 {
16060 DE_ASSERT(getOutCompCount() == 3);
16061 DE_ASSERT(in.size() == 2);
16062 DE_ASSERT(getArgCompCount(0) == 3);
16063 DE_ASSERT(getArgCompCount(1) == 3);
16064
16065 if (getFlavor() == 0)
16066 {
16067 const fp16type x0 (in[0][0]);
16068 const fp16type x1 (in[0][1]);
16069 const fp16type x2 (in[0][2]);
16070 const fp16type y0 (in[1][0]);
16071 const fp16type y1 (in[1][1]);
16072 const fp16type y2 (in[1][2]);
16073 const fp16type x1y2 (x1.asDouble() * y2.asDouble());
16074 const fp16type y1x2 (y1.asDouble() * x2.asDouble());
16075 const fp16type x2y0 (x2.asDouble() * y0.asDouble());
16076 const fp16type y2x0 (y2.asDouble() * x0.asDouble());
16077 const fp16type x0y1 (x0.asDouble() * y1.asDouble());
16078 const fp16type y0x1 (y0.asDouble() * x1.asDouble());
16079
16080 out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
16081 out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
16082 out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
16083 }
16084 else if (getFlavor() == 1)
16085 {
16086 const fp16type x0 (in[0][0]);
16087 const fp16type x1 (in[0][1]);
16088 const fp16type x2 (in[0][2]);
16089 const fp16type y0 (in[1][0]);
16090 const fp16type y1 (in[1][1]);
16091 const fp16type y2 (in[1][2]);
16092 const double x1y2 (x1.asDouble() * y2.asDouble());
16093 const double y1x2 (y1.asDouble() * x2.asDouble());
16094 const double x2y0 (x2.asDouble() * y0.asDouble());
16095 const double y2x0 (y2.asDouble() * x0.asDouble());
16096 const double x0y1 (x0.asDouble() * y1.asDouble());
16097 const double y0x1 (y0.asDouble() * x1.asDouble());
16098
16099 out[0] = fp16type(x1y2 - y1x2).bits();
16100 out[1] = fp16type(x2y0 - y2x0).bits();
16101 out[2] = fp16type(x0y1 - y0x1).bits();
16102 }
16103 else
16104 {
16105 TCU_THROW(InternalError, "Unknown flavor");
16106 }
16107
16108 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16109 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16110 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16111 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16112
16113 return true;
16114 }
16115 };
16116
16117 struct fp16Normalize : public fp16AllComponents
16118 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16119 fp16Normalize() : fp16AllComponents()
16120 {
16121 flavorNames.push_back("EmulatingFP16");
16122 flavorNames.push_back("DoubleCalc");
16123
16124 permutationsFlavorStart = 0;
16125 permutationsFlavorEnd = flavorNames.size();
16126
16127 // flavorNames will be extended later
16128 }
16129
setArgCompCountvkt::SpirVAssembly::fp16Normalize16130 virtual void setArgCompCount (size_t argNo, size_t compCount)
16131 {
16132 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16133
16134 if (argNo == 0 && argCompCount[argNo] == 0)
16135 {
16136 const size_t maxPermutationsCount = 24u; // Equal to 4!
16137 std::vector<int> indices;
16138
16139 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16140 indices.push_back(static_cast<int>(componentNdx));
16141
16142 m_permutations.reserve(maxPermutationsCount);
16143
16144 permutationsFlavorStart = flavorNames.size();
16145
16146 do
16147 {
16148 tcu::UVec4 permutation;
16149 std::string name = "Permutted_";
16150
16151 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16152 {
16153 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16154 name += de::toString(indices[componentNdx]);
16155 }
16156
16157 m_permutations.push_back(permutation);
16158 flavorNames.push_back(name);
16159
16160 } while(std::next_permutation(indices.begin(), indices.end()));
16161
16162 permutationsFlavorEnd = flavorNames.size();
16163 }
16164
16165 fp16AllComponents::setArgCompCount(argNo, compCount);
16166 }
getULPsvkt::SpirVAssembly::fp16Normalize16167 virtual double getULPs(vector<const deFloat16*>& in)
16168 {
16169 DE_UNREF(in);
16170
16171 return 8.0;
16172 }
16173
16174 template<class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16175 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16176 {
16177 DE_ASSERT(in.size() == 1);
16178 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16179
16180 if (getFlavor() == 0)
16181 {
16182 fp16type r(0.0);
16183
16184 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16185 {
16186 const fp16type x (in[0][componentNdx]);
16187 const fp16type q (x.asDouble() * x.asDouble());
16188
16189 r = fp16type(r.asDouble() + q.asDouble());
16190 }
16191
16192 r = fp16type(deSqrt(r.asDouble()));
16193
16194 if (r.isZero())
16195 return false;
16196
16197 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16198 {
16199 const fp16type x (in[0][componentNdx]);
16200
16201 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16202 }
16203 }
16204 else if (getFlavor() == 1)
16205 {
16206 double r(0.0);
16207
16208 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16209 {
16210 const fp16type x (in[0][componentNdx]);
16211 const double q (x.asDouble() * x.asDouble());
16212
16213 r += q;
16214 }
16215
16216 r = deSqrt(r);
16217
16218 if (r == 0)
16219 return false;
16220
16221 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16222 {
16223 const fp16type x (in[0][componentNdx]);
16224
16225 out[componentNdx] = fp16type(x.asDouble() / r).bits();
16226 }
16227 }
16228 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16229 {
16230 const int compCount (static_cast<int>(getArgCompCount(0)));
16231 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16232 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16233 fp16type r (0.0);
16234
16235 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16236 {
16237 const size_t componentNdx (permutation[permComponentNdx]);
16238 const fp16type x (in[0][componentNdx]);
16239 const fp16type q (x.asDouble() * x.asDouble());
16240
16241 r = fp16type(r.asDouble() + q.asDouble());
16242 }
16243
16244 r = fp16type(deSqrt(r.asDouble()));
16245
16246 if (r.isZero())
16247 return false;
16248
16249 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16250 {
16251 const size_t componentNdx (permutation[permComponentNdx]);
16252 const fp16type x (in[0][componentNdx]);
16253
16254 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16255 }
16256 }
16257 else
16258 {
16259 TCU_THROW(InternalError, "Unknown flavor");
16260 }
16261
16262 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16263 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16264 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16265 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16266
16267 return true;
16268 }
16269
16270 private:
16271 std::vector<tcu::UVec4> m_permutations;
16272 size_t permutationsFlavorStart;
16273 size_t permutationsFlavorEnd;
16274 };
16275
16276 struct fp16FaceForward : public fp16AllComponents
16277 {
getULPsvkt::SpirVAssembly::fp16FaceForward16278 virtual double getULPs(vector<const deFloat16*>& in)
16279 {
16280 DE_UNREF(in);
16281
16282 return 4.0;
16283 }
16284
16285 template<class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16286 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16287 {
16288 DE_ASSERT(in.size() == 3);
16289 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16290 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16291 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16292
16293 fp16type dp(0.0);
16294
16295 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16296 {
16297 const fp16type x (in[1][componentNdx]);
16298 const fp16type y (in[2][componentNdx]);
16299 const double xd (x.asDouble());
16300 const double yd (y.asDouble());
16301 const fp16type q (xd * yd);
16302
16303 dp = fp16type(dp.asDouble() + q.asDouble());
16304 }
16305
16306 if (dp.isNaN() || dp.isZero())
16307 return false;
16308
16309 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16310 {
16311 const fp16type n (in[0][componentNdx]);
16312
16313 out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16314 }
16315
16316 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16317 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16318 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16319 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16320
16321 return true;
16322 }
16323 };
16324
// Reference calculation of i - 2 * dot(i, n) * n, where i is argument 0 and n is
// argument 1.
//
// Flavors (index into flavorNames):
//   0 "EmulatingFP16"              - every intermediate is converted to fp16type.
//   1 "EmulatingFP16+KeepZeroSign" - as 0, but the dot product accumulator is seeded
//                                    with the first product instead of 0.0.
//   2 "FloatCalc"                  - intermediates are kept in float.
//   3 "FloatCalc+KeepZeroSign"     - as 2, with the seeded accumulator.
//   4 "EmulatingFP16+2Nfirst"      - as 0, but computes (2 * n) before multiplying by
//                                    the dot product.
//   5 "EmulatingFP16+2Ifirst"      - as 0, but doubles i before taking the dot product.
struct fp16Reflect : public fp16AllComponents
{
	fp16Reflect() : fp16AllComponents()
	{
		flavorNames.push_back("EmulatingFP16");
		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
		flavorNames.push_back("FloatCalc");
		flavorNames.push_back("FloatCalc+KeepZeroSign");
		flavorNames.push_back("EmulatingFP16+2Nfirst");
		flavorNames.push_back("EmulatingFP16+2Ifirst");
	}

	// Tolerance in ULPs handed to getMin()/getMax(); the inputs are not inspected.
	virtual double getULPs(vector<const deFloat16*>& in)
	{
		DE_UNREF(in);

		return 256.0; // This is not a precision test. Value is not from spec
	}

	// Writes the expected fp16 values to out[] and their accepted intervals to
	// min[]/max[]. Returns false when an fp16-emulated dot product is NaN (the
	// float path has no such check); throws InternalError on an unknown flavor.
	template<class fp16type>
	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
	{
		DE_ASSERT(in.size() == 2);
		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
		DE_ASSERT(getArgCompCount(1) == getOutCompCount());

		if (getFlavor() < 4)
		{
			// Flavors 0..3 are a 2-bit combination: bit 0 selects the seeded
			// accumulator, bit 1 selects float arithmetic.
			const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
			const bool floatCalc ((flavor & 2) != 0 ? true : false);

			if (floatCalc)
			{
				float dp(0.0f);

				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const float id (i.asFloat());
					const float nd (n.asFloat());
					const float qd (id * nd);

					// With keepZeroSign the first product replaces the 0.0 seed rather
					// than being added to it (presumably so a negative-zero product
					// keeps its sign).
					if (keepZeroSign)
						dp = (componentNdx == 0) ? qd : dp + qd;
					else
						dp = dp + qd;
				}

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const float dpnd (dp * n.asFloat());
					const float dpn2d (2.0f * dpnd);
					const float idpn2d (i.asFloat() - dpn2d);
					const fp16type result (idpn2d);

					out[componentNdx] = result.bits();
				}
			}
			else
			{
				fp16type dp(0.0);

				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const double id (i.asDouble());
					const double nd (n.asDouble());
					const fp16type q (id * nd);

					// Same seeding rule as in the float path above
					if (keepZeroSign)
						dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
					else
						dp = fp16type(dp.asDouble() + q.asDouble());
				}

				if (dp.isNaN())
					return false;

				// Order of operations: (dp * n), then * 2, then i - result
				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const fp16type dpn (dp.asDouble() * n.asDouble());
					const fp16type dpn2 (2 * dpn.asDouble());
					const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

					out[componentNdx] = idpn2.bits();
				}
			}
		}
		else if (getFlavor() == 4)
		{
			fp16type dp(0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const fp16type q (id * nd);

				dp = fp16type(dp.asDouble() + q.asDouble());
			}

			if (dp.isNaN())
				return false;

			// Order of operations: (2 * n), then dp * result, then i - result
			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type n2 (2 * n.asDouble());
				const fp16type dpn2 (dp.asDouble() * n2.asDouble());
				const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

				out[componentNdx] = idpn2.bits();
			}
		}
		else if (getFlavor() == 5)
		{
			fp16type dp2(0.0);

			// Dot product of (2 * i) and n, so the factor 2 is already included
			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type i2 (2.0 * i.asDouble());
				const double i2d (i2.asDouble());
				const double nd (n.asDouble());
				const fp16type q (i2d * nd);

				dp2 = fp16type(dp2.asDouble() + q.asDouble());
			}

			if (dp2.isNaN())
				return false;

			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type dpn2 (dp2.asDouble() * n.asDouble());
				const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

				out[componentNdx] = idpn2.bits();
			}
		}
		else
		{
			TCU_THROW(InternalError, "Unknown flavor");
		}

		// The accepted interval is derived from the value stored in out[]
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

		return true;
	}
};
16490
// Reference calculation of a refraction vector. With i = argument 0, n = argument 1
// and eta = the single component of argument 2:
//   k = 1 - eta^2 * (1 - dot(i, n)^2)
//   result = (k < 0) ? 0 : eta * i - (eta * dot(i, n) + sqrt(k)) * n
//
// Flavors (index into flavorNames) are a 2-bit combination: bit 0 seeds the dot
// product accumulator with the first product instead of 0.0, bit 1 selects the
// non-emulated path.
// NOTE(review): flavors 2 and 3 are named "FloatCalc" but the code below computes
// them in double - confirm whether the name or the arithmetic is intended.
struct fp16Refract : public fp16AllComponents
{
	fp16Refract() : fp16AllComponents()
	{
		flavorNames.push_back("EmulatingFP16");
		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
		flavorNames.push_back("FloatCalc");
		flavorNames.push_back("FloatCalc+KeepZeroSign");
	}

	// Tolerance in ULPs handed to getMin()/getMax(); the inputs are not inspected.
	virtual double getULPs(vector<const deFloat16*>& in)
	{
		DE_UNREF(in);

		return 8192.0; // This is not a precision test. Value is not from spec
	}

	// Writes the expected fp16 values to out[] and their accepted intervals to
	// min[]/max[]. Returns false when a result component is infinite, or - in the
	// fp16-emulated path only - when the dot product or a result component is NaN.
	template<class fp16type>
	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
	{
		DE_ASSERT(in.size() == 3);
		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
		DE_ASSERT(getArgCompCount(2) == 1);

		const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
		const bool doubleCalc ((flavor & 2) != 0 ? true : false);
		const fp16type eta (*in[2]);

		if (doubleCalc)
		{
			double dp (0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const double qd (id * nd);

				// With keepZeroSign the first product replaces the 0.0 seed rather
				// than being added to it (presumably so a negative-zero product
				// keeps its sign).
				if (keepZeroSign)
					dp = (componentNdx == 0) ? qd : dp + qd;
				else
					dp = dp + qd;
			}

			// k = 1 - eta^2 * (1 - dp^2), built up step by step
			const double eta2 (eta.asDouble() * eta.asDouble());
			const double dp2 (dp * dp);
			const double dp1 (1.0 - dp2);
			const double dpe (eta2 * dp1);
			const double k (1.0 - dpe);

			if (k < 0.0)
			{
				// Negative k: every output component is zero
				const fp16type zero (0.0);

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
					out[componentNdx] = zero.bits();
			}
			else
			{
				const double sk (deSqrt(k));

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const double etai (i.asDouble() * eta.asDouble());
					const double etadp (eta.asDouble() * dp);
					const double etadpk (etadp + sk);
					const double etadpkn (etadpk * n.asDouble());
					const double full (etai - etadpkn);
					const fp16type result (full);

					if (result.isInf())
						return false;

					out[componentNdx] = result.bits();
				}
			}
		}
		else
		{
			// Same computation with every intermediate converted to fp16type
			fp16type dp (0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const fp16type q (id * nd);

				if (keepZeroSign)
					dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
				else
					dp = fp16type(dp.asDouble() + q.asDouble());
			}

			if (dp.isNaN())
				return false;

			const fp16type eta2(eta.asDouble() * eta.asDouble());
			const fp16type dp2 (dp.asDouble() * dp.asDouble());
			const fp16type dp1 (1.0 - dp2.asDouble());
			const fp16type dpe (eta2.asDouble() * dp1.asDouble());
			const fp16type k (1.0 - dpe.asDouble());

			if (k.asDouble() < 0.0)
			{
				const fp16type zero (0.0);

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
					out[componentNdx] = zero.bits();
			}
			else
			{
				const fp16type sk (deSqrt(k.asDouble()));

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const fp16type etai (i.asDouble() * eta.asDouble());
					const fp16type etadp (eta.asDouble() * dp.asDouble());
					const fp16type etadpk (etadp.asDouble() + sk.asDouble());
					const fp16type etadpkn (etadpk.asDouble() * n.asDouble());
					const fp16type full (etai.asDouble() - etadpkn.asDouble());

					if (full.isNaN() || full.isInf())
						return false;

					out[componentNdx] = full.bits();
				}
			}
		}

		// The accepted interval is derived from the value stored in out[]
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

		return true;
	}
};
16637
16638 struct fp16Dot : public fp16AllComponents
16639 {
fp16Dotvkt::SpirVAssembly::fp16Dot16640 fp16Dot() : fp16AllComponents()
16641 {
16642 flavorNames.push_back("EmulatingFP16");
16643 flavorNames.push_back("FloatCalc");
16644 flavorNames.push_back("DoubleCalc");
16645
16646 permutationsFlavorStart = 0;
16647 permutationsFlavorEnd = flavorNames.size();
16648
16649 // flavorNames will be extended later
16650 }
16651
setArgCompCountvkt::SpirVAssembly::fp16Dot16652 virtual void setArgCompCount (size_t argNo, size_t compCount)
16653 {
16654 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16655
16656 if (argNo == 0 && argCompCount[argNo] == 0)
16657 {
16658 const size_t maxPermutationsCount = 24u; // Equal to 4!
16659 std::vector<int> indices;
16660
16661 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16662 indices.push_back(static_cast<int>(componentNdx));
16663
16664 m_permutations.reserve(maxPermutationsCount);
16665
16666 permutationsFlavorStart = flavorNames.size();
16667
16668 do
16669 {
16670 tcu::UVec4 permutation;
16671 std::string name = "Permutted_";
16672
16673 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16674 {
16675 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16676 name += de::toString(indices[componentNdx]);
16677 }
16678
16679 m_permutations.push_back(permutation);
16680 flavorNames.push_back(name);
16681
16682 } while(std::next_permutation(indices.begin(), indices.end()));
16683
16684 permutationsFlavorEnd = flavorNames.size();
16685 }
16686
16687 fp16AllComponents::setArgCompCount(argNo, compCount);
16688 }
16689
getULPsvkt::SpirVAssembly::fp16Dot16690 virtual double getULPs(vector<const deFloat16*>& in)
16691 {
16692 DE_UNREF(in);
16693
16694 return 16.0; // This is not a precision test. Value is not from spec
16695 }
16696
16697 template<class fp16type>
calcvkt::SpirVAssembly::fp16Dot16698 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16699 {
16700 DE_ASSERT(in.size() == 2);
16701 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16702 DE_ASSERT(getOutCompCount() == 1);
16703
16704 double result (0.0);
16705 double eps (0.0);
16706
16707 if (getFlavor() == 0)
16708 {
16709 fp16type dp (0.0);
16710
16711 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16712 {
16713 const fp16type x (in[0][componentNdx]);
16714 const fp16type y (in[1][componentNdx]);
16715 const fp16type q (x.asDouble() * y.asDouble());
16716
16717 dp = fp16type(dp.asDouble() + q.asDouble());
16718 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16719 }
16720
16721 result = dp.asDouble();
16722 }
16723 else if (getFlavor() == 1)
16724 {
16725 float dp (0.0);
16726
16727 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16728 {
16729 const fp16type x (in[0][componentNdx]);
16730 const fp16type y (in[1][componentNdx]);
16731 const float q (x.asFloat() * y.asFloat());
16732
16733 dp += q;
16734 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16735 }
16736
16737 result = dp;
16738 }
16739 else if (getFlavor() == 2)
16740 {
16741 double dp (0.0);
16742
16743 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16744 {
16745 const fp16type x (in[0][componentNdx]);
16746 const fp16type y (in[1][componentNdx]);
16747 const double q (x.asDouble() * y.asDouble());
16748
16749 dp += q;
16750 eps += floatFormat16.ulp(q, 2.0);
16751 }
16752
16753 result = dp;
16754 }
16755 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16756 {
16757 const int compCount (static_cast<int>(getArgCompCount(1)));
16758 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16759 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16760 fp16type dp (0.0);
16761
16762 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16763 {
16764 const size_t componentNdx (permutation[permComponentNdx]);
16765 const fp16type x (in[0][componentNdx]);
16766 const fp16type y (in[1][componentNdx]);
16767 const fp16type q (x.asDouble() * y.asDouble());
16768
16769 dp = fp16type(dp.asDouble() + q.asDouble());
16770 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16771 }
16772
16773 result = dp.asDouble();
16774 }
16775 else
16776 {
16777 TCU_THROW(InternalError, "Unknown flavor");
16778 }
16779
16780 out[0] = fp16type(result).bits();
16781 min[0] = result - eps;
16782 max[0] = result + eps;
16783
16784 return true;
16785 }
16786
16787 private:
16788 std::vector<tcu::UVec4> m_permutations;
16789 size_t permutationsFlavorStart;
16790 size_t permutationsFlavorEnd;
16791 };
16792
16793 struct fp16VectorTimesScalar : public fp16AllComponents
16794 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar16795 virtual double getULPs(vector<const deFloat16*>& in)
16796 {
16797 DE_UNREF(in);
16798
16799 return 2.0;
16800 }
16801
16802 template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar16803 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16804 {
16805 DE_ASSERT(in.size() == 2);
16806 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16807 DE_ASSERT(getArgCompCount(1) == 1);
16808
16809 fp16type s (*in[1]);
16810
16811 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16812 {
16813 const fp16type x (in[0][componentNdx]);
16814 const double result (s.asDouble() * x.asDouble());
16815 const fp16type m (result);
16816
16817 out[componentNdx] = m.bits();
16818 min[componentNdx] = getMin(result, getULPs(in));
16819 max[componentNdx] = getMax(result, getULPs(in));
16820 }
16821
16822 return true;
16823 }
16824 };
16825
16826 struct fp16MatrixBase : public fp16AllComponents
16827 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase16828 deUint32 getComponentValidity ()
16829 {
16830 return static_cast<deUint32>(-1);
16831 }
16832
getNdxvkt::SpirVAssembly::fp16MatrixBase16833 inline size_t getNdx (const size_t rowCount, const size_t col, const size_t row)
16834 {
16835 const size_t minComponentCount = 0;
16836 const size_t maxComponentCount = 3;
16837 const size_t alignedRowsCount = (rowCount == 3) ? 4 : rowCount;
16838
16839 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16840 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16841 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16842 DE_UNREF(minComponentCount);
16843 DE_UNREF(maxComponentCount);
16844
16845 return col * alignedRowsCount + row;
16846 }
16847
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase16848 deUint32 getComponentMatrixValidityMask (size_t cols, size_t rows)
16849 {
16850 deUint32 result = 0u;
16851
16852 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16853 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16854 {
16855 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16856
16857 DE_ASSERT(bitNdx < sizeof(result) * 8);
16858
16859 result |= (1<<bitNdx);
16860 }
16861
16862 return result;
16863 }
16864 };
16865
16866 template<size_t cols, size_t rows>
16867 struct fp16Transpose : public fp16MatrixBase
16868 {
getULPsvkt::SpirVAssembly::fp16Transpose16869 virtual double getULPs(vector<const deFloat16*>& in)
16870 {
16871 DE_UNREF(in);
16872
16873 return 1.0;
16874 }
16875
getComponentValidityvkt::SpirVAssembly::fp16Transpose16876 deUint32 getComponentValidity ()
16877 {
16878 return getComponentMatrixValidityMask(rows, cols);
16879 }
16880
16881 template<class fp16type>
calcvkt::SpirVAssembly::fp16Transpose16882 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16883 {
16884 DE_ASSERT(in.size() == 1);
16885
16886 const size_t alignedCols = (cols == 3) ? 4 : cols;
16887 const size_t alignedRows = (rows == 3) ? 4 : rows;
16888 vector<deFloat16> output (alignedCols * alignedRows, 0);
16889
16890 DE_ASSERT(output.size() == alignedCols * alignedRows);
16891
16892 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16893 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16894 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16895
16896 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16897 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16898 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16899
16900 return true;
16901 }
16902 };
16903
16904 template<size_t cols, size_t rows>
16905 struct fp16MatrixTimesScalar : public fp16MatrixBase
16906 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar16907 virtual double getULPs(vector<const deFloat16*>& in)
16908 {
16909 DE_UNREF(in);
16910
16911 return 4.0;
16912 }
16913
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar16914 deUint32 getComponentValidity ()
16915 {
16916 return getComponentMatrixValidityMask(cols, rows);
16917 }
16918
16919 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar16920 bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16921 {
16922 DE_ASSERT(in.size() == 2);
16923 DE_ASSERT(getArgCompCount(1) == 1);
16924
16925 const fp16type y (in[1][0]);
16926 const float scalar (y.asFloat());
16927 const size_t alignedCols = (cols == 3) ? 4 : cols;
16928 const size_t alignedRows = (rows == 3) ? 4 : rows;
16929
16930 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16931 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16932 DE_UNREF(alignedCols);
16933
16934 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16935 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16936 {
16937 const size_t ndx (colNdx * alignedRows + rowNdx);
16938 const fp16type x (in[0][ndx]);
16939 const double result (scalar * x.asFloat());
16940
16941 out[ndx] = fp16type(result).bits();
16942 min[ndx] = getMin(result, getULPs(in));
16943 max[ndx] = getMax(result, getULPs(in));
16944 }
16945
16946 return true;
16947 }
16948 };
16949
16950 template<size_t cols, size_t rows>
16951 struct fp16VectorTimesMatrix : public fp16MatrixBase
16952 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix16953 fp16VectorTimesMatrix() : fp16MatrixBase()
16954 {
16955 flavorNames.push_back("EmulatingFP16");
16956 flavorNames.push_back("FloatCalc");
16957 }
16958
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix16959 virtual double getULPs (vector<const deFloat16*>& in)
16960 {
16961 DE_UNREF(in);
16962
16963 return (8.0 * cols);
16964 }
16965
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix16966 deUint32 getComponentValidity ()
16967 {
16968 return getComponentMatrixValidityMask(cols, 1);
16969 }
16970
16971 template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix16972 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16973 {
16974 DE_ASSERT(in.size() == 2);
16975
16976 const size_t alignedCols = (cols == 3) ? 4 : cols;
16977 const size_t alignedRows = (rows == 3) ? 4 : rows;
16978
16979 DE_ASSERT(getOutCompCount() == cols);
16980 DE_ASSERT(getArgCompCount(0) == rows);
16981 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16982 DE_UNREF(alignedCols);
16983
16984 if (getFlavor() == 0)
16985 {
16986 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16987 {
16988 fp16type s (fp16type::zero(1));
16989
16990 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16991 {
16992 const fp16type v (in[0][rowNdx]);
16993 const float vf (v.asFloat());
16994 const size_t ndx (colNdx * alignedRows + rowNdx);
16995 const fp16type x (in[1][ndx]);
16996 const float xf (x.asFloat());
16997 const fp16type m (vf * xf);
16998
16999 s = fp16type(s.asFloat() + m.asFloat());
17000 }
17001
17002 out[colNdx] = s.bits();
17003 min[colNdx] = getMin(s.asDouble(), getULPs(in));
17004 max[colNdx] = getMax(s.asDouble(), getULPs(in));
17005 }
17006 }
17007 else if (getFlavor() == 1)
17008 {
17009 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17010 {
17011 float s (0.0f);
17012
17013 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17014 {
17015 const fp16type v (in[0][rowNdx]);
17016 const float vf (v.asFloat());
17017 const size_t ndx (colNdx * alignedRows + rowNdx);
17018 const fp16type x (in[1][ndx]);
17019 const float xf (x.asFloat());
17020 const float m (vf * xf);
17021
17022 s += m;
17023 }
17024
17025 out[colNdx] = fp16type(s).bits();
17026 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
17027 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
17028 }
17029 }
17030 else
17031 {
17032 TCU_THROW(InternalError, "Unknown flavor");
17033 }
17034
17035 return true;
17036 }
17037 };
17038
17039 template<size_t cols, size_t rows>
17040 struct fp16MatrixTimesVector : public fp16MatrixBase
17041 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector17042 fp16MatrixTimesVector() : fp16MatrixBase()
17043 {
17044 flavorNames.push_back("EmulatingFP16");
17045 flavorNames.push_back("FloatCalc");
17046 }
17047
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector17048 virtual double getULPs (vector<const deFloat16*>& in)
17049 {
17050 DE_UNREF(in);
17051
17052 return (8.0 * rows);
17053 }
17054
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector17055 deUint32 getComponentValidity ()
17056 {
17057 return getComponentMatrixValidityMask(rows, 1);
17058 }
17059
17060 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector17061 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17062 {
17063 DE_ASSERT(in.size() == 2);
17064
17065 const size_t alignedCols = (cols == 3) ? 4 : cols;
17066 const size_t alignedRows = (rows == 3) ? 4 : rows;
17067
17068 DE_ASSERT(getOutCompCount() == rows);
17069 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
17070 DE_ASSERT(getArgCompCount(1) == cols);
17071 DE_UNREF(alignedCols);
17072
17073 if (getFlavor() == 0)
17074 {
17075 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17076 {
17077 fp16type s (fp16type::zero(1));
17078
17079 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17080 {
17081 const size_t ndx (colNdx * alignedRows + rowNdx);
17082 const fp16type x (in[0][ndx]);
17083 const float xf (x.asFloat());
17084 const fp16type v (in[1][colNdx]);
17085 const float vf (v.asFloat());
17086 const fp16type m (vf * xf);
17087
17088 s = fp16type(s.asFloat() + m.asFloat());
17089 }
17090
17091 out[rowNdx] = s.bits();
17092 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
17093 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
17094 }
17095 }
17096 else if (getFlavor() == 1)
17097 {
17098 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17099 {
17100 float s (0.0f);
17101
17102 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17103 {
17104 const size_t ndx (colNdx * alignedRows + rowNdx);
17105 const fp16type x (in[0][ndx]);
17106 const float xf (x.asFloat());
17107 const fp16type v (in[1][colNdx]);
17108 const float vf (v.asFloat());
17109 const float m (vf * xf);
17110
17111 s += m;
17112 }
17113
17114 out[rowNdx] = fp16type(s).bits();
17115 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17116 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17117 }
17118 }
17119 else
17120 {
17121 TCU_THROW(InternalError, "Unknown flavor");
17122 }
17123
17124 return true;
17125 }
17126 };
17127
17128 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17129 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17130 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17131 fp16MatrixTimesMatrix() : fp16MatrixBase()
17132 {
17133 flavorNames.push_back("EmulatingFP16");
17134 flavorNames.push_back("FloatCalc");
17135 }
17136
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17137 virtual double getULPs (vector<const deFloat16*>& in)
17138 {
17139 DE_UNREF(in);
17140
17141 return 32.0;
17142 }
17143
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17144 deUint32 getComponentValidity ()
17145 {
17146 return getComponentMatrixValidityMask(colsR, rowsL);
17147 }
17148
17149 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17150 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17151 {
17152 DE_STATIC_ASSERT(colsL == rowsR);
17153
17154 DE_ASSERT(in.size() == 2);
17155
17156 const size_t alignedColsL = (colsL == 3) ? 4 : colsL;
17157 const size_t alignedRowsL = (rowsL == 3) ? 4 : rowsL;
17158 const size_t alignedColsR = (colsR == 3) ? 4 : colsR;
17159 const size_t alignedRowsR = (rowsR == 3) ? 4 : rowsR;
17160
17161 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17162 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17163 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17164 DE_UNREF(alignedColsL);
17165 DE_UNREF(alignedColsR);
17166
17167 if (getFlavor() == 0)
17168 {
17169 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17170 {
17171 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17172 {
17173 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17174 fp16type s (fp16type::zero(1));
17175
17176 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17177 {
17178 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17179 const fp16type l (in[0][ndxl]);
17180 const float lf (l.asFloat());
17181 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17182 const fp16type r (in[1][ndxr]);
17183 const float rf (r.asFloat());
17184 const fp16type m (lf * rf);
17185
17186 s = fp16type(s.asFloat() + m.asFloat());
17187 }
17188
17189 out[ndx] = s.bits();
17190 min[ndx] = getMin(s.asDouble(), getULPs(in));
17191 max[ndx] = getMax(s.asDouble(), getULPs(in));
17192 }
17193 }
17194 }
17195 else if (getFlavor() == 1)
17196 {
17197 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17198 {
17199 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17200 {
17201 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17202 float s (0.0f);
17203
17204 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17205 {
17206 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17207 const fp16type l (in[0][ndxl]);
17208 const float lf (l.asFloat());
17209 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17210 const fp16type r (in[1][ndxr]);
17211 const float rf (r.asFloat());
17212 const float m (lf * rf);
17213
17214 s += m;
17215 }
17216
17217 out[ndx] = fp16type(s).bits();
17218 min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17219 max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17220 }
17221 }
17222 }
17223 else
17224 {
17225 TCU_THROW(InternalError, "Unknown flavor");
17226 }
17227
17228 return true;
17229 }
17230 };
17231
17232 template<size_t cols, size_t rows>
17233 struct fp16OuterProduct : public fp16MatrixBase
17234 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17235 virtual double getULPs (vector<const deFloat16*>& in)
17236 {
17237 DE_UNREF(in);
17238
17239 return 2.0;
17240 }
17241
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17242 deUint32 getComponentValidity ()
17243 {
17244 return getComponentMatrixValidityMask(cols, rows);
17245 }
17246
17247 template<class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17248 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17249 {
17250 DE_ASSERT(in.size() == 2);
17251
17252 const size_t alignedCols = (cols == 3) ? 4 : cols;
17253 const size_t alignedRows = (rows == 3) ? 4 : rows;
17254
17255 DE_ASSERT(getArgCompCount(0) == rows);
17256 DE_ASSERT(getArgCompCount(1) == cols);
17257 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17258 DE_UNREF(alignedCols);
17259
17260 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17261 {
17262 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17263 {
17264 const size_t ndx (colNdx * alignedRows + rowNdx);
17265 const fp16type x (in[0][rowNdx]);
17266 const float xf (x.asFloat());
17267 const fp16type y (in[1][colNdx]);
17268 const float yf (y.asFloat());
17269 const fp16type m (xf * yf);
17270
17271 out[ndx] = m.bits();
17272 min[ndx] = getMin(m.asDouble(), getULPs(in));
17273 max[ndx] = getMax(m.asDouble(), getULPs(in));
17274 }
17275 }
17276
17277 return true;
17278 }
17279 };
17280
17281 template<size_t size>
17282 struct fp16Determinant;
17283
17284 template<>
17285 struct fp16Determinant<2> : public fp16MatrixBase
17286 {
getULPsvkt::SpirVAssembly::fp16Determinant17287 virtual double getULPs (vector<const deFloat16*>& in)
17288 {
17289 DE_UNREF(in);
17290
17291 return 128.0; // This is not a precision test. Value is not from spec
17292 }
17293
getComponentValidityvkt::SpirVAssembly::fp16Determinant17294 deUint32 getComponentValidity ()
17295 {
17296 return 1;
17297 }
17298
17299 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17300 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17301 {
17302 const size_t cols = 2;
17303 const size_t rows = 2;
17304 const size_t alignedCols = (cols == 3) ? 4 : cols;
17305 const size_t alignedRows = (rows == 3) ? 4 : rows;
17306
17307 DE_ASSERT(in.size() == 1);
17308 DE_ASSERT(getOutCompCount() == 1);
17309 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17310 DE_UNREF(alignedCols);
17311 DE_UNREF(alignedRows);
17312
17313 // [ a b ]
17314 // [ c d ]
17315 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17316 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17317 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17318 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17319 const float ad (a * d);
17320 const fp16type adf16 (ad);
17321 const float bc (b * c);
17322 const fp16type bcf16 (bc);
17323 const float r (adf16.asFloat() - bcf16.asFloat());
17324 const fp16type rf16 (r);
17325
17326 out[0] = rf16.bits();
17327 min[0] = getMin(r, getULPs(in));
17328 max[0] = getMax(r, getULPs(in));
17329
17330 return true;
17331 }
17332 };
17333
17334 template<>
17335 struct fp16Determinant<3> : public fp16MatrixBase
17336 {
getULPsvkt::SpirVAssembly::fp16Determinant17337 virtual double getULPs (vector<const deFloat16*>& in)
17338 {
17339 DE_UNREF(in);
17340
17341 return 128.0; // This is not a precision test. Value is not from spec
17342 }
17343
getComponentValidityvkt::SpirVAssembly::fp16Determinant17344 deUint32 getComponentValidity ()
17345 {
17346 return 1;
17347 }
17348
17349 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17350 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17351 {
17352 const size_t cols = 3;
17353 const size_t rows = 3;
17354 const size_t alignedCols = (cols == 3) ? 4 : cols;
17355 const size_t alignedRows = (rows == 3) ? 4 : rows;
17356
17357 DE_ASSERT(in.size() == 1);
17358 DE_ASSERT(getOutCompCount() == 1);
17359 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17360 DE_UNREF(alignedCols);
17361 DE_UNREF(alignedRows);
17362
17363 // [ a b c ]
17364 // [ d e f ]
17365 // [ g h i ]
17366 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17367 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17368 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17369 const float d (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17370 const float e (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17371 const float f (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17372 const float g (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17373 const float h (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17374 const float i (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17375 const fp16type aei (a * e * i);
17376 const fp16type bfg (b * f * g);
17377 const fp16type cdh (c * d * h);
17378 const fp16type ceg (c * e * g);
17379 const fp16type bdi (b * d * i);
17380 const fp16type afh (a * f * h);
17381 const float r (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17382 const fp16type rf16 (r);
17383
17384 out[0] = rf16.bits();
17385 min[0] = getMin(r, getULPs(in));
17386 max[0] = getMax(r, getULPs(in));
17387
17388 return true;
17389 }
17390 };
17391
17392 template<>
17393 struct fp16Determinant<4> : public fp16MatrixBase
17394 {
getULPsvkt::SpirVAssembly::fp16Determinant17395 virtual double getULPs (vector<const deFloat16*>& in)
17396 {
17397 DE_UNREF(in);
17398
17399 return 128.0; // This is not a precision test. Value is not from spec
17400 }
17401
getComponentValidityvkt::SpirVAssembly::fp16Determinant17402 deUint32 getComponentValidity ()
17403 {
17404 return 1;
17405 }
17406
17407 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17408 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17409 {
17410 const size_t rows = 4;
17411 const size_t cols = 4;
17412 const size_t alignedCols = (cols == 3) ? 4 : cols;
17413 const size_t alignedRows = (rows == 3) ? 4 : rows;
17414
17415 DE_ASSERT(in.size() == 1);
17416 DE_ASSERT(getOutCompCount() == 1);
17417 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17418 DE_UNREF(alignedCols);
17419 DE_UNREF(alignedRows);
17420
17421 // [ a b c d ]
17422 // [ e f g h ]
17423 // [ i j k l ]
17424 // [ m n o p ]
17425 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17426 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17427 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17428 const float d (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17429 const float e (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17430 const float f (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17431 const float g (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17432 const float h (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17433 const float i (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17434 const float j (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17435 const float k (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17436 const float l (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17437 const float m (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17438 const float n (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17439 const float o (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17440 const float p (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17441
17442 // [ f g h ]
17443 // [ j k l ]
17444 // [ n o p ]
17445 const fp16type fkp (f * k * p);
17446 const fp16type gln (g * l * n);
17447 const fp16type hjo (h * j * o);
17448 const fp16type hkn (h * k * n);
17449 const fp16type gjp (g * j * p);
17450 const fp16type flo (f * l * o);
17451 const fp16type detA (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17452
17453 // [ e g h ]
17454 // [ i k l ]
17455 // [ m o p ]
17456 const fp16type ekp (e * k * p);
17457 const fp16type glm (g * l * m);
17458 const fp16type hio (h * i * o);
17459 const fp16type hkm (h * k * m);
17460 const fp16type gip (g * i * p);
17461 const fp16type elo (e * l * o);
17462 const fp16type detB (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17463
17464 // [ e f h ]
17465 // [ i j l ]
17466 // [ m n p ]
17467 const fp16type ejp (e * j * p);
17468 const fp16type flm (f * l * m);
17469 const fp16type hin (h * i * n);
17470 const fp16type hjm (h * j * m);
17471 const fp16type fip (f * i * p);
17472 const fp16type eln (e * l * n);
17473 const fp16type detC (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17474
17475 // [ e f g ]
17476 // [ i j k ]
17477 // [ m n o ]
17478 const fp16type ejo (e * j * o);
17479 const fp16type fkm (f * k * m);
17480 const fp16type gin (g * i * n);
17481 const fp16type gjm (g * j * m);
17482 const fp16type fio (f * i * o);
17483 const fp16type ekn (e * k * n);
17484 const fp16type detD (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17485
17486 const float r (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17487 const fp16type rf16 (r);
17488
17489 out[0] = rf16.bits();
17490 min[0] = getMin(r, getULPs(in));
17491 max[0] = getMax(r, getULPs(in));
17492
17493 return true;
17494 }
17495 };
17496
17497 template<size_t size>
17498 struct fp16Inverse;
17499
17500 template<>
17501 struct fp16Inverse<2> : public fp16MatrixBase
17502 {
getULPsvkt::SpirVAssembly::fp16Inverse17503 virtual double getULPs (vector<const deFloat16*>& in)
17504 {
17505 DE_UNREF(in);
17506
17507 return 128.0; // This is not a precision test. Value is not from spec
17508 }
17509
getComponentValidityvkt::SpirVAssembly::fp16Inverse17510 deUint32 getComponentValidity ()
17511 {
17512 return getComponentMatrixValidityMask(2, 2);
17513 }
17514
17515 template<class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17516 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17517 {
17518 const size_t cols = 2;
17519 const size_t rows = 2;
17520 const size_t alignedCols = (cols == 3) ? 4 : cols;
17521 const size_t alignedRows = (rows == 3) ? 4 : rows;
17522
17523 DE_ASSERT(in.size() == 1);
17524 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17525 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17526 DE_UNREF(alignedCols);
17527
17528 // [ a b ]
17529 // [ c d ]
17530 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17531 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17532 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17533 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17534 const float ad (a * d);
17535 const fp16type adf16 (ad);
17536 const float bc (b * c);
17537 const fp16type bcf16 (bc);
17538 const float det (adf16.asFloat() - bcf16.asFloat());
17539 const fp16type det16 (det);
17540
17541 out[0] = fp16type( d / det16.asFloat()).bits();
17542 out[1] = fp16type(-c / det16.asFloat()).bits();
17543 out[2] = fp16type(-b / det16.asFloat()).bits();
17544 out[3] = fp16type( a / det16.asFloat()).bits();
17545
17546 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17547 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17548 {
17549 const size_t ndx (colNdx * alignedRows + rowNdx);
17550 const fp16type s (out[ndx]);
17551
17552 min[ndx] = getMin(s.asDouble(), getULPs(in));
17553 max[ndx] = getMax(s.asDouble(), getULPs(in));
17554 }
17555
17556 return true;
17557 }
17558 };
17559
fp16ToString(deFloat16 val)17560 inline std::string fp16ToString(deFloat16 val)
17561 {
17562 return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17563 }
17564
17565 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17566 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
17567 {
17568 if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17569 return false;
17570
17571 const size_t resultStep = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17572 const size_t iterationsCount = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17573 const size_t inputsSteps[3] =
17574 {
17575 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17576 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17577 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17578 };
17579
17580 DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17581 DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17582
17583 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17584 {
17585 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17586 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17587 }
17588
17589 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
17590 TestedArithmeticFunction func;
17591
17592 func.setOutCompCount(RES_COMPONENTS);
17593 func.setArgCompCount(0, ARG0_COMPONENTS);
17594 func.setArgCompCount(1, ARG1_COMPONENTS);
17595 func.setArgCompCount(2, ARG2_COMPONENTS);
17596
17597 const bool callOncePerComponent = func.callOncePerComponent();
17598 const deUint32 componentValidityMask = func.getComponentValidity();
17599 const size_t denormModesCount = 2;
17600 const char* denormModes[denormModesCount] = { "keep denormal numbers", "flush to zero" };
17601 const size_t successfulRunsPerComponent = denormModesCount * func.getFlavorCount();
17602 bool success = true;
17603 size_t validatedCount = 0;
17604
17605 vector<deUint8> inputBytes[3];
17606
17607 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17608 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17609
17610 const deFloat16* const inputsAsFP16[3] =
17611 {
17612 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17613 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17614 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17615 };
17616
17617 for (size_t idx = 0; idx < iterationsCount; ++idx)
17618 {
17619 std::vector<size_t> successfulRuns (RES_COMPONENTS, successfulRunsPerComponent);
17620 std::vector<std::string> errors (RES_COMPONENTS);
17621 bool iterationValidated (true);
17622
17623 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17624 {
17625 for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17626 {
17627 func.setFlavor(flavorNdx);
17628
17629 const deFloat16* iterationOutputFP16 = &outputAsFP16[idx * resultStep];
17630 vector<deFloat16> iterationCalculatedFP16 (resultStep, 0);
17631 vector<double> iterationEdgeMin (resultStep, 0.0);
17632 vector<double> iterationEdgeMax (resultStep, 0.0);
17633 vector<const deFloat16*> arguments;
17634
17635 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17636 {
17637 std::string error;
17638 bool reportError = false;
17639
17640 if (callOncePerComponent || componentNdx == 0)
17641 {
17642 bool funcCallResult;
17643
17644 arguments.clear();
17645
17646 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17647 arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17648
17649 if (denormNdx == 0)
17650 funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17651 else
17652 funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17653
17654 if (!funcCallResult)
17655 {
17656 iterationValidated = false;
17657
17658 if (callOncePerComponent)
17659 continue;
17660 else
17661 break;
17662 }
17663 }
17664
17665 if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17666 continue;
17667
17668 reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17669
17670 if (reportError)
17671 {
17672 tcu::Float16 expected (iterationCalculatedFP16[componentNdx]);
17673 tcu::Float16 outputted (iterationOutputFP16[componentNdx]);
17674 tcu::Float64 edgeMin (iterationEdgeMin[componentNdx]);
17675 tcu::Float64 edgeMax (iterationEdgeMax[componentNdx]);
17676
17677 if (reportError && expected.isNaN())
17678 reportError = false;
17679
17680 if (reportError && !expected.isNaN() && !outputted.isNaN())
17681 {
17682 if (reportError && !expected.isInf() && !outputted.isInf())
17683 {
17684 // Ignore rounding
17685 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17686 reportError = false;
17687 }
17688
17689 if (reportError && expected.isInf())
17690 {
17691 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17692 if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17693 reportError = false;
17694 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17695 reportError = false;
17696 }
17697
17698 if (reportError)
17699 {
17700 const double outputtedDouble = outputted.asDouble();
17701
17702 DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
17703
17704 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17705 reportError = false;
17706 }
17707 }
17708
17709 if (reportError)
17710 {
17711 const size_t inputsComps[3] =
17712 {
17713 ARG0_COMPONENTS,
17714 ARG1_COMPONENTS,
17715 ARG2_COMPONENTS,
17716 };
17717 string inputsValues ("Inputs:");
17718 string flavorName (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17719 std::stringstream errStream;
17720
17721 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17722 {
17723 const size_t inputCompsCount = inputsComps[inputNdx];
17724
17725 inputsValues += " [" + de::toString(inputNdx) + "]=(";
17726
17727 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17728 {
17729 const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17730
17731 inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17732 }
17733 }
17734
17735 errStream << "At"
17736 << " iteration " << de::toString(idx)
17737 << " component " << de::toString(componentNdx)
17738 << " denormMode " << de::toString(denormNdx)
17739 << " (" << denormModes[denormNdx] << ")"
17740 << " " << flavorName
17741 << " " << inputsValues
17742 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17743 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17744 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17745 << " " << error << "."
17746 << std::endl;
17747
17748 errors[componentNdx] += errStream.str();
17749
17750 successfulRuns[componentNdx]--;
17751 }
17752 }
17753 }
17754 }
17755 }
17756
17757 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17758 {
17759 // Check if any component has total failure
17760 if (successfulRuns[componentNdx] == 0)
17761 {
17762 // Test failed in all denorm modes and all flavors for certain component: dump errors
17763 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17764
17765 success = false;
17766 }
17767 }
17768
17769 if (iterationValidated)
17770 validatedCount++;
17771 }
17772
17773 if (validatedCount < 16)
17774 TCU_THROW(InternalError, "Too few samples have been validated.");
17775
17776 return success;
17777 }
17778
17779 // IEEE-754 floating point numbers:
17780 // +--------+------+----------+-------------+
17781 // | binary | sign | exponent | significand |
17782 // +--------+------+----------+-------------+
17783 // | 16-bit | 1 | 5 | 10 |
17784 // +--------+------+----------+-------------+
17785 // | 32-bit | 1 | 8 | 23 |
17786 // +--------+------+----------+-------------+
17787 //
17788 // 16-bit floats:
17789 //
// 0 000 00 00 0000 0001 (0x0001: 2^-24: minimum positive denormalized)
// 0 000 00 11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0 000 01 00 0000 0000 (0x0400: 2^-14: minimum positive normalized)
17793 // 0 111 10 11 1111 1111 (0x7bff: 65504: maximum positive normalized)
17794 //
17795 // 0 000 00 00 0000 0000 (0x0000: +0)
17796 // 0 111 11 00 0000 0000 (0x7c00: +Inf)
17797 // 0 000 00 11 1111 0000 (0x03f0: +Denorm)
17798 // 0 000 01 00 0000 0001 (0x0401: +Norm)
17799 // 0 111 11 00 0000 1111 (0x7c0f: +SNaN)
17800 // 0 111 11 11 1111 0000 (0x7ff0: +QNaN)
17801 // Generate and return 16-bit floats and their corresponding 32-bit values.
17802 //
17803 // The first 14 number pairs are manually picked, while the rest are randomly generated.
17804 // Expected count to be at least 14 (numPicks).
getFloat16a(de::Random & rnd,deUint32 count)17805 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17806 {
17807 vector<deFloat16> float16;
17808
17809 float16.reserve(count);
17810
17811 // Zero
17812 float16.push_back(deUint16(0x0000));
17813 float16.push_back(deUint16(0x8000));
17814 // Infinity
17815 float16.push_back(deUint16(0x7c00));
17816 float16.push_back(deUint16(0xfc00));
17817 // Normalized
17818 float16.push_back(deUint16(0x0401));
17819 float16.push_back(deUint16(0x8401));
17820 // Some normal number
17821 float16.push_back(deUint16(0x14cb));
17822 float16.push_back(deUint16(0x94cb));
17823 // Min/max positive normal
17824 float16.push_back(deUint16(0x0400));
17825 float16.push_back(deUint16(0x7bff));
17826 // Min/max negative normal
17827 float16.push_back(deUint16(0x8400));
17828 float16.push_back(deUint16(0xfbff));
17829 // PI
17830 float16.push_back(deUint16(0x4248)); // 3.140625
17831 float16.push_back(deUint16(0xb248)); // -3.140625
17832 // PI/2
17833 float16.push_back(deUint16(0x3e48)); // 1.5703125
17834 float16.push_back(deUint16(0xbe48)); // -1.5703125
17835 float16.push_back(deUint16(0x3c00)); // 1.0
17836 float16.push_back(deUint16(0x3800)); // 0.5
17837 // Some useful constants
17838 float16.push_back(tcu::Float16(-2.5f).bits());
17839 float16.push_back(tcu::Float16(-1.0f).bits());
17840 float16.push_back(tcu::Float16( 0.4f).bits());
17841 float16.push_back(tcu::Float16( 2.5f).bits());
17842
17843 const deUint32 numPicks = static_cast<deUint32>(float16.size());
17844
17845 DE_ASSERT(count >= numPicks);
17846 count -= numPicks;
17847
17848 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17849 {
17850 int sign = (rnd.getUint16() % 2 == 0) ? +1 : -1;
17851 int exponent = (rnd.getUint16() % 29) - 14 + 1;
17852 deUint16 mantissa = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17853
17854 // Exclude power of -14 to avoid denorms
17855 DE_ASSERT(de::inRange(exponent, -13, 15));
17856
17857 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17858 }
17859
17860 return float16;
17861 }
17862
getInputData1(deUint32 seed,size_t count,size_t argNo)17863 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17864 {
17865 DE_UNREF(argNo);
17866
17867 de::Random rnd(seed);
17868
17869 return getFloat16a(rnd, static_cast<deUint32>(count));
17870 }
17871
getInputData2(deUint32 seed,size_t count,size_t argNo)17872 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17873 {
17874 de::Random rnd (seed);
17875 size_t newCount = static_cast<size_t>(deSqrt(double(count)));
17876
17877 DE_ASSERT(newCount * newCount == count);
17878
17879 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17880
17881 return squarize(float16, static_cast<deUint32>(argNo));
17882 }
17883
getInputData3(deUint32 seed,size_t count,size_t argNo)17884 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17885 {
17886 if (argNo == 0 || argNo == 1)
17887 return getInputData2(seed, count, argNo);
17888 else
17889 return getInputData1(seed<<argNo, count, argNo);
17890 }
17891
getInputData(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17892 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17893 {
17894 DE_UNREF(stride);
17895
17896 vector<deFloat16> result;
17897
17898 switch (argCount)
17899 {
17900 case 1:result = getInputData1(seed, count, argNo); break;
17901 case 2:result = getInputData2(seed, count, argNo); break;
17902 case 3:result = getInputData3(seed, count, argNo); break;
17903 default: TCU_THROW(InternalError, "Invalid argument count specified");
17904 }
17905
17906 if (compCount == 3)
17907 {
17908 const size_t newCount = (3 * count) / 4;
17909 vector<deFloat16> newResult;
17910
17911 newResult.reserve(result.size());
17912
17913 for (size_t ndx = 0; ndx < newCount; ++ndx)
17914 {
17915 newResult.push_back(result[ndx]);
17916
17917 if (ndx % 3 == 2)
17918 newResult.push_back(0);
17919 }
17920
17921 result = newResult;
17922 }
17923
17924 DE_ASSERT(result.size() == count);
17925
17926 return result;
17927 }
17928
17929 // Generator for functions requiring data in range [1, inf]
getInputDataAC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17930 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17931 {
17932 vector<deFloat16> result;
17933
17934 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17935
17936 // Filter out values below 1.0 from upper half of numbers
17937 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17938 {
17939 const float f = tcu::Float16(result[idx]).asFloat();
17940
17941 if (f < 1.0f)
17942 result[idx] = tcu::Float16(1.0f - f).bits();
17943 }
17944
17945 return result;
17946 }
17947
17948 // Generator for functions requiring data in range [-1, 1]
getInputDataA(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17949 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17950 {
17951 vector<deFloat16> result;
17952
17953 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17954
17955 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17956 {
17957 const float f = tcu::Float16(result[idx]).asFloat();
17958
17959 if (!de::inRange(f, -1.0f, 1.0f))
17960 result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17961 }
17962
17963 return result;
17964 }
17965
17966 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17967 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17968 {
17969 vector<deFloat16> result;
17970
17971 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17972
17973 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17974 {
17975 const float f = tcu::Float16(result[idx]).asFloat();
17976
17977 if (!de::inRange(f, -DE_PI, DE_PI))
17978 result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17979 }
17980
17981 return result;
17982 }
17983
17984 // Generator for functions requiring data in range [0, inf]
getInputDataP(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17985 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17986 {
17987 vector<deFloat16> result;
17988
17989 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17990
17991 if (argNo == 0)
17992 {
17993 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17994 result[idx] &= static_cast<deFloat16>(~0x8000);
17995 }
17996
17997 return result;
17998 }
17999
getInputDataV(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18000 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18001 {
18002 DE_UNREF(stride);
18003 DE_UNREF(argCount);
18004
18005 vector<deFloat16> result;
18006
18007 if (argNo == 0)
18008 result = getInputData2(seed, count, argNo);
18009 else
18010 {
18011 const size_t alignedCount = (compCount == 3) ? 4 : compCount;
18012 const size_t newCountX = static_cast<size_t>(deSqrt(double(count * alignedCount)));
18013 const size_t newCountY = count / newCountX;
18014 de::Random rnd (seed);
18015 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
18016
18017 DE_ASSERT(newCountX * newCountX == alignedCount * count);
18018
18019 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
18020 {
18021 const vector<deFloat16> tmp(newCountY, float16[numIdx]);
18022
18023 result.insert(result.end(), tmp.begin(), tmp.end());
18024 }
18025 }
18026
18027 DE_ASSERT(result.size() == count);
18028
18029 return result;
18030 }
18031
getInputDataM(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18032 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18033 {
18034 DE_UNREF(compCount);
18035 DE_UNREF(stride);
18036 DE_UNREF(argCount);
18037
18038 de::Random rnd (seed << argNo);
18039 vector<deFloat16> result;
18040
18041 result = getFloat16a(rnd, static_cast<deUint32>(count));
18042
18043 DE_ASSERT(result.size() == count);
18044
18045 return result;
18046 }
18047
getInputDataD(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18048 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18049 {
18050 DE_UNREF(compCount);
18051 DE_UNREF(argCount);
18052
18053 de::Random rnd (seed << argNo);
18054 vector<deFloat16> result;
18055
18056 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18057 {
18058 int num = (rnd.getUint16() % 16) - 8;
18059
18060 result.push_back(tcu::Float16(float(num)).bits());
18061 }
18062
18063 result[0 * stride] = deUint16(0x7c00); // +Inf
18064 result[1 * stride] = deUint16(0xfc00); // -Inf
18065
18066 DE_ASSERT(result.size() == count);
18067
18068 return result;
18069 }
18070
18071 // Generator for smoothstep function
getInputDataSS(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18072 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18073 {
18074 vector<deFloat16> result;
18075
18076 result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
18077
18078 if (argNo == 0)
18079 {
18080 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18081 {
18082 const float f = tcu::Float16(result[idx]).asFloat();
18083
18084 if (f > 4.0f)
18085 result[idx] = tcu::Float16(-f).bits();
18086 }
18087 }
18088
18089 if (argNo == 1)
18090 {
18091 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
18092 {
18093 const float f = tcu::Float16(result[idx]).asFloat();
18094
18095 if (f < 4.0f)
18096 result[idx] = tcu::Float16(-f).bits();
18097 }
18098 }
18099
18100 return result;
18101 }
18102
18103 // Generates normalized vectors for arguments 0 and 1
getInputDataN(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18104 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18105 {
18106 DE_UNREF(compCount);
18107 DE_UNREF(argCount);
18108
18109 de::Random rnd (seed << argNo);
18110 vector<deFloat16> result;
18111
18112 if (argNo == 0 || argNo == 1)
18113 {
18114 // The input parameters for the incident vector I and the surface normal N must already be normalized
18115 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18116 {
18117 vector <float> unnormolized;
18118 float sum = 0;
18119
18120 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18121 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18122
18123 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18124 sum += unnormolized[compIdx] * unnormolized[compIdx];
18125
18126 sum = deFloatSqrt(sum);
18127 if (sum == 0.0f)
18128 unnormolized[0] = sum = 1.0f;
18129
18130 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18131 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18132
18133 for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18134 result.push_back(0);
18135 }
18136 }
18137 else
18138 {
18139 // Input parameter eta
18140 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18141 {
18142 int num = (rnd.getUint16() % 16) - 8;
18143
18144 result.push_back(tcu::Float16(float(num)).bits());
18145 }
18146 }
18147
18148 DE_ASSERT(result.size() == count);
18149
18150 return result;
18151 }
18152
18153 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18154 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18155 {
18156 DE_UNREF(compCount);
18157 DE_UNREF(stride);
18158 DE_UNREF(argCount);
18159
18160 de::Random rnd (seed << argNo);
18161 vector<deFloat16> result;
18162
18163 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18164 {
18165 int num = (rnd.getUint16() % 16) - 8;
18166
18167 result.push_back(tcu::Float16(float(num)).bits());
18168 }
18169
18170 DE_ASSERT(result.size() == count);
18171
18172 return result;
18173 }
18174
// Describes one data type (scalar, vector or matrix of 16-bit floats) handled by the
// float16 math tests. Instances are rows of the testTypes table in
// createFloat16ArithmeticFuncTest().
struct Math16TestType
{
	const char*		typePrefix;			// Type name prefix such as "v2" or "m2x2"; empty for the scalar type
	const size_t	typeComponents;		// Component count for scalar and vector types; the table uses 0 for matrices
	const size_t	typeArrayStride;	// Array stride in bytes (a multiple of sizeof(deFloat16))
	const size_t	typeStructStride;	// Struct stride in bytes (a multiple of sizeof(deFloat16))
	const char*		storage_type;		// Storage type suffix such as "u32_ndp_2"; corresponds to the SSBO_<suffix> types of the shader preamble
};
18183
// Identifiers of the data types supported by the float16 math tests. The values are used
// as indices into per-type tables of size MATH16_TYPE_LAST. For SCALAR and the vector
// types the value equals the number of components.
enum Math16DataTypes
{
	NONE	= 0,	// No type (placeholder entry)
	SCALAR	= 1,
	VEC2	= 2,
	VEC3	= 3,
	VEC4	= 4,
	// Matrix types continue the numbering after VEC4.
	MAT2X2,
	MAT2X3,
	MAT2X4,
	MAT3X2,
	MAT3X3,
	MAT3X4,
	MAT4X2,
	MAT4X3,
	MAT4X4,
	MATH16_TYPE_LAST	// Number of entries, not a type
};
18202
// SPIR-V assembly snippets (string templates) describing how a tested function is invoked.
// Unneeded parts are left as empty strings.
struct Math16ArgFragments
{
	const char*	bodies;			// Instructions that load the arguments, execute the operation and store the result
	const char*	variables;		// Extra module-level declarations (types, constants) the body relies on
	const char*	decorations;	// Extra decorations, e.g. member offsets of a temporary struct
	const char*	funcVariables;	// Function-scope OpVariable declarations
};
18210
// Signature shared by the input data generators (getInputData, getInputDataA, ...):
// returns the values for argument number argNo of a function taking argCount arguments,
// given a seed, the number of values, the component count and the stride of the data type.
typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
18212
// Describes one function or instruction exercised by the float16 math tests.
struct Math16TestFunc
{
	const char*			funcName;			// Instruction name; a leading "Op" marks a core instruction rather than an extended one
	const char*			funcSuffix;			// Appended to funcName to form the test name and to select special-case fragments
	size_t				funcArgsCount;		// Number of arguments (1, 2 or 3)
	size_t				typeResult;			// Result type, used as an index into the type tables (see Math16DataTypes)
	size_t				typeArg0;			// Type of argument 0, same indexing
	size_t				typeArg1;			// Type of argument 1, same indexing
	size_t				typeArg2;			// Type of argument 2, same indexing
	Math16GetInputData*	getInputDataFunc;	// Generator supplying the input values
	VerifyIOFunc		verifyFunc;			// Callback checking the produced outputs
};
18225
18226 template<class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18227 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18228 {
18229 const int testSpecificSeed = deStringHash(testGroup.getName());
18230 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18231 const size_t numDataPointsByAxis = 32;
18232 const size_t numDataPoints = numDataPointsByAxis * numDataPointsByAxis;
18233 const char* componentType = "f16";
18234 const Math16TestType testTypes[MATH16_TYPE_LAST] =
18235 {
18236 { "", 0, 0, 0, "" },
18237 { "", 1, 1 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_half_ndp" },
18238 { "v2", 2, 2 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_ndp" },
18239 { "v3", 3, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18240 { "v4", 4, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18241 { "m2x2", 0, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18242 { "m2x3", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18243 { "m2x4", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18244 { "m3x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_3" },
18245 { "m3x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18246 { "m3x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18247 { "m4x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18248 { "m4x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18249 { "m4x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18250 };
18251
18252 DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18253
18254
18255 const StringTemplate preMain
18256 (
18257 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18258
18259 " %f16 = OpTypeFloat 16\n"
18260 " %v2f16 = OpTypeVector %f16 2\n"
18261 " %v3f16 = OpTypeVector %f16 3\n"
18262 " %v4f16 = OpTypeVector %f16 4\n"
18263 " %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18264 " %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18265 " %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18266 " %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18267 " %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18268 " %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18269 " %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18270 " %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18271 " %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18272
18273 " %fp_v2i32 = OpTypePointer Function %v2i32\n"
18274 " %fp_v3i32 = OpTypePointer Function %v3i32\n"
18275 " %fp_v4i32 = OpTypePointer Function %v4i32\n"
18276
18277 " %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18278 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18279 " %c_u32_5 = OpConstant %u32 5\n"
18280 " %c_u32_6 = OpConstant %u32 6\n"
18281 " %c_u32_7 = OpConstant %u32 7\n"
18282 " %c_u32_8 = OpConstant %u32 8\n"
18283 " %c_f16_0 = OpConstant %f16 0\n"
18284 " %c_f16_1 = OpConstant %f16 1\n"
18285 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18286 " %up_u32 = OpTypePointer Uniform %u32\n"
18287 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18288 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18289
18290 " %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18291 " %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18292 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18293 " %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18294 " %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18295 " %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18296 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18297 " %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18298 " %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18299 " %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18300 " %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18301 " %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18302 " %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18303 " %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18304 " %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18305 " %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18306 " %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18307 " %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18308 " %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18309 " %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18310 " %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18311 " %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18312 " %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18313 " %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18314 " %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18315 " %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18316 " %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18317 " %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18318 " %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18319 " %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18320 " %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18321
18322 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18323 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18324 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18325 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18326 " %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18327 " %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18328 " %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18329 " %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18330 " %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18331 " %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18332 " %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18333 " %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18334 " %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18335 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18336 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18337 " %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18338 " %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18339 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18340 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18341 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18342 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18343 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18344 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18345 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18346 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18347 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18348 "${arg_vars}"
18349 );
18350
18351 const StringTemplate decoration
18352 (
18353 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18354 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18355 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18356
18357 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18358 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18359 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18360
18361 "OpDecorate %ra_u32_2 ArrayStride 4\n"
18362 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18363 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18364 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18365
18366 "OpDecorate %ra_u32_4 ArrayStride 4\n"
18367 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18368 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18369 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18370
18371 "OpDecorate %ra_u32_3 ArrayStride 4\n"
18372 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18373 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18374 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18375
18376 "OpDecorate %ra_u32_6 ArrayStride 4\n"
18377 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18378 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18379 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18380
18381 "OpDecorate %ra_u32_8 ArrayStride 4\n"
18382 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18383 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18384 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18385
18386 "${arg_decorations}"
18387 );
18388
18389 const StringTemplate testFun
18390 (
18391 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18392 " %param = OpFunctionParameter %v4f32\n"
18393 " %entry = OpLabel\n"
18394
18395 " %i = OpVariable %fp_i32 Function\n"
18396 "${arg_infunc_vars}"
18397 " OpStore %i %c_i32_0\n"
18398 " OpBranch %loop\n"
18399
18400 " %loop = OpLabel\n"
18401 " %i_cmp = OpLoad %i32 %i\n"
18402 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18403 " OpLoopMerge %merge %next None\n"
18404 " OpBranchConditional %lt %write %merge\n"
18405
18406 " %write = OpLabel\n"
18407 " %ndx = OpLoad %i32 %i\n"
18408
18409 "${arg_func_call}"
18410
18411 " OpBranch %next\n"
18412
18413 " %next = OpLabel\n"
18414 " %i_cur = OpLoad %i32 %i\n"
18415 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18416 " OpStore %i %i_new\n"
18417 " OpBranch %loop\n"
18418
18419 " %merge = OpLabel\n"
18420 " OpReturnValue %param\n"
18421 " OpFunctionEnd\n"
18422 );
18423
18424 const Math16ArgFragments argFragment1 =
18425 {
18426 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18427 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18428 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18429 "",
18430 "",
18431 "",
18432 };
18433
18434 const Math16ArgFragments argFragment2 =
18435 {
18436 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18437 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18438 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18439 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18440 "",
18441 "",
18442 "",
18443 };
18444
18445 const Math16ArgFragments argFragment3 =
18446 {
18447 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18448 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18449 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18450 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18451 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18452 "",
18453 "",
18454 "",
18455 };
18456
18457 const Math16ArgFragments argFragmentLdExp =
18458 {
18459 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18460 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18461 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18462 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18463 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18464
18465 "",
18466
18467 "",
18468
18469 "",
18470 };
18471
18472 const Math16ArgFragments argFragmentModfFrac =
18473 {
18474 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18475 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18476 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18477
18478 " %fp_tmp = OpTypePointer Function %${tr}\n",
18479
18480 "",
18481
18482 " %tmp = OpVariable %fp_tmp Function\n",
18483 };
18484
18485 const Math16ArgFragments argFragmentModfInt =
18486 {
18487 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18488 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18489 " %tmp0 = OpAccessChain %fp_tmp %tmp\n"
18490 " %val_dst = OpLoad %${tr} %tmp0\n"
18491 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18492
18493 " %fp_tmp = OpTypePointer Function %${tr}\n",
18494
18495 "",
18496
18497 " %tmp = OpVariable %fp_tmp Function\n",
18498 };
18499
18500 const Math16ArgFragments argFragmentModfStruct =
18501 {
18502 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18503 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18504 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18505 " OpStore %tmp_ptr_s %val_tmp\n"
18506 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18507 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18508 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18509
18510 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18511 " %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18512 " %fp_tmp = OpTypePointer Function %st_tmp\n"
18513 " %c_frac = OpConstant %i32 0\n"
18514 " %c_int = OpConstant %i32 1\n",
18515
18516 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18517 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18518
18519 " %tmp = OpVariable %fp_tmp Function\n",
18520 };
18521
18522 const Math16ArgFragments argFragmentFrexpStructS =
18523 {
18524 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18525 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18526 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18527 " OpStore %tmp_ptr_s %val_tmp\n"
18528 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18529 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18530 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18531
18532 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18533 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18534 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18535
18536 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18537 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18538
18539 " %tmp = OpVariable %fp_tmp Function\n",
18540 };
18541
18542 const Math16ArgFragments argFragmentFrexpStructE =
18543 {
18544 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18545 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18546 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18547 " OpStore %tmp_ptr_s %val_tmp\n"
18548 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18549 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18550 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18551 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18552
18553 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18554 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18555
18556 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18557 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18558
18559 " %tmp = OpVariable %fp_tmp Function\n",
18560 };
18561
18562 const Math16ArgFragments argFragmentFrexpS =
18563 {
18564 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18565 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18566 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18567 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18568
18569 "",
18570
18571 "",
18572
18573 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18574 };
18575
18576 const Math16ArgFragments argFragmentFrexpE =
18577 {
18578 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18579 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18580 "%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18581 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
18582 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18583 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18584
18585 "",
18586
18587 "",
18588
18589 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18590 };
18591
18592 string load_funcs[MATH16_TYPE_LAST];
18593 load_funcs[SCALAR] = loadScalarF16FromUint;
18594 load_funcs[VEC2] = loadV2F16FromUint;
18595 load_funcs[VEC3] = loadV3F16FromUints;
18596 load_funcs[VEC4] = loadV4F16FromUints;
18597 load_funcs[MAT2X2] = loadM2x2F16FromUints;
18598 load_funcs[MAT2X3] = loadM2x3F16FromUints;
18599 load_funcs[MAT2X4] = loadM2x4F16FromUints;
18600 load_funcs[MAT3X2] = loadM3x2F16FromUints;
18601 load_funcs[MAT3X3] = loadM3x3F16FromUints;
18602 load_funcs[MAT3X4] = loadM3x4F16FromUints;
18603 load_funcs[MAT4X2] = loadM4x2F16FromUints;
18604 load_funcs[MAT4X3] = loadM4x3F16FromUints;
18605 load_funcs[MAT4X4] = loadM4x4F16FromUints;
18606
18607 string store_funcs[MATH16_TYPE_LAST];
18608 store_funcs[SCALAR] = storeScalarF16AsUint;
18609 store_funcs[VEC2] = storeV2F16AsUint;
18610 store_funcs[VEC3] = storeV3F16AsUints;
18611 store_funcs[VEC4] = storeV4F16AsUints;
18612 store_funcs[MAT2X2] = storeM2x2F16AsUints;
18613 store_funcs[MAT2X3] = storeM2x3F16AsUints;
18614 store_funcs[MAT2X4] = storeM2x4F16AsUints;
18615 store_funcs[MAT3X2] = storeM3x2F16AsUints;
18616 store_funcs[MAT3X3] = storeM3x3F16AsUints;
18617 store_funcs[MAT3X4] = storeM3x4F16AsUints;
18618 store_funcs[MAT4X2] = storeM4x2F16AsUints;
18619 store_funcs[MAT4X3] = storeM4x3F16AsUints;
18620 store_funcs[MAT4X4] = storeM4x4F16AsUints;
18621
18622 const Math16TestType& testType = testTypes[testTypeIdx];
18623 const string funcNameString = string(testFunc.funcName) + string(testFunc.funcSuffix);
18624 const string testName = de::toLower(funcNameString);
18625 const Math16ArgFragments* argFragments = DE_NULL;
18626 const size_t typeStructStride = testType.typeStructStride;
18627 const bool extInst = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18628 const size_t numFloatsPerArg0Type = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18629 const size_t iterations = numDataPoints / numFloatsPerArg0Type;
18630 const size_t numFloatsPerResultType = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18631 const vector<deFloat16> float16UnusedOutput (iterations * numFloatsPerResultType, 0);
18632 VulkanFeatures features;
18633 SpecResource specResource;
18634 map<string, string> specs;
18635 map<string, string> fragments;
18636 vector<string> extensions;
18637 string funcCall;
18638 string funcVariables;
18639 string variables;
18640 string declarations;
18641 string decorations;
18642 string functions;
18643
18644 switch (testFunc.funcArgsCount)
18645 {
18646 case 1:
18647 {
18648 argFragments = &argFragment1;
18649
18650 if (funcNameString == "ModfFrac") argFragments = &argFragmentModfFrac;
18651 if (funcNameString == "ModfInt") argFragments = &argFragmentModfInt;
18652 if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
18653 if (funcNameString == "ModfStructInt") argFragments = &argFragmentModfStruct;
18654 if (funcNameString == "FrexpS") argFragments = &argFragmentFrexpS;
18655 if (funcNameString == "FrexpE") argFragments = &argFragmentFrexpE;
18656 if (funcNameString == "FrexpStructS") argFragments = &argFragmentFrexpStructS;
18657 if (funcNameString == "FrexpStructE") argFragments = &argFragmentFrexpStructE;
18658
18659 break;
18660 }
18661 case 2:
18662 {
18663 argFragments = &argFragment2;
18664
18665 if (funcNameString == "Ldexp") argFragments = &argFragmentLdExp;
18666
18667 break;
18668 }
18669 case 3:
18670 {
18671 argFragments = &argFragment3;
18672
18673 break;
18674 }
18675 default:
18676 {
18677 TCU_THROW(InternalError, "Invalid number of arguments");
18678 }
18679 }
18680
18681 functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18682 if (testFunc.funcArgsCount == 1)
18683 {
18684 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18685 variables +=
18686 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18687 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18688
18689 decorations +=
18690 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18691 "OpDecorate %ssbo_src0 Binding 0\n"
18692 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18693 "OpDecorate %ssbo_dst Binding 1\n";
18694 }
18695 else if (testFunc.funcArgsCount == 2)
18696 {
18697 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18698 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18699 variables +=
18700 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18701 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18702 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18703
18704 decorations +=
18705 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18706 "OpDecorate %ssbo_src0 Binding 0\n"
18707 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18708 "OpDecorate %ssbo_src1 Binding 1\n"
18709 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18710 "OpDecorate %ssbo_dst Binding 2\n";
18711 }
18712 else if (testFunc.funcArgsCount == 3)
18713 {
18714 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18715 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18716 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18717 variables +=
18718 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18719 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18720 " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18721 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18722
18723 decorations +=
18724 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18725 "OpDecorate %ssbo_src0 Binding 0\n"
18726 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18727 "OpDecorate %ssbo_src1 Binding 1\n"
18728 "OpDecorate %ssbo_src2 DescriptorSet 0\n"
18729 "OpDecorate %ssbo_src2 Binding 2\n"
18730 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18731 "OpDecorate %ssbo_dst Binding 3\n";
18732 }
18733 else
18734 {
18735 TCU_THROW(InternalError, "Invalid number of function arguments");
18736 }
18737
18738 variables += argFragments->variables;
18739 decorations += argFragments->decorations;
18740
18741 specs["dr"] = testTypes[testFunc.typeResult].typePrefix;
18742 specs["d0"] = testTypes[testFunc.typeArg0].typePrefix;
18743 specs["d1"] = testTypes[testFunc.typeArg1].typePrefix;
18744 specs["d2"] = testTypes[testFunc.typeArg2].typePrefix;
18745 specs["tr"] = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18746 specs["t0"] = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18747 specs["t1"] = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18748 specs["t2"] = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18749 specs["store_tr"] = string(testTypes[testFunc.typeResult].storage_type);
18750 specs["store_t0"] = string(testTypes[testFunc.typeArg0].storage_type);
18751 specs["store_t1"] = string(testTypes[testFunc.typeArg1].storage_type);
18752 specs["store_t2"] = string(testTypes[testFunc.typeArg2].storage_type);
18753 specs["struct_stride"] = de::toString(typeStructStride);
18754 specs["op"] = extInst ? "OpExtInst" : testFunc.funcName;
18755 specs["ext_inst"] = extInst ? string("%ext_import ") + testFunc.funcName : "";
18756 specs["struct_member"] = de::toLower(testFunc.funcSuffix);
18757
18758 variables = StringTemplate(variables).specialize(specs);
18759 decorations = StringTemplate(decorations).specialize(specs);
18760 funcVariables = StringTemplate(argFragments->funcVariables).specialize(specs);
18761 funcCall = StringTemplate(argFragments->bodies).specialize(specs);
18762
18763 specs["num_data_points"] = de::toString(iterations);
18764 specs["arg_vars"] = variables;
18765 specs["arg_decorations"] = decorations;
18766 specs["arg_infunc_vars"] = funcVariables;
18767 specs["arg_func_call"] = funcCall;
18768
18769 fragments["extension"] = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18770 fragments["capability"] = "OpCapability Matrix\nOpCapability Float16\n";
18771 fragments["decoration"] = decoration.specialize(specs);
18772 fragments["pre_main"] = preMain.specialize(specs) + functions;
18773 fragments["testfun"] = testFun.specialize(specs);
18774
18775 for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18776 {
18777 const size_t numFloatsPerItem = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18778 : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18779 : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18780 : -1;
18781 const vector<deFloat16> inputData = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18782
18783 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18784 }
18785
18786 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18787 specResource.verifyIO = testFunc.verifyFunc;
18788
18789 extensions.push_back("VK_KHR_shader_float16_int8");
18790
18791 features.extFloat16Int8.shaderFloat16 = true;
18792
18793 finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18794 }
18795
18796 template<size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18797 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18798 {
18799 DE_STATIC_ASSERT(C >= 1 && C <= 4);
18800
18801 const std::string testGroupName (string("arithmetic_") + de::toString(C));
18802 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18803 const Math16TestFunc testFuncs[] =
18804 {
18805 { "OpFNegate", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16OpFNegate> },
18806 { "Round", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Round> },
18807 { "RoundEven", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16RoundEven> },
18808 { "Trunc", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Trunc> },
18809 { "FAbs", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FAbs> },
18810 { "FSign", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FSign> },
18811 { "Floor", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Floor> },
18812 { "Ceil", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Ceil> },
18813 { "Fract", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Fract> },
18814 { "Radians", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Radians> },
18815 { "Degrees", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Degrees> },
18816 { "Sin", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sin> },
18817 { "Cos", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cos> },
18818 { "Tan", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tan> },
18819 { "Asin", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asin> },
18820 { "Acos", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acos> },
18821 { "Atan", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atan> },
18822 { "Sinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sinh> },
18823 { "Cosh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cosh> },
18824 { "Tanh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tanh> },
18825 { "Asinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asinh> },
18826 { "Acosh", "", 1, C, C, 0, 0, &getInputDataAC, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acosh> },
18827 { "Atanh", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atanh> },
18828 { "Exp", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp> },
18829 { "Log", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log> },
18830 { "Exp2", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp2> },
18831 { "Log2", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log2> },
18832 { "Sqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sqrt> },
18833 { "InverseSqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16InverseSqrt> },
18834 { "Modf", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18835 { "Modf", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18836 { "ModfStruct", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18837 { "ModfStruct", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18838 { "Frexp", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18839 { "Frexp", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18840 { "FrexpStruct", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18841 { "FrexpStruct", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18842 { "OpFAdd", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFAdd> },
18843 { "OpFSub", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFSub> },
18844 { "OpFMul", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFMul> },
18845 { "OpFDiv", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFDiv> },
18846 { "Atan2", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Atan2> },
18847 { "Pow", "", 2, C, C, C, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, C, 0, fp16Pow> },
18848 { "FMin", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMin> },
18849 { "FMax", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMax> },
18850 { "Step", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Step> },
18851 { "Ldexp", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Ldexp> },
18852 { "FClamp", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16FClamp> },
18853 { "FMix", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FMix> },
18854 { "SmoothStep", "", 3, C, C, C, C, &getInputDataSS, compareFP16ArithmeticFunc< C, C, C, C, fp16SmoothStep> },
18855 { "Fma", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16Fma> },
18856 { "Length", "", 1, 1, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, 0, 0, fp16Length> },
18857 { "Distance", "", 2, 1, C, C, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Distance> },
18858 { "Cross", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Cross> },
18859 { "Normalize", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Normalize> },
18860 { "FaceForward", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FaceForward> },
18861 { "Reflect", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Reflect> },
18862 { "Refract", "", 3, C, C, C, 1, &getInputDataN, compareFP16ArithmeticFunc< C, C, C, 1, fp16Refract> },
18863 { "OpDot", "", 2, 1, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Dot> },
18864 { "OpVectorTimesScalar", "", 2, C, C, 1, 0, &getInputDataV, compareFP16ArithmeticFunc< C, C, 1, 0, fp16VectorTimesScalar> },
18865 };
18866
18867 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18868 {
18869 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
18870 const string funcNameString = testFunc.funcName;
18871
18872 if ((C != 3) && funcNameString == "Cross")
18873 continue;
18874
18875 if ((C < 2) && funcNameString == "OpDot")
18876 continue;
18877
18878 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18879 continue;
18880
18881 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18882 }
18883
18884 return testGroup.release();
18885 }
18886
18887 template<class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18888 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18889 {
18890 const std::string testGroupName ("arithmetic");
18891 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18892 const Math16TestFunc testFuncs[] =
18893 {
18894 { "OpTranspose", "2x2", 1, MAT2X2, MAT2X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Transpose<2,2> > },
18895 { "OpTranspose", "3x2", 1, MAT2X3, MAT3X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<3,2> > },
18896 { "OpTranspose", "4x2", 1, MAT2X4, MAT4X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<4,2> > },
18897 { "OpTranspose", "2x3", 1, MAT3X2, MAT2X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,3> > },
18898 { "OpTranspose", "3x3", 1, MAT3X3, MAT3X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,3> > },
18899 { "OpTranspose", "4x3", 1, MAT3X4, MAT4X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,3> > },
18900 { "OpTranspose", "2x4", 1, MAT4X2, MAT2X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,4> > },
18901 { "OpTranspose", "3x4", 1, MAT4X3, MAT3X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,4> > },
18902 { "OpTranspose", "4x4", 1, MAT4X4, MAT4X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,4> > },
18903 { "OpMatrixTimesScalar", "2x2", 2, MAT2X2, MAT2X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 1, 0, fp16MatrixTimesScalar<2,2> > },
18904 { "OpMatrixTimesScalar", "2x3", 2, MAT2X3, MAT2X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,3> > },
18905 { "OpMatrixTimesScalar", "2x4", 2, MAT2X4, MAT2X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,4> > },
18906 { "OpMatrixTimesScalar", "3x2", 2, MAT3X2, MAT3X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<3,2> > },
18907 { "OpMatrixTimesScalar", "3x3", 2, MAT3X3, MAT3X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,3> > },
18908 { "OpMatrixTimesScalar", "3x4", 2, MAT3X4, MAT3X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,4> > },
18909 { "OpMatrixTimesScalar", "4x2", 2, MAT4X2, MAT4X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<4,2> > },
18910 { "OpMatrixTimesScalar", "4x3", 2, MAT4X3, MAT4X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,3> > },
18911 { "OpMatrixTimesScalar", "4x4", 2, MAT4X4, MAT4X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,4> > },
18912 { "OpVectorTimesMatrix", "2x2", 2, VEC2, VEC2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 2, 4, 0, fp16VectorTimesMatrix<2,2> > },
18913 { "OpVectorTimesMatrix", "2x3", 2, VEC2, VEC3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 3, 8, 0, fp16VectorTimesMatrix<2,3> > },
18914 { "OpVectorTimesMatrix", "2x4", 2, VEC2, VEC4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 8, 0, fp16VectorTimesMatrix<2,4> > },
18915 { "OpVectorTimesMatrix", "3x2", 2, VEC3, VEC2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 2, 8, 0, fp16VectorTimesMatrix<3,2> > },
18916 { "OpVectorTimesMatrix", "3x3", 2, VEC3, VEC3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 3, 16, 0, fp16VectorTimesMatrix<3,3> > },
18917 { "OpVectorTimesMatrix", "3x4", 2, VEC3, VEC4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 4, 16, 0, fp16VectorTimesMatrix<3,4> > },
18918 { "OpVectorTimesMatrix", "4x2", 2, VEC4, VEC2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 8, 0, fp16VectorTimesMatrix<4,2> > },
18919 { "OpVectorTimesMatrix", "4x3", 2, VEC4, VEC3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 3, 16, 0, fp16VectorTimesMatrix<4,3> > },
18920 { "OpVectorTimesMatrix", "4x4", 2, VEC4, VEC4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 16, 0, fp16VectorTimesMatrix<4,4> > },
18921 { "OpMatrixTimesVector", "2x2", 2, VEC2, MAT2X2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 2, 0, fp16MatrixTimesVector<2,2> > },
18922 { "OpMatrixTimesVector", "2x3", 2, VEC3, MAT2X3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 8, 2, 0, fp16MatrixTimesVector<2,3> > },
18923 { "OpMatrixTimesVector", "2x4", 2, VEC4, MAT2X4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 2, 0, fp16MatrixTimesVector<2,4> > },
18924 { "OpMatrixTimesVector", "3x2", 2, VEC2, MAT3X2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 3, 0, fp16MatrixTimesVector<3,2> > },
18925 { "OpMatrixTimesVector", "3x3", 2, VEC3, MAT3X3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 3, 0, fp16MatrixTimesVector<3,3> > },
18926 { "OpMatrixTimesVector", "3x4", 2, VEC4, MAT3X4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 3, 0, fp16MatrixTimesVector<3,4> > },
18927 { "OpMatrixTimesVector", "4x2", 2, VEC2, MAT4X2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 4, 0, fp16MatrixTimesVector<4,2> > },
18928 { "OpMatrixTimesVector", "4x3", 2, VEC3, MAT4X3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 4, 0, fp16MatrixTimesVector<4,3> > },
18929 { "OpMatrixTimesVector", "4x4", 2, VEC4, MAT4X4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 4, 0, fp16MatrixTimesVector<4,4> > },
18930 { "OpMatrixTimesMatrix", "2x2_2x2", 2, MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 4, 0, fp16MatrixTimesMatrix<2,2,2,2> > },
18931 { "OpMatrixTimesMatrix", "2x2_3x2", 2, MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,3,2> > },
18932 { "OpMatrixTimesMatrix", "2x2_4x2", 2, MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,4,2> > },
18933 { "OpMatrixTimesMatrix", "2x3_2x2", 2, MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,3,2,2> > },
18934 { "OpMatrixTimesMatrix", "2x3_3x2", 2, MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,3,2> > },
18935 { "OpMatrixTimesMatrix", "2x3_4x2", 2, MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,4,2> > },
18936 { "OpMatrixTimesMatrix", "2x4_2x2", 2, MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,4,2,2> > },
18937 { "OpMatrixTimesMatrix", "2x4_3x2", 2, MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,3,2> > },
18938 { "OpMatrixTimesMatrix", "2x4_4x2", 2, MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,4,2> > },
18939 { "OpMatrixTimesMatrix", "3x2_2x3", 2, MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<3,2,2,3> > },
18940 { "OpMatrixTimesMatrix", "3x2_3x3", 2, MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,3,3> > },
18941 { "OpMatrixTimesMatrix", "3x2_4x3", 2, MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,4,3> > },
18942 { "OpMatrixTimesMatrix", "3x3_2x3", 2, MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,3,2,3> > },
18943 { "OpMatrixTimesMatrix", "3x3_3x3", 2, MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,3,3> > },
18944 { "OpMatrixTimesMatrix", "3x3_4x3", 2, MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,4,3> > },
18945 { "OpMatrixTimesMatrix", "3x4_2x3", 2, MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,4,2,3> > },
18946 { "OpMatrixTimesMatrix", "3x4_3x3", 2, MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,3,3> > },
18947 { "OpMatrixTimesMatrix", "3x4_4x3", 2, MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,4,3> > },
18948 { "OpMatrixTimesMatrix", "4x2_2x4", 2, MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<4,2,2,4> > },
18949 { "OpMatrixTimesMatrix", "4x2_3x4", 2, MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,3,4> > },
18950 { "OpMatrixTimesMatrix", "4x2_4x4", 2, MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,4,4> > },
18951 { "OpMatrixTimesMatrix", "4x3_2x4", 2, MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,3,2,4> > },
18952 { "OpMatrixTimesMatrix", "4x3_3x4", 2, MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,3,4> > },
18953 { "OpMatrixTimesMatrix", "4x3_4x4", 2, MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,4,4> > },
18954 { "OpMatrixTimesMatrix", "4x4_2x4", 2, MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,4,2,4> > },
18955 { "OpMatrixTimesMatrix", "4x4_3x4", 2, MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,3,4> > },
18956 { "OpMatrixTimesMatrix", "4x4_4x4", 2, MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,4,4> > },
18957 { "OpOuterProduct", "2x2", 2, MAT2X2, VEC2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 2, 0, fp16OuterProduct<2,2> > },
18958 { "OpOuterProduct", "2x3", 2, MAT2X3, VEC3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 3, 2, 0, fp16OuterProduct<2,3> > },
18959 { "OpOuterProduct", "2x4", 2, MAT2X4, VEC4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 2, 0, fp16OuterProduct<2,4> > },
18960 { "OpOuterProduct", "3x2", 2, MAT3X2, VEC2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 3, 0, fp16OuterProduct<3,2> > },
18961 { "OpOuterProduct", "3x3", 2, MAT3X3, VEC3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 3, 0, fp16OuterProduct<3,3> > },
18962 { "OpOuterProduct", "3x4", 2, MAT3X4, VEC4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 3, 0, fp16OuterProduct<3,4> > },
18963 { "OpOuterProduct", "4x2", 2, MAT4X2, VEC2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 4, 0, fp16OuterProduct<4,2> > },
18964 { "OpOuterProduct", "4x3", 2, MAT4X3, VEC3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 4, 0, fp16OuterProduct<4,3> > },
18965 { "OpOuterProduct", "4x4", 2, MAT4X4, VEC4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 4, 0, fp16OuterProduct<4,4> > },
18966 { "Determinant", "2x2", 1, SCALAR, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 4, 0, 0, fp16Determinant<2> > },
18967 { "Determinant", "3x3", 1, SCALAR, MAT3X3, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<3> > },
18968 { "Determinant", "4x4", 1, SCALAR, MAT4X4, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<4> > },
18969 { "MatrixInverse", "2x2", 1, MAT2X2, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Inverse<2> > },
18970 };
18971
18972 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18973 {
18974 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
18975
18976 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18977 }
18978
18979 return testGroup.release();
18980 }
18981
// One Amber-based float32 comparison test, as listed in the amberTests tables
// of the createFloat32Comparison*Set functions.
struct ComparisonCase
{
	std::string	name;	// Test case name; also the prefix of the .amber file name.
	std::string	desc;	// Text appended to "Compare output of " to form the case description.
};
18987
18988 template<size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)18989 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18990 {
18991 const string testGroupName ("comparison_" + de::toString(C));
18992 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18993 #ifndef CTS_USES_VULKANSC
18994 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
18995
18996 const ComparisonCase amberTests[] =
18997 {
18998 { "modfstruct", "modf and modfStruct" },
18999 { "frexpstruct", "frexp and frexpStruct" }
19000 };
19001
19002 for (ComparisonCase test : amberTests)
19003 {
19004 const string caseDesc ("Compare output of " + test.desc);
19005 const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
19006
19007 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
19008 test.name.c_str(),
19009 caseDesc.c_str(),
19010 dataDir,
19011 fileName));
19012 }
19013 #endif
19014 return testGroup.release();
19015 }
19016
// A graphics shader stage for the Amber comparison tests, together with the
// requirement strings handed to createAmberTestCase for that stage.
struct ShaderStage
{
	std::string					name;			// Stage tag used in the case name and the .amber file name.
	std::vector<std::string>	requirement;	// Requirement strings, e.g. "Features.geometryShader"; may be empty.
};
19022
19023 template<size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)19024 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
19025 {
19026 const string testGroupName ("comparison_" + de::toString(C));
19027 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
19028 #ifndef CTS_USES_VULKANSC
19029 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
19030
19031 const ShaderStage stages[] =
19032 {
19033 { "vert", vector<string>(0) },
19034 { "tesc", vector<string>(1, "Features.tessellationShader") },
19035 { "tese", vector<string>(1, "Features.tessellationShader") },
19036 { "geom", vector<string>(1, "Features.geometryShader") },
19037 { "frag", vector<string>(0) }
19038 };
19039
19040 const ComparisonCase amberTests[] =
19041 {
19042 { "modfstruct", "modf and modfStruct" },
19043 { "frexpstruct", "frexp and frexpStruct" }
19044 };
19045
19046 for (ComparisonCase test : amberTests)
19047 for (ShaderStage stage : stages)
19048 {
19049 const string caseName (test.name + "_" + stage.name);
19050 const string caseDesc ("Compare output of " + test.desc);
19051 const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
19052
19053 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
19054 caseName.c_str(),
19055 caseDesc.c_str(),
19056 dataDir,
19057 fileName,
19058 stage.requirement));
19059 }
19060 #endif
19061
19062 return testGroup.release();
19063 }
19064
getNumberTypeName(const NumberType type)19065 const string getNumberTypeName (const NumberType type)
19066 {
19067 if (type == NUMBERTYPE_INT32)
19068 {
19069 return "int";
19070 }
19071 else if (type == NUMBERTYPE_UINT32)
19072 {
19073 return "uint";
19074 }
19075 else if (type == NUMBERTYPE_FLOAT32)
19076 {
19077 return "float";
19078 }
19079 else
19080 {
19081 DE_ASSERT(false);
19082 return "";
19083 }
19084 }
19085
getInt(de::Random & rnd)19086 deInt32 getInt(de::Random& rnd)
19087 {
19088 return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
19089 }
19090
// Returns str concatenated with itself `times` times.
// A zero or negative count yields an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
19100
getRandomConstantString(const NumberType type,de::Random & rnd)19101 const string getRandomConstantString (const NumberType type, de::Random& rnd)
19102 {
19103 if (type == NUMBERTYPE_INT32)
19104 {
19105 return numberToString<deInt32>(getInt(rnd));
19106 }
19107 else if (type == NUMBERTYPE_UINT32)
19108 {
19109 return numberToString<deUint32>(rnd.getUint32());
19110 }
19111 else if (type == NUMBERTYPE_FLOAT32)
19112 {
19113 return numberToString<float>(rnd.getFloat());
19114 }
19115 else
19116 {
19117 DE_ASSERT(false);
19118 return "";
19119 }
19120 }
19121
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19122 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19123 {
19124 map<string, string> params;
19125
19126 // Vec2 to Vec4
19127 for (int width = 2; width <= 4; ++width)
19128 {
19129 const string randomConst = numberToString(getInt(rnd));
19130 const string widthStr = numberToString(width);
19131 const string composite_type = "${customType}vec" + widthStr;
19132 const int index = rnd.getInt(0, width-1);
19133
19134 params["type"] = "vec";
19135 params["name"] = params["type"] + "_" + widthStr;
19136 params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19137 params["compositeType"] = composite_type;
19138 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19139 params["compositeConstruct"] = "%instance = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19140 params["indexes"] = numberToString(index);
19141 testCases.push_back(params);
19142 }
19143 }
19144
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19145 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19146 {
19147 const int limit = 10;
19148 map<string, string> params;
19149
19150 for (int width = 2; width <= limit; ++width)
19151 {
19152 string randomConst = numberToString(getInt(rnd));
19153 string widthStr = numberToString(width);
19154 int index = rnd.getInt(0, width-1);
19155
19156 params["type"] = "array";
19157 params["name"] = params["type"] + "_" + widthStr;
19158 params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19159 + "%composite = OpTypeArray ${customType} %arraywidth\n";
19160 params["compositeType"] = "%composite";
19161 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19162 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19163 params["indexes"] = numberToString(index);
19164 testCases.push_back(params);
19165 }
19166 }
19167
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19168 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19169 {
19170 const int limit = 10;
19171 map<string, string> params;
19172
19173 for (int width = 2; width <= limit; ++width)
19174 {
19175 string randomConst = numberToString(getInt(rnd));
19176 int index = rnd.getInt(0, width-1);
19177
19178 params["type"] = "struct";
19179 params["name"] = params["type"] + "_" + numberToString(width);
19180 params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19181 params["compositeType"] = "%composite";
19182 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19183 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19184 params["indexes"] = numberToString(index);
19185 testCases.push_back(params);
19186 }
19187 }
19188
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19189 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19190 {
19191 map<string, string> params;
19192
19193 // Vec2 to Vec4
19194 for (int width = 2; width <= 4; ++width)
19195 {
19196 string widthStr = numberToString(width);
19197
19198 for (int column = 2 ; column <= 4; ++column)
19199 {
19200 int index_0 = rnd.getInt(0, column-1);
19201 int index_1 = rnd.getInt(0, width-1);
19202 string columnStr = numberToString(column);
19203
19204 params["type"] = "matrix";
19205 params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
19206 params["compositeDecl"] = string("%vectype = OpTypeVector ${customType} " + widthStr + "\n")
19207 + "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19208 params["compositeType"] = "%composite";
19209
19210 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19211 + "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19212
19213 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19214 params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
19215 testCases.push_back(params);
19216 }
19217 }
19218 }
19219
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19220 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19221 {
19222 createVectorCompositeCases(testCases, rnd, type);
19223 createArrayCompositeCases(testCases, rnd, type);
19224 createStructCompositeCases(testCases, rnd, type);
19225 // Matrix only supports float types
19226 if (type == NUMBERTYPE_FLOAT32)
19227 {
19228 createMatrixCompositeCases(testCases, rnd, type);
19229 }
19230 }
19231
getAssemblyTypeDeclaration(const NumberType type)19232 const string getAssemblyTypeDeclaration (const NumberType type)
19233 {
19234 switch (type)
19235 {
19236 case NUMBERTYPE_INT32: return "OpTypeInt 32 1";
19237 case NUMBERTYPE_UINT32: return "OpTypeInt 32 0";
19238 case NUMBERTYPE_FLOAT32: return "OpTypeFloat 32";
19239 default: DE_ASSERT(false); return "";
19240 }
19241 }
19242
getAssemblyTypeName(const NumberType type)19243 const string getAssemblyTypeName (const NumberType type)
19244 {
19245 switch (type)
19246 {
19247 case NUMBERTYPE_INT32: return "%i32";
19248 case NUMBERTYPE_UINT32: return "%u32";
19249 case NUMBERTYPE_FLOAT32: return "%f32";
19250 default: DE_ASSERT(false); return "";
19251 }
19252 }
19253
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19254 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
19255 {
19256 map<string, string> parameters(params);
19257
19258 const string customType = getAssemblyTypeName(type);
19259 map<string, string> substCustomType;
19260 substCustomType["customType"] = customType;
19261 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19262 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19263 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19264 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19265 parameters["customType"] = customType;
19266 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19267
19268 if (parameters.at("compositeType") != "%u32vec3")
19269 {
19270 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19271 }
19272
19273 return StringTemplate(
19274 "OpCapability Shader\n"
19275 "OpCapability Matrix\n"
19276 "OpMemoryModel Logical GLSL450\n"
19277 "OpEntryPoint GLCompute %main \"main\" %id\n"
19278 "OpExecutionMode %main LocalSize 1 1 1\n"
19279
19280 "OpSource GLSL 430\n"
19281 "OpName %main \"main\"\n"
19282 "OpName %id \"gl_GlobalInvocationID\"\n"
19283
19284 // Decorators
19285 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19286 "OpDecorate %buf BufferBlock\n"
19287 "OpDecorate %indata DescriptorSet 0\n"
19288 "OpDecorate %indata Binding 0\n"
19289 "OpDecorate %outdata DescriptorSet 0\n"
19290 "OpDecorate %outdata Binding 1\n"
19291 "OpDecorate %customarr ArrayStride 4\n"
19292 "${compositeDecorator}"
19293 "OpMemberDecorate %buf 0 Offset 0\n"
19294
19295 // General types
19296 "%void = OpTypeVoid\n"
19297 "%voidf = OpTypeFunction %void\n"
19298 "%u32 = OpTypeInt 32 0\n"
19299 "%i32 = OpTypeInt 32 1\n"
19300 "%f32 = OpTypeFloat 32\n"
19301
19302 // Composite declaration
19303 "${compositeDecl}"
19304
19305 // Constants
19306 "${filler}"
19307
19308 "${u32vec3Decl:opt}"
19309 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19310
19311 // Inherited from custom
19312 "%customptr = OpTypePointer Uniform ${customType}\n"
19313 "%customarr = OpTypeRuntimeArray ${customType}\n"
19314 "%buf = OpTypeStruct %customarr\n"
19315 "%bufptr = OpTypePointer Uniform %buf\n"
19316
19317 "%indata = OpVariable %bufptr Uniform\n"
19318 "%outdata = OpVariable %bufptr Uniform\n"
19319
19320 "%id = OpVariable %uvec3ptr Input\n"
19321 "%zero = OpConstant %i32 0\n"
19322
19323 "%main = OpFunction %void None %voidf\n"
19324 "%label = OpLabel\n"
19325 "%idval = OpLoad %u32vec3 %id\n"
19326 "%x = OpCompositeExtract %u32 %idval 0\n"
19327
19328 "%inloc = OpAccessChain %customptr %indata %zero %x\n"
19329 "%outloc = OpAccessChain %customptr %outdata %zero %x\n"
19330 // Read the input value
19331 "%inval = OpLoad ${customType} %inloc\n"
19332 // Create the composite and fill it
19333 "${compositeConstruct}"
19334 // Insert the input value to a place
19335 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19336 // Read back the value from the position
19337 "%out_val = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19338 // Store it in the output position
19339 " OpStore %outloc %out_val\n"
19340 " OpReturn\n"
19341 " OpFunctionEnd\n"
19342 ).specialize(parameters);
19343 }
19344
19345 template<typename T>
createCompositeBuffer(T number)19346 BufferSp createCompositeBuffer(T number)
19347 {
19348 return BufferSp(new Buffer<T>(vector<T>(1, number)));
19349 }
19350
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19351 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19352 {
19353 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
19354 de::Random rnd (deStringHash(group->getName()));
19355
19356 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19357 {
19358 NumberType numberType = NumberType(type);
19359 const string typeName = getNumberTypeName(numberType);
19360 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19361 vector<map<string, string> > testCases;
19362
19363 createCompositeCases(testCases, rnd, numberType);
19364
19365 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19366 {
19367 ComputeShaderSpec spec;
19368
19369 spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19370
19371 switch (numberType)
19372 {
19373 case NUMBERTYPE_INT32:
19374 {
19375 deInt32 number = getInt(rnd);
19376 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19377 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19378 break;
19379 }
19380 case NUMBERTYPE_UINT32:
19381 {
19382 deUint32 number = rnd.getUint32();
19383 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19384 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19385 break;
19386 }
19387 case NUMBERTYPE_FLOAT32:
19388 {
19389 float number = rnd.getFloat();
19390 spec.inputs.push_back(createCompositeBuffer<float>(number));
19391 spec.outputs.push_back(createCompositeBuffer<float>(number));
19392 break;
19393 }
19394 default:
19395 DE_ASSERT(false);
19396 }
19397
19398 spec.numWorkGroups = IVec3(1, 1, 1);
19399 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19400 }
19401 group->addChild(subGroup.release());
19402 }
19403 return group.release();
19404 }
19405
19406 struct AssemblyStructInfo
19407 {
AssemblyStructInfovkt::SpirVAssembly::AssemblyStructInfo19408 AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19409 : components (comp)
19410 , index (idx)
19411 {}
19412
19413 deUint32 components;
19414 deUint32 index;
19415 };
19416
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19417 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
19418 {
19419 // Create the full index string
19420 string fullIndex = numberToString(structInfo.index) + " " + params.at("indexes");
19421 // Convert it to list of indexes
19422 vector<string> indexes = de::splitString(fullIndex, ' ');
19423
19424 map<string, string> parameters (params);
19425 parameters["structType"] = repeatString(" ${compositeType}", structInfo.components);
19426 parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
19427 parameters["insertIndexes"] = fullIndex;
19428
19429 // In matrix cases the last two index is the CompositeExtract indexes
19430 const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19431
19432 // Construct the extractIndex
19433 for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19434 {
19435 parameters["extractIndexes"] += " " + *index;
19436 }
19437
19438 // Remove the last 1 or 2 element depends on matrix case or not
19439 indexes.erase(indexes.end() - extractIndexes, indexes.end());
19440
19441 deUint32 id = 0;
19442 // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19443 for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19444 {
19445 string indexId = "%index_" + numberToString(id++);
19446 parameters["accessChainConstDeclaration"] += indexId + " = OpConstant %u32 " + *index + "\n";
19447 parameters["accessChainIndexes"] += " " + indexId;
19448 }
19449
19450 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19451
19452 const string customType = getAssemblyTypeName(type);
19453 map<string, string> substCustomType;
19454 substCustomType["customType"] = customType;
19455 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19456 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19457 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19458 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19459 parameters["customType"] = customType;
19460
19461 const string compositeType = parameters.at("compositeType");
19462 map<string, string> substCompositeType;
19463 substCompositeType["compositeType"] = compositeType;
19464 parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19465 if (compositeType != "%u32vec3")
19466 {
19467 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19468 }
19469
19470 return StringTemplate(
19471 "OpCapability Shader\n"
19472 "OpCapability Matrix\n"
19473 "OpMemoryModel Logical GLSL450\n"
19474 "OpEntryPoint GLCompute %main \"main\" %id\n"
19475 "OpExecutionMode %main LocalSize 1 1 1\n"
19476
19477 "OpSource GLSL 430\n"
19478 "OpName %main \"main\"\n"
19479 "OpName %id \"gl_GlobalInvocationID\"\n"
19480 // Decorators
19481 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19482 "OpDecorate %buf BufferBlock\n"
19483 "OpDecorate %indata DescriptorSet 0\n"
19484 "OpDecorate %indata Binding 0\n"
19485 "OpDecorate %outdata DescriptorSet 0\n"
19486 "OpDecorate %outdata Binding 1\n"
19487 "OpDecorate %customarr ArrayStride 4\n"
19488 "${compositeDecorator}"
19489 "OpMemberDecorate %buf 0 Offset 0\n"
19490 // General types
19491 "%void = OpTypeVoid\n"
19492 "%voidf = OpTypeFunction %void\n"
19493 "%i32 = OpTypeInt 32 1\n"
19494 "%u32 = OpTypeInt 32 0\n"
19495 "%f32 = OpTypeFloat 32\n"
19496 // Custom types
19497 "${compositeDecl}"
19498 // %u32vec3 if not already declared in ${compositeDecl}
19499 "${u32vec3Decl:opt}"
19500 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19501 // Inherited from composite
19502 "%composite_p = OpTypePointer Function ${compositeType}\n"
19503 "%struct_t = OpTypeStruct${structType}\n"
19504 "%struct_p = OpTypePointer Function %struct_t\n"
19505 // Constants
19506 "${filler}"
19507 "${accessChainConstDeclaration}"
19508 // Inherited from custom
19509 "%customptr = OpTypePointer Uniform ${customType}\n"
19510 "%customarr = OpTypeRuntimeArray ${customType}\n"
19511 "%buf = OpTypeStruct %customarr\n"
19512 "%bufptr = OpTypePointer Uniform %buf\n"
19513 "%indata = OpVariable %bufptr Uniform\n"
19514 "%outdata = OpVariable %bufptr Uniform\n"
19515
19516 "%id = OpVariable %uvec3ptr Input\n"
19517 "%zero = OpConstant %u32 0\n"
19518 "%main = OpFunction %void None %voidf\n"
19519 "%label = OpLabel\n"
19520 "%struct_v = OpVariable %struct_p Function\n"
19521 "%idval = OpLoad %u32vec3 %id\n"
19522 "%x = OpCompositeExtract %u32 %idval 0\n"
19523 // Create the input/output type
19524 "%inloc = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19525 "%outloc = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19526 // Read the input value
19527 "%inval = OpLoad ${customType} %inloc\n"
19528 // Create the composite and fill it
19529 "${compositeConstruct}"
19530 // Create the struct and fill it with the composite
19531 "%struct = OpCompositeConstruct %struct_t${structConstruct}\n"
19532 // Insert the value
19533 "%comp_obj = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19534 // Store the object
19535 " OpStore %struct_v %comp_obj\n"
19536 // Get deepest possible composite pointer
19537 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19538 "%read_obj = OpLoad ${compositeType} %inner_ptr\n"
19539 // Read back the stored value
19540 "%read_val = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19541 " OpStore %outloc %read_val\n"
19542 " OpReturn\n"
19543 " OpFunctionEnd\n"
19544 ).specialize(parameters);
19545 }
19546
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19547 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19548 {
19549 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
19550 de::Random rnd (deStringHash(group->getName()));
19551
19552 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19553 {
19554 NumberType numberType = NumberType(type);
19555 const string typeName = getNumberTypeName(numberType);
19556 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19557
19558 vector<map<string, string> > testCases;
19559 createCompositeCases(testCases, rnd, numberType);
19560
19561 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19562 {
19563 ComputeShaderSpec spec;
19564
19565 // Number of components inside of a struct
19566 deUint32 structComponents = rnd.getInt(2, 8);
19567 // Component index value
19568 deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19569 AssemblyStructInfo structInfo(structComponents, structIndex);
19570
19571 spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19572
19573 switch (numberType)
19574 {
19575 case NUMBERTYPE_INT32:
19576 {
19577 deInt32 number = getInt(rnd);
19578 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19579 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19580 break;
19581 }
19582 case NUMBERTYPE_UINT32:
19583 {
19584 deUint32 number = rnd.getUint32();
19585 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19586 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19587 break;
19588 }
19589 case NUMBERTYPE_FLOAT32:
19590 {
19591 float number = rnd.getFloat();
19592 spec.inputs.push_back(createCompositeBuffer<float>(number));
19593 spec.outputs.push_back(createCompositeBuffer<float>(number));
19594 break;
19595 }
19596 default:
19597 DE_ASSERT(false);
19598 }
19599 spec.numWorkGroups = IVec3(1, 1, 1);
19600 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), spec));
19601 }
19602 group->addChild(subGroup.release());
19603 }
19604 return group.release();
19605 }
19606
19607 // If the params missing, uninitialized case
19608 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19609 {
19610 map<string, string> parameters(params);
19611
19612 parameters["customType"] = getAssemblyTypeName(type);
19613
19614 // Declare the const value, and use it in the initializer
19615 if (params.find("constValue") != params.end())
19616 {
19617 parameters["variableInitializer"] = " %const";
19618 }
19619 // Uninitialized case
19620 else
19621 {
19622 parameters["commentDecl"] = ";";
19623 }
19624
19625 return StringTemplate(
19626 "OpCapability Shader\n"
19627 "OpMemoryModel Logical GLSL450\n"
19628 "OpEntryPoint GLCompute %main \"main\" %id\n"
19629 "OpExecutionMode %main LocalSize 1 1 1\n"
19630 "OpSource GLSL 430\n"
19631 "OpName %main \"main\"\n"
19632 "OpName %id \"gl_GlobalInvocationID\"\n"
19633 // Decorators
19634 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19635 "OpDecorate %indata DescriptorSet 0\n"
19636 "OpDecorate %indata Binding 0\n"
19637 "OpDecorate %outdata DescriptorSet 0\n"
19638 "OpDecorate %outdata Binding 1\n"
19639 "OpDecorate %in_arr ArrayStride 4\n"
19640 "OpDecorate %in_buf BufferBlock\n"
19641 "OpMemberDecorate %in_buf 0 Offset 0\n"
19642 // Base types
19643 "%void = OpTypeVoid\n"
19644 "%voidf = OpTypeFunction %void\n"
19645 "%u32 = OpTypeInt 32 0\n"
19646 "%i32 = OpTypeInt 32 1\n"
19647 "%f32 = OpTypeFloat 32\n"
19648 "%uvec3 = OpTypeVector %u32 3\n"
19649 "%uvec3ptr = OpTypePointer Input %uvec3\n"
19650 "${commentDecl:opt}%const = OpConstant ${customType} ${constValue:opt}\n"
19651 // Derived types
19652 "%in_ptr = OpTypePointer Uniform ${customType}\n"
19653 "%in_arr = OpTypeRuntimeArray ${customType}\n"
19654 "%in_buf = OpTypeStruct %in_arr\n"
19655 "%in_bufptr = OpTypePointer Uniform %in_buf\n"
19656 "%indata = OpVariable %in_bufptr Uniform\n"
19657 "%outdata = OpVariable %in_bufptr Uniform\n"
19658 "%id = OpVariable %uvec3ptr Input\n"
19659 "%var_ptr = OpTypePointer Function ${customType}\n"
19660 // Constants
19661 "%zero = OpConstant %i32 0\n"
19662 // Main function
19663 "%main = OpFunction %void None %voidf\n"
19664 "%label = OpLabel\n"
19665 "%out_var = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19666 "%idval = OpLoad %uvec3 %id\n"
19667 "%x = OpCompositeExtract %u32 %idval 0\n"
19668 "%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
19669 "%outloc = OpAccessChain %in_ptr %outdata %zero %x\n"
19670
19671 "%outval = OpLoad ${customType} %out_var\n"
19672 " OpStore %outloc %outval\n"
19673 " OpReturn\n"
19674 " OpFunctionEnd\n"
19675 ).specialize(parameters);
19676 }
19677
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)19678 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19679 {
19680 DE_ASSERT(outputAllocs.size() != 0);
19681 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19682
19683 // Use custom epsilon because of the float->string conversion
19684 const float epsilon = 0.00001f;
19685
19686 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19687 {
19688 vector<deUint8> expectedBytes;
19689 float expected;
19690 float actual;
19691
19692 expectedOutputs[outputNdx].getBytes(expectedBytes);
19693 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19694 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19695
19696 // Test with epsilon
19697 if (fabs(expected - actual) > epsilon)
19698 {
19699 log << TestLog::Message << "Error: The actual and expected values not matching."
19700 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19701 return false;
19702 }
19703 }
19704 return true;
19705 }
19706
19707 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)19708 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19709 {
19710 DE_ASSERT(outputAllocs.size() != 0);
19711 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19712
19713 // Copy and discard the result.
19714 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19715 {
19716 vector<deUint8> expectedBytes;
19717 expectedOutputs[outputNdx].getBytes(expectedBytes);
19718
19719 const size_t width = expectedBytes.size();
19720 vector<char> data (width);
19721
19722 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
19723 }
19724 return true;
19725 }
19726
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)19727 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19728 {
19729 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19730 de::Random rnd (deStringHash(group->getName()));
19731
19732 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19733 {
19734 NumberType numberType = NumberType(type);
19735 const string typeName = getNumberTypeName(numberType);
19736 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str()));
19737
19738 // 2 similar subcases (initialized and uninitialized)
19739 for (int subCase = 0; subCase < 2; ++subCase)
19740 {
19741 ComputeShaderSpec spec;
19742 spec.numWorkGroups = IVec3(1, 1, 1);
19743
19744 map<string, string> params;
19745
19746 switch (numberType)
19747 {
19748 case NUMBERTYPE_INT32:
19749 {
19750 deInt32 number = getInt(rnd);
19751 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19752 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19753 params["constValue"] = numberToString(number);
19754 break;
19755 }
19756 case NUMBERTYPE_UINT32:
19757 {
19758 deUint32 number = rnd.getUint32();
19759 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19760 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19761 params["constValue"] = numberToString(number);
19762 break;
19763 }
19764 case NUMBERTYPE_FLOAT32:
19765 {
19766 float number = rnd.getFloat();
19767 spec.inputs.push_back(createCompositeBuffer<float>(number));
19768 spec.outputs.push_back(createCompositeBuffer<float>(number));
19769 spec.verifyIO = &compareFloats;
19770 params["constValue"] = numberToString(number);
19771 break;
19772 }
19773 default:
19774 DE_ASSERT(false);
19775 }
19776
19777 // Initialized subcase
19778 if (!subCase)
19779 {
19780 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19781 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", spec));
19782 }
19783 // Uninitialized subcase
19784 else
19785 {
19786 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19787 spec.verifyIO = &passthruVerify;
19788 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", spec));
19789 }
19790 }
19791 group->addChild(subGroup.release());
19792 }
19793 return group.release();
19794 }
19795
createOpNopTests(tcu::TestContext & testCtx)19796 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19797 {
19798 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19799 RGBA defaultColors[4];
19800 map<string, string> opNopFragments;
19801
19802 getDefaultColors(defaultColors);
19803
19804 opNopFragments["testfun"] =
19805 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19806 "%param1 = OpFunctionParameter %v4f32\n"
19807 "%label_testfun = OpLabel\n"
19808 "OpNop\n"
19809 "OpNop\n"
19810 "OpNop\n"
19811 "OpNop\n"
19812 "OpNop\n"
19813 "OpNop\n"
19814 "OpNop\n"
19815 "OpNop\n"
19816 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19817 "%b = OpFAdd %f32 %a %a\n"
19818 "OpNop\n"
19819 "%c = OpFSub %f32 %b %a\n"
19820 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19821 "OpNop\n"
19822 "OpNop\n"
19823 "OpReturnValue %ret\n"
19824 "OpFunctionEnd\n";
19825
19826 createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19827
19828 return testGroup.release();
19829 }
19830
createOpNameTests(tcu::TestContext & testCtx)19831 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19832 {
19833 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19834 RGBA defaultColors[4];
19835 map<string, string> opNameFragments;
19836
19837 getDefaultColors(defaultColors);
19838
19839 opNameFragments["testfun"] =
19840 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19841 "%param1 = OpFunctionParameter %v4f32\n"
19842 "%label_func = OpLabel\n"
19843 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19844 "%b = OpFAdd %f32 %a %a\n"
19845 "%c = OpFSub %f32 %b %a\n"
19846 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19847 "OpReturnValue %ret\n"
19848 "OpFunctionEnd\n";
19849
19850 opNameFragments["debug"] =
19851 "OpName %BP_main \"not_main\"";
19852
19853 createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19854
19855 return testGroup.release();
19856 }
19857
createFloat16Tests(tcu::TestContext & testCtx)19858 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19859 {
19860 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19861
19862 testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19863 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19864 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19865 testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19866 testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19867 testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19868 testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19869 testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19870 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19871 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19872 testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19873 testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19874 testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19875 testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19876 testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19877
19878 return testGroup.release();
19879 }
19880
createFloat32Tests(tcu::TestContext & testCtx)19881 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19882 {
19883 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19884
19885 testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19886 testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19887 testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19888 testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19889
19890 return testGroup.release();
19891 }
19892
createFloat16Group(tcu::TestContext & testCtx)19893 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19894 {
19895 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19896
19897 testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19898 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19899 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19900 testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19901 testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19902 testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19903 testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19904 testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19905 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19906 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19907 testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19908 testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19909 testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19910 testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19911 testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19912
19913 return testGroup.release();
19914 }
19915
createFloat32Group(tcu::TestContext & testCtx)19916 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19917 {
19918 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19919
19920 testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19921 testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19922 testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19923 testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19924
19925 return testGroup.release();
19926 }
19927
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)19928 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19929 {
19930 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19931
19932 de::Random rnd (deStringHash(group->getName()));
19933 const int numElements = 100;
19934 vector<float> inputData (numElements, 0);
19935 vector<float> outputData (numElements, 0);
19936 fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19937
19938 const StringTemplate shaderTemplate (
19939 "${CAPS}\n"
19940 "OpMemoryModel Logical GLSL450\n"
19941 "OpEntryPoint GLCompute %main \"main\" %id\n"
19942 "OpExecutionMode %main LocalSize 1 1 1\n"
19943 "OpSource GLSL 430\n"
19944 "OpName %main \"main\"\n"
19945 "OpName %id \"gl_GlobalInvocationID\"\n"
19946
19947 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19948
19949 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19950
19951 "%id = OpVariable %uvec3ptr Input\n"
19952 "${CONST}\n"
19953 "%main = OpFunction %void None %voidf\n"
19954 "%label = OpLabel\n"
19955 "%idval = OpLoad %uvec3 %id\n"
19956 "%x = OpCompositeExtract %u32 %idval 0\n"
19957 "%inloc = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19958
19959 "${TEST}\n"
19960
19961 "%outloc = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19962 " OpStore %outloc %res\n"
19963 " OpReturn\n"
19964 " OpFunctionEnd\n"
19965 );
19966
19967 // Each test case produces 4 boolean values, and we want each of these values
19968 // to come froma different combination of the available bit-sizes, so compute
19969 // all possible combinations here.
19970 vector<deUint32> widths;
19971 widths.push_back(32);
19972 widths.push_back(16);
19973 widths.push_back(8);
19974
19975 vector<IVec4> cases;
19976 for (size_t width0 = 0; width0 < widths.size(); width0++)
19977 {
19978 for (size_t width1 = 0; width1 < widths.size(); width1++)
19979 {
19980 for (size_t width2 = 0; width2 < widths.size(); width2++)
19981 {
19982 for (size_t width3 = 0; width3 < widths.size(); width3++)
19983 {
19984 cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19985 }
19986 }
19987 }
19988 }
19989
19990 for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19991 {
19992 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19993 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19994 continue;
19995
19996 map<string, string> specializations;
19997 ComputeShaderSpec spec;
19998
19999 // Inject appropriate capabilities and reference constants depending
20000 // on the bit-sizes required by this test case
20001 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
20002 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
20003 bool hasInt8 = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
20004
20005 string capsStr = "OpCapability Shader\n";
20006 string constStr =
20007 "%c0i32 = OpConstant %i32 0\n"
20008 "%c1f32 = OpConstant %f32 1.0\n"
20009 "%c0f32 = OpConstant %f32 0.0\n";
20010
20011 if (hasFloat32)
20012 {
20013 constStr +=
20014 "%c10f32 = OpConstant %f32 10.0\n"
20015 "%c25f32 = OpConstant %f32 25.0\n"
20016 "%c50f32 = OpConstant %f32 50.0\n"
20017 "%c90f32 = OpConstant %f32 90.0\n";
20018 }
20019
20020 if (hasFloat16)
20021 {
20022 capsStr += "OpCapability Float16\n";
20023 constStr +=
20024 "%f16 = OpTypeFloat 16\n"
20025 "%c10f16 = OpConstant %f16 10.0\n"
20026 "%c25f16 = OpConstant %f16 25.0\n"
20027 "%c50f16 = OpConstant %f16 50.0\n"
20028 "%c90f16 = OpConstant %f16 90.0\n";
20029 }
20030
20031 if (hasInt8)
20032 {
20033 capsStr += "OpCapability Int8\n";
20034 constStr +=
20035 "%i8 = OpTypeInt 8 1\n"
20036 "%c10i8 = OpConstant %i8 10\n"
20037 "%c25i8 = OpConstant %i8 25\n"
20038 "%c50i8 = OpConstant %i8 50\n"
20039 "%c90i8 = OpConstant %i8 90\n";
20040 }
20041
20042 // Each invocation reads a different float32 value as input. Depending on
20043 // the bit-sizes required by the particular test case, we also produce
20044 // float16 and/or and int8 values by converting from the 32-bit float.
20045 string testStr = "";
20046 testStr += "%inval32 = OpLoad %f32 %inloc\n";
20047 if (hasFloat16)
20048 testStr += "%inval16 = OpFConvert %f16 %inval32\n";
20049 if (hasInt8)
20050 testStr += "%inval8 = OpConvertFToS %i8 %inval32\n";
20051
20052 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
20053 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
20054 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
20055 // other way around, so in this case we want < instead of <=.
20056 if (cases[caseNdx][0] == 32)
20057 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
20058 else if (cases[caseNdx][0] == 16)
20059 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
20060 else
20061 testStr += "%cmp1 = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
20062
20063 if (cases[caseNdx][1] == 32)
20064 testStr += "%cmp2 = OpFOrdLessThan %bool %inval32 %c50f32\n";
20065 else if (cases[caseNdx][1] == 16)
20066 testStr += "%cmp2 = OpFOrdLessThan %bool %inval16 %c50f16\n";
20067 else
20068 testStr += "%cmp2 = OpSLessThan %bool %inval8 %c50i8\n";
20069
20070 if (cases[caseNdx][2] == 32)
20071 testStr += "%cmp3 = OpFOrdLessThan %bool %inval32 %c10f32\n";
20072 else if (cases[caseNdx][2] == 16)
20073 testStr += "%cmp3 = OpFOrdLessThan %bool %inval16 %c10f16\n";
20074 else
20075 testStr += "%cmp3 = OpSLessThan %bool %inval8 %c10i8\n";
20076
20077 if (cases[caseNdx][3] == 32)
20078 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
20079 else if (cases[caseNdx][3] == 16)
20080 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
20081 else
20082 testStr += "%cmp4 = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
20083
20084 testStr += "%and1 = OpLogicalAnd %bool %cmp1 %cmp2\n";
20085 testStr += "%or1 = OpLogicalOr %bool %cmp3 %cmp4\n";
20086 testStr += "%or2 = OpLogicalOr %bool %and1 %or1\n";
20087 testStr += "%not1 = OpLogicalNot %bool %or2\n";
20088 testStr += "%res = OpSelect %f32 %not1 %c1f32 %c0f32\n";
20089
20090 specializations["CAPS"] = capsStr;
20091 specializations["CONST"] = constStr;
20092 specializations["TEST"] = testStr;
20093
20094 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
20095 for (size_t ndx = 0; ndx < numElements; ++ndx)
20096 outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20097
20098 spec.assembly = shaderTemplate.specialize(specializations);
20099 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20100 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
20101 spec.numWorkGroups = IVec3(numElements, 1, 1);
20102 if (hasFloat16)
20103 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20104 if (hasInt8)
20105 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20106 spec.extensions.push_back("VK_KHR_shader_float16_int8");
20107
20108 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20109 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), spec));
20110 }
20111
20112 return group.release();
20113 }
20114
createBoolGroup(tcu::TestContext & testCtx)20115 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20116 {
20117 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20118
20119 testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20120
20121 return testGroup.release();
20122 }
20123
createOpNameAbuseTests(tcu::TestContext & testCtx)20124 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20125 {
20126 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20127 vector<CaseParameter> abuseCases;
20128 RGBA defaultColors[4];
20129 map<string, string> opNameFragments;
20130
20131 getOpNameAbuseCases(abuseCases);
20132 getDefaultColors(defaultColors);
20133
20134 opNameFragments["testfun"] =
20135 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20136 "%param1 = OpFunctionParameter %v4f32\n"
20137 "%label_func = OpLabel\n"
20138 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20139 "%b = OpFAdd %f32 %a %a\n"
20140 "%c = OpFSub %f32 %b %a\n"
20141 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20142 "OpReturnValue %ret\n"
20143 "OpFunctionEnd\n";
20144
20145 for (unsigned int i = 0; i < abuseCases.size(); i++)
20146 {
20147 string casename;
20148 casename = string("main") + abuseCases[i].name;
20149
20150 opNameFragments["debug"] =
20151 "OpName %BP_main \"" + abuseCases[i].param + "\"";
20152
20153 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20154 }
20155
20156 for (unsigned int i = 0; i < abuseCases.size(); i++)
20157 {
20158 string casename;
20159 casename = string("b") + abuseCases[i].name;
20160
20161 opNameFragments["debug"] =
20162 "OpName %b \"" + abuseCases[i].param + "\"";
20163
20164 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20165 }
20166
20167 {
20168 opNameFragments["debug"] =
20169 "OpName %test_code \"name1\"\n"
20170 "OpName %param1 \"name2\"\n"
20171 "OpName %a \"name3\"\n"
20172 "OpName %b \"name4\"\n"
20173 "OpName %c \"name5\"\n"
20174 "OpName %ret \"name6\"\n";
20175
20176 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20177 }
20178
20179 {
20180 opNameFragments["debug"] =
20181 "OpName %test_code \"the_same\"\n"
20182 "OpName %param1 \"the_same\"\n"
20183 "OpName %a \"the_same\"\n"
20184 "OpName %b \"the_same\"\n"
20185 "OpName %c \"the_same\"\n"
20186 "OpName %ret \"the_same\"\n";
20187
20188 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20189 }
20190
20191 {
20192 opNameFragments["debug"] =
20193 "OpName %BP_main \"to_be\"\n"
20194 "OpName %BP_main \"or_not\"\n"
20195 "OpName %BP_main \"to_be\"\n";
20196
20197 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20198 }
20199
20200 {
20201 opNameFragments["debug"] =
20202 "OpName %b \"to_be\"\n"
20203 "OpName %b \"or_not\"\n"
20204 "OpName %b \"to_be\"\n";
20205
20206 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20207 }
20208
20209 return abuseGroup.release();
20210 }
20211
20212
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20213 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20214 {
20215 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20216 vector<CaseParameter> abuseCases;
20217 RGBA defaultColors[4];
20218 map<string, string> opMemberNameFragments;
20219
20220 getOpNameAbuseCases(abuseCases);
20221 getDefaultColors(defaultColors);
20222
20223 opMemberNameFragments["pre_main"] =
20224 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20225
20226 opMemberNameFragments["testfun"] =
20227 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20228 "%param1 = OpFunctionParameter %v4f32\n"
20229 "%label_func = OpLabel\n"
20230 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20231 "%b = OpFAdd %f32 %a %a\n"
20232 "%c = OpFSub %f32 %b %a\n"
20233 "%cstr = OpCompositeConstruct %f3str %c %c %c\n"
20234 "%d = OpCompositeExtract %f32 %cstr 0\n"
20235 "%ret = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20236 "OpReturnValue %ret\n"
20237 "OpFunctionEnd\n";
20238
20239 for (unsigned int i = 0; i < abuseCases.size(); i++)
20240 {
20241 string casename;
20242 casename = string("f3str_x") + abuseCases[i].name;
20243
20244 opMemberNameFragments["debug"] =
20245 "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20246
20247 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20248 }
20249
20250 {
20251 opMemberNameFragments["debug"] =
20252 "OpMemberName %f3str 0 \"name1\"\n"
20253 "OpMemberName %f3str 1 \"name2\"\n"
20254 "OpMemberName %f3str 2 \"name3\"\n";
20255
20256 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20257 }
20258
20259 {
20260 opMemberNameFragments["debug"] =
20261 "OpMemberName %f3str 0 \"the_same\"\n"
20262 "OpMemberName %f3str 1 \"the_same\"\n"
20263 "OpMemberName %f3str 2 \"the_same\"\n";
20264
20265 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20266 }
20267
20268 {
20269 opMemberNameFragments["debug"] =
20270 "OpMemberName %f3str 0 \"to_be\"\n"
20271 "OpMemberName %f3str 1 \"or_not\"\n"
20272 "OpMemberName %f3str 0 \"to_be\"\n"
20273 "OpMemberName %f3str 2 \"makes_no\"\n"
20274 "OpMemberName %f3str 0 \"difference\"\n"
20275 "OpMemberName %f3str 0 \"to_me\"\n";
20276
20277
20278 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20279 }
20280
20281 return abuseGroup.release();
20282 }
20283
getSparseIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20284 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20285 {
20286 vector<deUint32> result;
20287 de::Random rnd (seed);
20288
20289 result.reserve(numDataPoints);
20290
20291 for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20292 result.push_back(rnd.getUint32());
20293
20294 return result;
20295 }
20296
// Computes the reference output for the sparse-ids test: the element-wise
// sum (with unsigned wrap-around) of the two input arrays.
//
// The result has inData1.size() elements. inData2 must hold at least as many
// elements; a shorter inData2 raises std::out_of_range instead of reading
// past the end of the vector.
vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
{
	vector<deUint32> result;

	result.reserve(inData1.size());

	// at() keeps a size mismatch between the inputs from becoming an
	// out-of-bounds read.
	for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
		result.push_back(inData1[dataPointNdx] + inData2.at(dataPointNdx));

	return result;
}
20308
20309 template<class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20310 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20311 {
20312 const deUint32 numDataPoints = 16;
20313 const std::string testName ("sparse_ids");
20314 const deUint32 seed (deStringHash(testName.c_str()));
20315 const vector<deUint32> inData1 (getSparseIdsAbuseData(numDataPoints, seed + 1));
20316 const vector<deUint32> inData2 (getSparseIdsAbuseData(numDataPoints, seed + 2));
20317 const vector<deUint32> outData (getSparseIdsAbuseResults(inData1, inData2));
20318 const StringTemplate preMain
20319 (
20320 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20321 " %up_u32 = OpTypePointer Uniform %u32\n"
20322 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20323 " %SSBO32 = OpTypeStruct %ra_u32\n"
20324 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20325 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20326 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20327 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20328 );
20329 const StringTemplate decoration
20330 (
20331 "OpDecorate %ra_u32 ArrayStride 4\n"
20332 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20333 "OpDecorate %SSBO32 BufferBlock\n"
20334 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20335 "OpDecorate %ssbo_src0 Binding 0\n"
20336 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20337 "OpDecorate %ssbo_src1 Binding 1\n"
20338 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20339 "OpDecorate %ssbo_dst Binding 2\n"
20340 );
20341 const StringTemplate testFun
20342 (
20343 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20344 " %param = OpFunctionParameter %v4f32\n"
20345
20346 " %entry = OpLabel\n"
20347 " %i = OpVariable %fp_i32 Function\n"
20348 " OpStore %i %c_i32_0\n"
20349 " OpBranch %loop\n"
20350
20351 " %loop = OpLabel\n"
20352 " %i_cmp = OpLoad %i32 %i\n"
20353 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20354 " OpLoopMerge %merge %next None\n"
20355 " OpBranchConditional %lt %write %merge\n"
20356
20357 " %write = OpLabel\n"
20358 " %ndx = OpLoad %i32 %i\n"
20359
20360 " %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20361 " %128 = OpLoad %u32 %127\n"
20362
20363 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20364 " %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20365 " %4194001 = OpLoad %u32 %4194000\n"
20366
20367 " %2097151 = OpIAdd %u32 %128 %4194001\n"
20368 " %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20369 " OpStore %2097152 %2097151\n"
20370 " OpBranch %next\n"
20371
20372 " %next = OpLabel\n"
20373 " %i_cur = OpLoad %i32 %i\n"
20374 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20375 " OpStore %i %i_new\n"
20376 " OpBranch %loop\n"
20377
20378 " %merge = OpLabel\n"
20379 " OpReturnValue %param\n"
20380
20381 " OpFunctionEnd\n"
20382 );
20383 SpecResource specResource;
20384 map<string, string> specs;
20385 VulkanFeatures features;
20386 map<string, string> fragments;
20387 vector<string> extensions;
20388
20389 specs["num_data_points"] = de::toString(numDataPoints);
20390
20391 fragments["decoration"] = decoration.specialize(specs);
20392 fragments["pre_main"] = preMain.specialize(specs);
20393 fragments["testfun"] = testFun.specialize(specs);
20394
20395 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20396 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20397 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20398
20399 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20400 {
20401 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20402 features.coreFeatures.fragmentStoresAndAtomics = true;
20403 }
20404
20405 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20406 }
20407
getLotsIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20408 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20409 {
20410 vector<deUint32> result;
20411 de::Random rnd (seed);
20412
20413 result.reserve(numDataPoints);
20414
20415 // Fixed value
20416 result.push_back(1u);
20417
20418 // Random values
20419 for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20420 result.push_back(rnd.getUint8());
20421
20422 return result;
20423 }
20424
// Computes the reference output for the lots-ids test:
// inData1[i] + count * inData2[i] for every element, with unsigned
// wrap-around.
//
// The result has inData1.size() elements. inData2 must hold at least as many
// elements; a shorter inData2 raises std::out_of_range instead of reading
// past the end of the vector.
vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
{
	vector<deUint32> result;

	result.reserve(inData1.size());

	// at() keeps a size mismatch between the inputs from becoming an
	// out-of-bounds read.
	for (size_t dataPointNdx = 0; dataPointNdx < inData1.size(); ++dataPointNdx)
		result.push_back(inData1[dataPointNdx] + count * inData2.at(dataPointNdx));

	return result;
}
20436
20437 template<class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20438 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20439 {
20440 const deUint32 numDataPoints = 16;
20441 const deUint32 firstNdx = 100u;
20442 const deUint32 sequenceCount = 10000u;
20443 const std::string testName ("lots_ids");
20444 const deUint32 seed (deStringHash(testName.c_str()));
20445 const vector<deUint32> inData1 (getLotsIdsAbuseData(numDataPoints, seed + 1));
20446 const vector<deUint32> inData2 (getLotsIdsAbuseData(numDataPoints, seed + 2));
20447 const vector<deUint32> outData (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20448 const StringTemplate preMain
20449 (
20450 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20451 " %up_u32 = OpTypePointer Uniform %u32\n"
20452 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20453 " %SSBO32 = OpTypeStruct %ra_u32\n"
20454 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20455 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20456 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20457 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20458 );
20459 const StringTemplate decoration
20460 (
20461 "OpDecorate %ra_u32 ArrayStride 4\n"
20462 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20463 "OpDecorate %SSBO32 BufferBlock\n"
20464 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20465 "OpDecorate %ssbo_src0 Binding 0\n"
20466 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20467 "OpDecorate %ssbo_src1 Binding 1\n"
20468 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20469 "OpDecorate %ssbo_dst Binding 2\n"
20470 );
20471 const StringTemplate testFun
20472 (
20473 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20474 " %param = OpFunctionParameter %v4f32\n"
20475
20476 " %entry = OpLabel\n"
20477 " %i = OpVariable %fp_i32 Function\n"
20478 " OpStore %i %c_i32_0\n"
20479 " OpBranch %loop\n"
20480
20481 " %loop = OpLabel\n"
20482 " %i_cmp = OpLoad %i32 %i\n"
20483 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20484 " OpLoopMerge %merge %next None\n"
20485 " OpBranchConditional %lt %write %merge\n"
20486
20487 " %write = OpLabel\n"
20488 " %ndx = OpLoad %i32 %i\n"
20489
20490 " %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20491 " %91 = OpLoad %u32 %90\n"
20492
20493 " %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20494 " %${zeroth_id} = OpLoad %u32 %98\n"
20495
20496 "${seq}\n"
20497
20498 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20499 " %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20500 " OpStore %dst %${last_id}\n"
20501 " OpBranch %next\n"
20502
20503 " %next = OpLabel\n"
20504 " %i_cur = OpLoad %i32 %i\n"
20505 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20506 " OpStore %i %i_new\n"
20507 " OpBranch %loop\n"
20508
20509 " %merge = OpLabel\n"
20510 " OpReturnValue %param\n"
20511
20512 " OpFunctionEnd\n"
20513 );
20514 deUint32 lastId = firstNdx;
20515 SpecResource specResource;
20516 map<string, string> specs;
20517 VulkanFeatures features;
20518 map<string, string> fragments;
20519 vector<string> extensions;
20520 std::string sequence;
20521
20522 for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20523 {
20524 const deUint32 sequenceId = sequenceNdx + firstNdx;
20525 const std::string sequenceIdStr = de::toString(sequenceId);
20526
20527 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20528 lastId = sequenceId;
20529
20530 if (sequenceNdx == 0)
20531 sequence.reserve((10 + sequence.length()) * sequenceCount);
20532 }
20533
20534 specs["num_data_points"] = de::toString(numDataPoints);
20535 specs["zeroth_id"] = de::toString(firstNdx - 1);
20536 specs["last_id"] = de::toString(lastId);
20537 specs["seq"] = sequence;
20538
20539 fragments["decoration"] = decoration.specialize(specs);
20540 fragments["pre_main"] = preMain.specialize(specs);
20541 fragments["testfun"] = testFun.specialize(specs);
20542
20543 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20544 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20545 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20546
20547 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20548 {
20549 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20550 features.coreFeatures.fragmentStoresAndAtomics = true;
20551 }
20552
20553 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20554 }
20555
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20556 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20557 {
20558 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20559
20560 createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20561 createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20562
20563 return testGroup.release();
20564 }
20565
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20566 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20567 {
20568 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20569
20570 createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20571 createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20572
20573 return testGroup.release();
20574 }
20575
createFunctionParamsGroup(tcu::TestContext & testCtx)20576 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20577 {
20578 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20579 #ifndef CTS_USES_VULKANSC
20580 static const char data_dir[] = "spirv_assembly/instruction/function_params";
20581
20582 static const struct
20583 {
20584 const std::string name;
20585 const std::string desc;
20586 } cases[] =
20587 {
20588 { "sampler_param", "Test combined image sampler as function parameter" },
20589 };
20590
20591 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20592 {
20593 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20594 cases[i].name.c_str(),
20595 cases[i].desc.c_str(),
20596 data_dir,
20597 cases[i].name + ".amber");
20598 testGroup->addChild(testCase);
20599 }
20600 #endif
20601 return testGroup.release();
20602 }
20603
createEarlyFragmentTests(tcu::TestContext & testCtx)20604 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20605 {
20606 de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20607
20608 #ifndef CTS_USES_VULKANSC
20609 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20610
20611 static const struct Case
20612 {
20613 const string name;
20614 const string desc;
20615 }
20616 cases[] =
20617 {
20618 // Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20619 { "depth_less", "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." },
20620 { "depth_greater", "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH." },
20621 { "depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20622 { "depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20623 { "depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20624 { "depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." }
20625 };
20626
20627 for (const auto& tCase : cases)
20628 {
20629 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20630 tCase.name.c_str(),
20631 tCase.desc.c_str(),
20632 dataDir,
20633 tCase.name + ".amber");
20634
20635 earlyFragTests->addChild(testCase);
20636 }
20637 #endif // CTS_USES_VULKANSC
20638
20639 return earlyFragTests.release();
20640 }
20641
createEarlyAndLateFragmentTests(tcu::TestContext & testCtx)20642 tcu::TestCaseGroup* createEarlyAndLateFragmentTests(tcu::TestContext& testCtx)
20643 {
20644 de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment", "Early And Late Fragment Tests"));
20645 #ifndef CTS_USES_VULKANSC
20646 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
20647
20648 static const struct Case
20649 {
20650 const string name;
20651 const string desc;
20652 } cases[] =
20653 {
20654 { "depth_less", "gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." },
20655 { "depth_greater", "gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH." },
20656 { "depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20657 { "depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20658 { "depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20659 { "depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." }
20660 };
20661
20662 for (const auto& tCase : cases)
20663 {
20664 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20665 tCase.name.c_str(),
20666 tCase.desc.c_str(),
20667 dataDir,
20668 tCase.name + ".amber",
20669 { "VK_AMD_shader_early_and_late_fragment_tests" });
20670
20671 earlyLateFragTests->addChild(testCase);
20672 }
20673 #endif
20674
20675 return earlyLateFragTests.release();
20676 }
20677
createOpExecutionModeTests(tcu::TestContext & testCtx)20678 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20679 {
20680 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20681
20682 #ifndef CTS_USES_VULKANSC
20683 static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
20684
20685 static const struct Case
20686 {
20687 const string name;
20688 const string desc;
20689 } cases[] =
20690 {
20691 { "depthless_0", "FragDepth < Polygon depth: depth test should pass." },
20692 { "depthless_1", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20693 { "depthless_2", "FragDepth < Polygon depth: depth test should fail." },
20694 { "depthless_3", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20695 { "depthless_4", "FragDepth < Polygon depth: depth test should pass." },
20696 { "depthgreater_0", "FragDepth > Polygon depth: depth test should pass." },
20697 { "depthgreater_1", "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20698 { "depthgreater_2", "FragDepth > Polygon depth: depth test should fail." },
20699 { "depthgreater_3", "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20700 { "depthgreater_4", "FragDepth > Polygon depth: depth test should pass." },
20701 { "depthunchanged_0", "FragDepth == Polygon depth: depth test should pass." },
20702 { "depthunchanged_1", "FragDepth == Polygon depth: depth test should fail." },
20703 { "depthunchanged_2", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20704 { "depthunchanged_3", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
20705 };
20706
20707 for (const auto& case_ : cases)
20708 {
20709 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20710 case_.name.c_str(),
20711 case_.desc.c_str(),
20712 dataDir,
20713 case_.name + ".amber");
20714 testGroup->addChild(testCase);
20715 }
20716 #endif // CTS_USES_VULKANSC
20717
20718 return testGroup.release();
20719 }
20720
createOpMulExtendedGroup(tcu::TestContext & testCtx)20721 tcu::TestCaseGroup* createOpMulExtendedGroup (tcu::TestContext& testCtx)
20722 {
20723 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "mul_extended", "Op[S/U]MulExtended tests"));
20724
20725 #ifndef CTS_USES_VULKANSC
20726 static const char dataDir[] = "spirv_assembly/instruction/compute/mul_extended";
20727
20728 static const struct Case
20729 {
20730 const string name;
20731 const vector<string> features;
20732 } cases[] =
20733 {
20734 { "signed_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"} },
20735 { "signed_32bit", {} },
20736 { "signed_64bit", {"Features.shaderInt64"} },
20737 { "signed_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"} },
20738 { "unsigned_16bit", {"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"} },
20739 { "unsigned_32bit", {} },
20740 { "unsigned_64bit", {"Features.shaderInt64"} },
20741 { "unsigned_8bit", {"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"} }
20742 };
20743
20744 for (const auto& test : cases)
20745 {
20746 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20747 test.name.c_str(),
20748 "",
20749 dataDir,
20750 test.name + ".amber",
20751 test.features);
20752 testGroup->addChild(testCase);
20753 }
20754 #endif // CTS_USES_VULKANSC
20755
20756 return testGroup.release();
20757 }
20758
createQueryGroup(tcu::TestContext & testCtx)20759 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20760 {
20761 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20762
20763 #ifndef CTS_USES_VULKANSC
20764 static const char data_dir[] = "spirv_assembly/instruction/image_query";
20765
20766 static const struct
20767 {
20768 const std::string name;
20769 const std::string desc;
20770 } cases[] =
20771 {
20772 { "samples_storage", "Test samples query can be used on storage images" },
20773 };
20774
20775 vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20776
20777 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20778 {
20779 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20780 cases[i].name.c_str(),
20781 cases[i].desc.c_str(),
20782 data_dir,
20783 cases[i].name + ".amber",
20784 requirements);
20785 testGroup->addChild(testCase);
20786 }
20787 #endif // CTS_USES_VULKANSC
20788
20789 return testGroup.release();
20790 }
20791
createInstructionTests(tcu::TestContext & testCtx)20792 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20793 {
20794 const bool testComputePipeline = true;
20795
20796 de::MovePtr<tcu::TestCaseGroup> instructionTests (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20797 de::MovePtr<tcu::TestCaseGroup> computeTests (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20798 de::MovePtr<tcu::TestCaseGroup> graphicsTests (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20799
20800 computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20801 computeTests->addChild(createLocalSizeGroup(testCtx, false));
20802 computeTests->addChild(createLocalSizeGroup(testCtx, true));
20803 computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20804 computeTests->addChild(createOpNopGroup(testCtx));
20805 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20806 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20807 computeTests->addChild(createOpAtomicGroup(testCtx, false));
20808 computeTests->addChild(createOpAtomicGroup(testCtx, true)); // Using new StorageBuffer decoration
20809 computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true)); // Return value validation
20810 computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
20811 computeTests->addChild(createOpLineGroup(testCtx));
20812 computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20813 computeTests->addChild(createOpNoLineGroup(testCtx));
20814 computeTests->addChild(createOpConstantNullGroup(testCtx));
20815 computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20816 computeTests->addChild(createOpConstantUsageGroup(testCtx));
20817 computeTests->addChild(createSpecConstantGroup(testCtx));
20818 computeTests->addChild(createOpSourceGroup(testCtx));
20819 computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20820 computeTests->addChild(createDecorationGroupGroup(testCtx));
20821 computeTests->addChild(createOpPhiGroup(testCtx));
20822 computeTests->addChild(createLoopControlGroup(testCtx));
20823 computeTests->addChild(createFunctionControlGroup(testCtx));
20824 computeTests->addChild(createSelectionControlGroup(testCtx));
20825 computeTests->addChild(createBlockOrderGroup(testCtx));
20826 computeTests->addChild(createMultipleShaderGroup(testCtx));
20827 computeTests->addChild(createMemoryAccessGroup(testCtx));
20828 computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20829 computeTests->addChild(createOpCopyObjectGroup(testCtx));
20830 computeTests->addChild(createNoContractionGroup(testCtx));
20831 computeTests->addChild(createOpUndefGroup(testCtx));
20832 computeTests->addChild(createOpUnreachableGroup(testCtx));
20833 computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20834 computeTests->addChild(createOpFRemGroup(testCtx));
20835 computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20836 computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20837 computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20838 computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20839 #ifndef CTS_USES_VULKANSC
20840 computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20841 computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20842 computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20843 computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20844 computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20845 computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20846 #endif // CTS_USES_VULKANSC
20847 computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20848 computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20849 computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20850 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20851 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20852 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20853 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20854 computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20855 computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20856 computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20857 computeTests->addChild(createOpNMinGroup(testCtx));
20858 computeTests->addChild(createOpNMaxGroup(testCtx));
20859 computeTests->addChild(createOpNClampGroup(testCtx));
20860 computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
20861 {
20862 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20863
20864 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20865 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20866
20867 computeTests->addChild(computeAndroidTests.release());
20868 }
20869
20870 computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20871 computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20872 computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20873 computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20874 computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20875 computeTests->addChild(createVariableInitComputeGroup(testCtx));
20876 computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20877 computeTests->addChild(createIndexingComputeGroup(testCtx));
20878 computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20879 computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20880 computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20881 computeTests->addChild(createOpNameGroup(testCtx));
20882 computeTests->addChild(createOpMemberNameGroup(testCtx));
20883 computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20884 computeTests->addChild(createFloat16Group(testCtx));
20885 #ifndef CTS_USES_VULKANSC
20886 computeTests->addChild(createFloat32Group(testCtx));
20887 #endif // CTS_USES_VULKANSC
20888 computeTests->addChild(createBoolGroup(testCtx));
20889 computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20890 computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20891 #ifndef CTS_USES_VULKANSC
20892 computeTests->addChild(createSignedIntCompareGroup(testCtx));
20893 computeTests->addChild(createSignedOpTestsGroup(testCtx));
20894 #endif // CTS_USES_VULKANSC
20895 computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20896 #ifndef CTS_USES_VULKANSC
20897 computeTests->addChild(createPtrAccessChainGroup(testCtx));
20898 computeTests->addChild(createVectorShuffleGroup(testCtx));
20899 #endif // CTS_USES_VULKANSC
20900 computeTests->addChild(createHlslComputeGroup(testCtx));
20901 computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20902 computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20903 #ifndef CTS_USES_VULKANSC
20904 computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20905 #endif // CTS_USES_VULKANSC
20906 computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20907 computeTests->addChild(createOpMulExtendedGroup(testCtx));
20908
20909 graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20910 graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20911 graphicsTests->addChild(createOpNopTests(testCtx));
20912 graphicsTests->addChild(createOpSourceTests(testCtx));
20913 graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20914 graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20915 graphicsTests->addChild(createOpLineTests(testCtx));
20916 graphicsTests->addChild(createOpNoLineTests(testCtx));
20917 graphicsTests->addChild(createOpConstantNullTests(testCtx));
20918 graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20919 graphicsTests->addChild(createMemoryAccessTests(testCtx));
20920 graphicsTests->addChild(createOpUndefTests(testCtx));
20921 graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20922 graphicsTests->addChild(createModuleTests(testCtx));
20923 graphicsTests->addChild(createUnusedVariableTests(testCtx));
20924 graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20925 graphicsTests->addChild(createOpPhiTests(testCtx));
20926 graphicsTests->addChild(createNoContractionTests(testCtx));
20927 graphicsTests->addChild(createOpQuantizeTests(testCtx));
20928 graphicsTests->addChild(createLoopTests(testCtx));
20929 graphicsTests->addChild(createSpecConstantTests(testCtx));
20930 graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20931 graphicsTests->addChild(createBarrierTests(testCtx));
20932 graphicsTests->addChild(createDecorationGroupTests(testCtx));
20933 graphicsTests->addChild(createFRemTests(testCtx));
20934 graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20935 graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20936
20937 {
20938 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20939
20940 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20941 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20942
20943 graphicsTests->addChild(graphicsAndroidTests.release());
20944 }
20945
20946 graphicsTests->addChild(createOpNameTests(testCtx));
20947 graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20948 graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20949
20950 graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20951 graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20952 graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20953 graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20954 graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20955 graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20956 graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20957 graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20958 graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20959 graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20960 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20961 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20962 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20963 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20964 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20965 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20966 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20967 graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20968 graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20969 graphicsTests->addChild(createFloat16Tests(testCtx));
20970 #ifndef CTS_USES_VULKANSC
20971 graphicsTests->addChild(createFloat32Tests(testCtx));
20972 #endif // CTS_USES_VULKANSC
20973 graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20974 graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20975 graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20976 graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
20977 graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20978
20979 instructionTests->addChild(computeTests.release());
20980 instructionTests->addChild(graphicsTests.release());
20981 #ifndef CTS_USES_VULKANSC
20982 instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20983 instructionTests->addChild(createFunctionParamsGroup(testCtx));
20984 #endif // CTS_USES_VULKANSC
20985 instructionTests->addChild(createQueryGroup(testCtx));
20986 instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20987 instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20988
20989 return instructionTests.release();
20990 }
20991
20992 } // SpirVAssembly
20993 } // vkt
20994