1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 Google Inc.
6 * Copyright (c) 2016 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #include "vktSpvAsmIntegerDotProductTests.hpp"
89 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
90
91 #include <cmath>
92 #include <limits>
93 #include <map>
94 #include <string>
95 #include <sstream>
96 #include <utility>
97 #include <stack>
98
99 namespace vkt
100 {
101 namespace SpirVAssembly
102 {
103
104 namespace
105 {
106
107 using namespace vk;
108 using std::map;
109 using std::string;
110 using std::vector;
111 using tcu::IVec3;
112 using tcu::IVec4;
113 using tcu::RGBA;
114 using tcu::TestLog;
115 using tcu::TestStatus;
116 using tcu::Vec4;
117 using de::UniquePtr;
118 using tcu::StringTemplate;
119 using tcu::Vec4;
120
// Named boolean flags so that call sites read as intent rather than as a bare
// true/false literal.
// NOTE(review): the consumers are outside this excerpt — presumably these
// select whether a test's data set includes NaN values; confirm at the callers.
const bool TEST_WITH_NAN = true;
const bool TEST_WITHOUT_NAN = false;
123
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %f16.
// Two half floats share one 32-bit word: the word at (index / 2) in member 0
// of %${var} is loaded, reinterpreted as a v2f16, and the component selected
// by (index & 1) is returned.
// ${var} is a template placeholder. %f16_i32_fn, %up_u32 and the %c_u32_*
// ids are only referenced here, so the embedding module must declare them.
const string loadScalarF16FromUint =
	"%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Word index = index / 2, lane within the word = index & 1.
	"%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
	"%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
	"%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
	"%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
	"%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
	"OpReturnValue %ld_arg_${var}_ex\n"
	"OpFunctionEnd\n";
137
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %v2f16.
// A v2f16 occupies exactly one 32-bit word, so the word at [index] in member 0
// of %${var} is loaded and bitcast directly.
// ${var} is a template placeholder; %v2f16_i32_fn, %up_u32 and %c_u32_0 must
// be declared by the embedding module.
const string loadV2F16FromUint =
	"%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
	"%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
	"OpReturnValue %ld_arg_${var}_cast\n"
	"OpFunctionEnd\n";
147
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %v3f16.
// Since we allocate a vec4 worth of values (two 32-bit words) per vec3, this
// case is almost the same as the vec4 case: both words of element [index] are
// loaded and bitcast to v2f16, and only the final shuffle differs, keeping
// the first three of the four components.
// ${var} is a template placeholder; %v3f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadV3F16FromUints =
	"%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
	"OpReturnValue %ld_arg_${var}_shuffle\n"
	"OpFunctionEnd\n";
163
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %v4f16.
// Element [index] of member 0 of %${var} is read as two 32-bit words; each is
// bitcast to a v2f16 and the two halves are concatenated with a shuffle.
// ${var} is a template placeholder; %v4f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadV4F16FromUints =
	"%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
	"OpReturnValue %ld_arg_${var}_shuffle\n"
	"OpFunctionEnd\n";
177
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m2x2f16.
// Each v2f16 column fits in one 32-bit word: words 0 and 1 of element [index]
// are loaded, bitcast to v2f16, and used as the two matrix constituents.
// ${var} is a template placeholder; %m2x2f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM2x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
	"OpReturnValue %ld_arg_${var}_cons\n"
	"OpFunctionEnd\n";
191
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m2x3f16.
// Each v3f16 column is read from two consecutive 32-bit words of element
// [index] (words 0-1 for column 0, words 2-3 for column 1). The word pair is
// bitcast to two v2f16 values and shuffled down to the first three components.
// ${var} is a template placeholder; %m2x3f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM2x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the four words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	// Rebuild each column, dropping the fourth (unused) component.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
213
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m2x4f16.
// Each v4f16 column is read from two consecutive 32-bit words of element
// [index] (words 0-1 for column 0, words 2-3 for column 1). The word pair is
// bitcast to two v2f16 values and concatenated with a shuffle.
// ${var} is a template placeholder; %m2x4f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM2x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the four words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
235
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m3x2f16.
// Each v2f16 column fits in one 32-bit word: words 0, 1 and 2 of element
// [index] are loaded, bitcast to v2f16, and used as the three constituents.
// ${var} is a template placeholder; %m3x2f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM3x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
252
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m3x3f16.
// Each v3f16 column is read from two consecutive 32-bit words of element
// [index] (six words in total). Every word pair is bitcast to two v2f16 values
// and shuffled down to the first three components.
// ${var} is a template placeholder; %m3x3f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM3x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the six words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	// Rebuild each column, dropping the fourth (unused) component.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
281
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m3x4f16.
// Each v4f16 column is read from two consecutive 32-bit words of element
// [index] (six words in total). Every word pair is bitcast to two v2f16 values
// and concatenated with a shuffle.
// ${var} is a template placeholder; %m3x4f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM3x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the six words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
310
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m4x2f16.
// Each v2f16 column fits in one 32-bit word: words 0..3 of element [index]
// are loaded, bitcast to v2f16, and used as the four matrix constituents.
// ${var} is a template placeholder; %m4x2f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM4x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
	"%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
	"%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
330
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m4x3f16.
// Each v3f16 column is read from two consecutive 32-bit words of element
// [index] (eight words in total). Every word pair is bitcast to two v2f16
// values and shuffled down to the first three components.
// ${var} is a template placeholder; %m4x3f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM4x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the eight words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
	// Rebuild each column, dropping the fourth (unused) component.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
	"%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
366
// SPIR-V assembly template for function %ld_arg_${var}(%i32 index) -> %m4x4f16.
// Each v4f16 column is read from two consecutive 32-bit words of element
// [index] (eight words in total). Every word pair is bitcast to two v2f16
// values and concatenated with a shuffle.
// ${var} is a template placeholder; %m4x4f16_i32_fn, %up_u32 and the %c_u32_*
// ids must be declared by the embedding module.
const string loadM4x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the eight words (suffix = column, word-within-column).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
	"%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
402
// SPIR-V assembly template for function
// %st_fn_${var}(%f16 value, %i32 index) -> %void.
// Two half floats share one 32-bit word, so only half of the destination word
// (word index / 2, lane index & 1) may be modified.
//
// This version is sensitive to the initial value in the output buffer.
// The infrastructure sets all output buffer bits to one before invoking
// the shader, so this version uses an atomic AND to generate the correct
// zeroes: the value is placed in its lane, the other lane is filled with
// ones, and AND-ing that mask into the word leaves the other lane untouched.
// ${var} is a template placeholder; %void_f16_i32_fn, %up_u32, %bool,
// %c_u32_low_ones, %c_u32_high_ones and the %c_u32_* ids must be declared by
// the embedding module.
const string storeScalarF16AsUint =
	"%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
	"%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
	"%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
	// Or 16 bits of ones into the half that was not populated with the result.
	"%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
	"%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
	"%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
	"%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
	// OpAtomicAnd operands: pointer, scope id, memory-semantics id, value.
	"%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
426
// SPIR-V assembly template for function
// %st_fn_${var}(%v2f16 value, %i32 index) -> %void.
// A v2f16 occupies exactly one 32-bit word, so the value is bitcast to u32
// and stored to word [index] in member 0 of %${var}.
// ${var} is a template placeholder; %void_v2f16_i32_fn, %up_u32 and %c_u32_0
// must be declared by the embedding module.
const string storeV2F16AsUint =
	"%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
	"OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
437
// SPIR-V assembly template for function
// %st_fn_${var}(%v3f16 value, %i32 index) -> %void.
// Since we allocate a vec4 worth of values (two 32-bit words) per vec3, this
// case can be treated almost the same as a vec4 case. The second word's
// shuffle selects index 3 of the concatenated (value, value) operands, so we
// store some extra data in the fourth half that should not be compared.
// ${var} is a template placeholder; %void_v3f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeV3F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
456
// SPIR-V assembly template for function
// %st_fn_${var}(%v4f16 value, %i32 index) -> %void.
// The vector is split into two v2f16 halves (components 0-1 and 2-3); each
// half is bitcast to u32 and stored to words 0 and 1 of element [index] in
// member 0 of %${var}.
// ${var} is a template placeholder; %void_v4f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeV4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
472
// SPIR-V assembly template for function
// %st_fn_${var}(%m2x2f16 value, %i32 index) -> %void.
// Each v2f16 column fits in one 32-bit word: columns 0 and 1 are extracted,
// bitcast to u32 and stored to words 0 and 1 of element [index] in member 0
// of %${var}.
// ${var} is a template placeholder; %void_m2x2f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeM2x2F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
488
// SPIR-V assembly template for function
// %st_fn_${var}(%m2x3f16 value, %i32 index) -> %void.
// Each v3f16 column is written as two 32-bit words of element [index]
// (words 0-1 for column 0, words 2-3 for column 1).
// In the extracted elements for 01 and 11 the second element doesn't
// matter: the shuffle picks index 3 of the concatenated (column, column)
// operands only to fill the fourth half of the word pair.
// ${var} is a template placeholder; %void_m2x3f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeM2x3F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
516
// SPIR-V assembly template for function
// %st_fn_${var}(%m2x4f16 value, %i32 index) -> %void.
// Each v4f16 column is split into two v2f16 halves, bitcast to u32 and
// written as two 32-bit words of element [index] (words 0-1 for column 0,
// words 2-3 for column 1) in member 0 of %${var}.
// ${var} is a template placeholder; %void_m2x4f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeM2x4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
542
// SPIR-V assembly template for function
// %st_fn_${var}(%m3x2f16 value, %i32 index) -> %void.
// Each v2f16 column fits in one 32-bit word: columns 0..2 are extracted,
// bitcast to u32 and stored to words 0..2 of element [index] in member 0 of
// %${var}.
// ${var} is a template placeholder; %void_m3x2f16_i32_fn, %up_u32 and the
// %c_u32_* ids must be declared by the embedding module.
const string storeM3x2F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
562
// SPIR-V assembly template for a helper function that stores an f16 3x3 matrix as 32-bit words.
// Parameter 1 is the matrix (%m3x3f16) and parameter 2 is an %i32 index. Each %v3f16 column is
// split by OpVectorShuffle into two %v2f16 halves, each half is bitcast to one %u32, and the six
// words are stored through %${var} -> %c_u32_0 -> param2 -> %c_u32_0 .. %c_u32_5.
// NOTE(review): ${var} looks like a tcu::StringTemplate parameter naming the destination
// variable; confirm at the place where this template is specialized.
const string storeM3x3F16AsUints =
    // The second element of the second half shuffled out of each vec3 doesn't matter: shuffle
    // index 3 addresses component 0 of the second operand, which is the same column again.
    "%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
598
// SPIR-V assembly template for a helper function that stores an f16 3x4 matrix as 32-bit words.
// Parameter 1 is the matrix (%m3x4f16) and parameter 2 is an %i32 index. Each %v4f16 column is
// split by OpVectorShuffle into its low (0 1) and high (2 3) %v2f16 halves, each half is bitcast
// to one %u32, and the six words are stored through
// %${var} -> %c_u32_0 -> param2 -> %c_u32_0 .. %c_u32_5.
// NOTE(review): ${var} looks like a tcu::StringTemplate parameter naming the destination
// variable; confirm at the place where this template is specialized.
const string storeM3x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
633
// SPIR-V assembly template for a helper function that stores an f16 4x2 matrix as 32-bit words.
// Parameter 1 is the matrix (%m4x2f16) and parameter 2 is an %i32 index. Each of the four
// %v2f16 columns is bitcast to one %u32 and stored through the access chain
// %${var} -> %c_u32_0 -> param2 -> %c_u32_0 .. %c_u32_3, i.e. four words per matrix.
// NOTE(review): ${var} looks like a tcu::StringTemplate parameter naming the destination
// variable; confirm at the place where this template is specialized.
const string storeM4x2F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    "%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
    "%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
    "%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
    "%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
    "%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
    "%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
    "OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
    "OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
    "OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
657
// SPIR-V assembly template for a helper function that stores an f16 4x3 matrix as 32-bit words.
// Parameter 1 is the matrix (%m4x3f16) and parameter 2 is an %i32 index. Each %v3f16 column is
// split by OpVectorShuffle into two %v2f16 halves, each half is bitcast to one %u32, and the
// eight words are stored through %${var} -> %c_u32_0 -> param2 -> %c_u32_0 .. %c_u32_7.
// NOTE(review): ${var} looks like a tcu::StringTemplate parameter naming the destination
// variable; confirm at the place where this template is specialized.
const string storeM4x3F16AsUints =
    // The last element of each decomposed vec3 doesn't matter: shuffle index 3 addresses
    // component 0 of the second operand, which is the same column again.
    "%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    "%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
702
// SPIR-V assembly template for a helper function that stores an f16 4x4 matrix as 32-bit words.
// Parameter 1 is the matrix (%m4x4f16) and parameter 2 is an %i32 index. Each %v4f16 column is
// split by OpVectorShuffle into its low (0 1) and high (2 3) %v2f16 halves, each half is bitcast
// to one %u32, and the eight words are stored through
// %${var} -> %c_u32_0 -> param2 -> %c_u32_0 .. %c_u32_7.
// NOTE(review): ${var} looks like a tcu::StringTemplate parameter naming the destination
// variable; confirm at the place where this template is specialized.
const string storeM4x4F16AsUints =
    "%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
    "%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
    "%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
    "%st_fn_${var}_entry = OpLabel\n"
    "%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
    "%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
    "%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
    "%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
    "%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
    "%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
    "%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
    "%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
    "%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
    "%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
    "%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
    "%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
    "%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
    "%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
    "%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
    "%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
    "%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
    "%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
    "%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
    "%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
    "%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
    "%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
    "%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
    "%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
    "%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
    "%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
    "%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
    "%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
    "OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
    "OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
    "OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
    "OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
    "OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
    "OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
    "OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
    "OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
    "OpReturn\n"
    "OpFunctionEnd\n";
746
747 template<typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)748 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
749 {
750 T* const typedPtr = (T*)dst;
751 for (int ndx = 0; ndx < numValues; ndx++)
752 typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
753 }
754
755 // Filter is a function that returns true if a value should pass, false otherwise.
756 template<typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)757 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
758 {
759 T* const typedPtr = (T*)dst;
760 T value;
761 for (int ndx = 0; ndx < numValues; ndx++)
762 {
763 do
764 value = de::randomScalar<T>(rnd, minValue, maxValue);
765 while (!filter(value));
766
767 typedPtr[offset + ndx] = value;
768 }
769 }
770
771 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)772 deInt64 randomInt64LogDistributed (de::Random& rnd)
773 {
774 deInt64 val = rnd.getUint64();
775 val &= (1ull << rnd.getInt(1, 63)) - 1;
776 if (rnd.getBool())
777 val = -val;
778 return val;
779 }
780
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues)781 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
782 {
783 for (int ndx = 0; ndx < numValues; ndx++)
784 dst[ndx] = randomInt64LogDistributed(rnd);
785 }
786
787 template<typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues,FilterT filter)788 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
789 {
790 for (int ndx = 0; ndx < numValues; ndx++)
791 {
792 deInt64 value;
793 do {
794 value = randomInt64LogDistributed(rnd);
795 } while (!filter(value));
796 dst[ndx] = value;
797 }
798 }
799
// Filter predicate: passes zero and every positive value.
inline bool filterNonNegative (const deInt64 value)
{
    return !(value < 0);
}
804
// Filter predicate: passes strictly positive values only.
inline bool filterPositive (const deInt64 value)
{
    return 0 < value;
}
809
// Filter predicate: passes every value except zero.
inline bool filterNotZero (const deInt64 value)
{
    return !(value == 0);
}
814
floorAll(vector<float> & values)815 static void floorAll (vector<float>& values)
816 {
817 for (size_t i = 0; i < values.size(); i++)
818 values[i] = deFloatFloor(values[i]);
819 }
820
floorAll(vector<Vec4> & values)821 static void floorAll (vector<Vec4>& values)
822 {
823 for (size_t i = 0; i < values.size(); i++)
824 values[i] = floor(values[i]);
825 }
826
// A named string parameter of a test case.
// `name` is stored as the raw pointer that was passed in (not copied), so the pointed-to
// text must outlive the CaseParameter; `param` is copied.
struct CaseParameter
{
    const char* name;
    string      param;

    CaseParameter (const char* case_, const string& param_)
        : name  (case_)
        , param (param_)
    {
    }
};
834
835 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
836 //
837 // #version 430
838 //
839 // layout(std140, set = 0, binding = 0) readonly buffer Input {
840 // float elements[];
841 // } input_data;
842 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
843 // float elements[];
844 // } output_data;
845 //
846 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
847 //
848 // void main() {
849 // uint x = gl_GlobalInvocationID.x;
850 // output_data.elements[x] = -input_data.elements[x];
851 // }
852
getAsmForLocalSizeTest(bool useLiteralLocalSize,bool useLiteralLocalSizeId,bool useSpecConstantWorkgroupSize,IVec3 workGroupSize,deUint32 ndx)853 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
854 {
855 std::ostringstream out;
856 out << "OpCapability Shader\n"
857 "OpMemoryModel Logical GLSL450\n";
858
859 if (useLiteralLocalSizeId)
860 {
861 out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
862 "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
863 }
864 else
865 {
866 out << "OpEntryPoint GLCompute %main \"main\" %id\n";
867
868 if (useLiteralLocalSize)
869 {
870 out << "OpExecutionMode %main LocalSize "
871 << workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
872 }
873 }
874
875 out << "OpSource GLSL 430\n"
876 "OpName %main \"main\"\n"
877 "OpName %id \"gl_GlobalInvocationID\"\n"
878 "OpDecorate %id BuiltIn GlobalInvocationId\n";
879
880 if (useSpecConstantWorkgroupSize)
881 {
882 out << "OpDecorate %spec_0 SpecId 100\n"
883 "OpDecorate %spec_1 SpecId 101\n"
884 "OpDecorate %spec_2 SpecId 102\n"
885 "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
886 }
887
888 if (useLiteralLocalSizeId)
889 {
890 out << getComputeAsmInputOutputBufferTraits("Block")
891 << getComputeAsmCommonTypes("StorageBuffer")
892 << getComputeAsmInputOutputBuffer("StorageBuffer")
893 << "%const_0 = OpConstant %u32 " << workGroupSize.x() << "\n"
894 "%const_1 = OpConstant %u32 " << workGroupSize.y() << "\n"
895 "%const_2 = OpConstant %u32 " << workGroupSize.z() << "\n";
896 }
897 else
898 {
899 out << getComputeAsmInputOutputBufferTraits()
900 << getComputeAsmCommonTypes()
901 << getComputeAsmInputOutputBuffer();
902 }
903
904 out << "%id = OpVariable %uvec3ptr Input\n"
905 "%zero = OpConstant %i32 0 \n";
906
907 if (useSpecConstantWorkgroupSize)
908 {
909 out << "%spec_0 = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
910 "%spec_1 = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
911 "%spec_2 = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
912 "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
913 }
914
915 out << "%main = OpFunction %void None %voidf\n"
916 "%label = OpLabel\n"
917 "%idval = OpLoad %uvec3 %id\n"
918 "%ndx = OpCompositeExtract %u32 %idval " << ndx << "\n"
919
920 "%inloc = OpAccessChain %f32ptr %indata %zero %ndx\n"
921 "%inval = OpLoad %f32 %inloc\n"
922 "%neg = OpFNegate %f32 %inval\n"
923 "%outloc = OpAccessChain %f32ptr %outdata %zero %ndx\n"
924 " OpStore %outloc %neg\n"
925 " OpReturn\n"
926 " OpFunctionEnd\n";
927
928 return out.str();
929 }
930
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)931 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
932 {
933 const char* groupName[]{ "localsize", "localsize_id" };
934
935 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
936 ComputeShaderSpec spec;
937 de::Random rnd (deStringHash(group->getName()));
938 const deUint32 numElements = 64u;
939 vector<float> positiveFloats (numElements, 0);
940 vector<float> negativeFloats (numElements, 0);
941
942 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
943
944 for (size_t ndx = 0; ndx < numElements; ++ndx)
945 negativeFloats[ndx] = -positiveFloats[ndx];
946
947 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
948 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
949
950 if (useLocalSizeId)
951 {
952 spec.spirvVersion = SPIRV_VERSION_1_5;
953 spec.extensions.push_back("VK_KHR_maintenance4");
954 }
955
956 spec.numWorkGroups = IVec3(numElements, 1, 1);
957
958 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
959 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
960
961 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
962 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
963
964 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
965 {
966 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
967 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
968 }
969
970 spec.numWorkGroups = IVec3(1, 1, 1);
971
972 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
973 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
974
975 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
976 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
977
978 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
979 {
980 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
981 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
982 }
983
984 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
985 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
986
987 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
988 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
989
990 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
991 {
992 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
993 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
994 }
995
996 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
997 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
998
999 spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1000 group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
1001
1002 if (!useLocalSizeId) // dont repeat this test when useLocalSizeId is true
1003 {
1004 spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1005 group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
1006 }
1007
1008 return group.release();
1009 }
1010
createOpNopGroup(tcu::TestContext & testCtx)1011 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1012 {
1013 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
1014 ComputeShaderSpec spec;
1015 de::Random rnd (deStringHash(group->getName()));
1016 const int numElements = 100;
1017 vector<float> positiveFloats (numElements, 0);
1018 vector<float> negativeFloats (numElements, 0);
1019
1020 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1021
1022 for (size_t ndx = 0; ndx < numElements; ++ndx)
1023 negativeFloats[ndx] = -positiveFloats[ndx];
1024
1025 spec.assembly =
1026 string(getComputeAsmShaderPreamble()) +
1027
1028 "OpSource GLSL 430\n"
1029 "OpName %main \"main\"\n"
1030 "OpName %id \"gl_GlobalInvocationID\"\n"
1031
1032 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1033
1034 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1035
1036 + string(getComputeAsmInputOutputBuffer()) +
1037
1038 "%id = OpVariable %uvec3ptr Input\n"
1039 "%zero = OpConstant %i32 0\n"
1040
1041 "%main = OpFunction %void None %voidf\n"
1042 "%label = OpLabel\n"
1043 "%idval = OpLoad %uvec3 %id\n"
1044 "%x = OpCompositeExtract %u32 %idval 0\n"
1045
1046 " OpNop\n" // Inside a function body
1047
1048 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1049 "%inval = OpLoad %f32 %inloc\n"
1050 "%neg = OpFNegate %f32 %inval\n"
1051 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1052 " OpStore %outloc %neg\n"
1053 " OpReturn\n"
1054 " OpFunctionEnd\n";
1055 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1056 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1057 spec.numWorkGroups = IVec3(numElements, 1, 1);
1058
1059 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1060
1061 return group.release();
1062 }
1063
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1064 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1065 {
1066 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1067 de::Random rnd (deStringHash(group->getName()));
1068 const int numElements = 100;
1069 vector<float> positiveFloats (numElements, 0);
1070 vector<float> negativeFloats (numElements, 0);
1071
1072 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1073
1074 for (size_t ndx = 0; ndx < numElements; ++ndx)
1075 negativeFloats[ndx] = -positiveFloats[ndx];
1076
1077 const VariableLocation testLocations[] =
1078 {
1079 // Set Binding
1080 { 0, 5 },
1081 { 5, 5 },
1082 };
1083
1084 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1085 {
1086 const VariableLocation& location = testLocations[locationNdx];
1087
1088 // Unused variable.
1089 {
1090 ComputeShaderSpec spec;
1091
1092 spec.assembly =
1093 string(getComputeAsmShaderPreamble()) +
1094
1095 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1096
1097 + getUnusedDecorations(location)
1098
1099 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1100
1101 + getUnusedTypesAndConstants()
1102
1103 + string(getComputeAsmInputOutputBuffer())
1104
1105 + getUnusedBuffer() +
1106
1107 "%id = OpVariable %uvec3ptr Input\n"
1108 "%zero = OpConstant %i32 0\n"
1109
1110 "%main = OpFunction %void None %voidf\n"
1111 "%label = OpLabel\n"
1112 "%idval = OpLoad %uvec3 %id\n"
1113 "%x = OpCompositeExtract %u32 %idval 0\n"
1114
1115 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1116 "%inval = OpLoad %f32 %inloc\n"
1117 "%neg = OpFNegate %f32 %inval\n"
1118 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1119 " OpStore %outloc %neg\n"
1120 " OpReturn\n"
1121 " OpFunctionEnd\n";
1122 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1123 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1124 spec.numWorkGroups = IVec3(numElements, 1, 1);
1125
1126 std::string testName = "variable_" + location.toString();
1127 std::string testDescription = "Unused variable test with " + location.toDescription();
1128
1129 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1130 }
1131
1132 // Unused function.
1133 {
1134 ComputeShaderSpec spec;
1135
1136 spec.assembly =
1137 string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1138
1139 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1140
1141 + getUnusedDecorations(location)
1142
1143 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1144
1145 + getUnusedTypesAndConstants() +
1146
1147 "%c_i32_0 = OpConstant %i32 0\n"
1148 "%c_i32_1 = OpConstant %i32 1\n"
1149
1150 + string(getComputeAsmInputOutputBuffer())
1151
1152 + getUnusedBuffer() +
1153
1154 "%id = OpVariable %uvec3ptr Input\n"
1155 "%zero = OpConstant %i32 0\n"
1156
1157 "%main = OpFunction %void None %voidf\n"
1158 "%label = OpLabel\n"
1159 "%idval = OpLoad %uvec3 %id\n"
1160 "%x = OpCompositeExtract %u32 %idval 0\n"
1161
1162 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1163 "%inval = OpLoad %f32 %inloc\n"
1164 "%neg = OpFNegate %f32 %inval\n"
1165 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1166 " OpStore %outloc %neg\n"
1167 " OpReturn\n"
1168 " OpFunctionEnd\n"
1169
1170 + getUnusedFunctionBody();
1171
1172 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1173 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1174 spec.numWorkGroups = IVec3(numElements, 1, 1);
1175
1176 std::string testName = "function_" + location.toString();
1177 std::string testDescription = "Unused function test with " + location.toDescription();
1178
1179 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1180 }
1181 }
1182
1183 return group.release();
1184 }
1185
1186 template<bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1187 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1188 {
1189 if (outputAllocs.size() != 1)
1190 return false;
1191
1192 vector<deUint8> input1Bytes;
1193 vector<deUint8> input2Bytes;
1194 vector<deUint8> expectedBytes;
1195
1196 inputs[0].getBytes(input1Bytes);
1197 inputs[1].getBytes(input2Bytes);
1198 expectedOutputs[0].getBytes(expectedBytes);
1199
1200 const deInt32* const expectedOutputAsInt = reinterpret_cast<const deInt32*>(&expectedBytes.front());
1201 const deInt32* const outputAsInt = static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1202 const float* const input1AsFloat = reinterpret_cast<const float*>(&input1Bytes.front());
1203 const float* const input2AsFloat = reinterpret_cast<const float*>(&input2Bytes.front());
1204 bool returnValue = true;
1205
1206 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1207 {
1208 if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1209 continue;
1210
1211 if (outputAsInt[idx] != expectedOutputAsInt[idx])
1212 {
1213 log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1214 returnValue = false;
1215 }
1216 }
1217 return returnValue;
1218 }
1219
1220 typedef VkBool32 (*compareFuncType) (float, float);
1221
1222 struct OpFUnordCase
1223 {
1224 const char* name;
1225 const char* opCode;
1226 compareFuncType compareFunc;
1227
OpFUnordCasevkt::SpirVAssembly::__anon043fb9e60111::OpFUnordCase1228 OpFUnordCase (const char* _name, const char* _opCode, compareFuncType _compareFunc)
1229 : name (_name)
1230 , opCode (_opCode)
1231 , compareFunc (_compareFunc) {}
1232 };
1233
// Appends one OpFUnordCase to a container named `cases` that must exist in the enclosing scope.
// NAME is used both as the case name (stringified) and to name a local helper struct,
// OPCODE is the opcode string, and OPERATOR is the C++ binary operator that the generated
// compare(x, y) applies to produce VK_TRUE or VK_FALSE.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
    do \
    { \
        struct compare_##NAME \
        { \
            static VkBool32 compare(float x, float y) \
            { \
                return (x OPERATOR y) ? VK_TRUE : VK_FALSE; \
            } \
        }; \
        cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
    } while (deGetFalse())
1239
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1240 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1241 {
1242 const string nan = testWithNan ? "_nan" : "";
1243 const string groupName = "opfunord" + nan;
1244 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1245 de::Random rnd (deStringHash(group->getName()));
1246 const int numElements = 100;
1247 vector<OpFUnordCase> cases;
1248 string extensions = testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1249 string capabilities = testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1250 string exeModes = testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1251 const StringTemplate shaderTemplate (
1252 string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1253 "OpSource GLSL 430\n"
1254 "OpName %main \"main\"\n"
1255 "OpName %id \"gl_GlobalInvocationID\"\n"
1256
1257 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1258
1259 "OpDecorate %buf BufferBlock\n"
1260 "OpDecorate %buf2 BufferBlock\n"
1261 "OpDecorate %indata1 DescriptorSet 0\n"
1262 "OpDecorate %indata1 Binding 0\n"
1263 "OpDecorate %indata2 DescriptorSet 0\n"
1264 "OpDecorate %indata2 Binding 1\n"
1265 "OpDecorate %outdata DescriptorSet 0\n"
1266 "OpDecorate %outdata Binding 2\n"
1267 "OpDecorate %f32arr ArrayStride 4\n"
1268 "OpDecorate %i32arr ArrayStride 4\n"
1269 "OpMemberDecorate %buf 0 Offset 0\n"
1270 "OpMemberDecorate %buf2 0 Offset 0\n"
1271
1272 + string(getComputeAsmCommonTypes()) +
1273
1274 "%buf = OpTypeStruct %f32arr\n"
1275 "%bufptr = OpTypePointer Uniform %buf\n"
1276 "%indata1 = OpVariable %bufptr Uniform\n"
1277 "%indata2 = OpVariable %bufptr Uniform\n"
1278
1279 "%buf2 = OpTypeStruct %i32arr\n"
1280 "%buf2ptr = OpTypePointer Uniform %buf2\n"
1281 "%outdata = OpVariable %buf2ptr Uniform\n"
1282
1283 "%id = OpVariable %uvec3ptr Input\n"
1284 "%zero = OpConstant %i32 0\n"
1285 "%consti1 = OpConstant %i32 1\n"
1286 "%constf1 = OpConstant %f32 1.0\n"
1287
1288 "%main = OpFunction %void None %voidf\n"
1289 "%label = OpLabel\n"
1290 "%idval = OpLoad %uvec3 %id\n"
1291 "%x = OpCompositeExtract %u32 %idval 0\n"
1292
1293 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1294 "%inval1 = OpLoad %f32 %inloc1\n"
1295 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1296 "%inval2 = OpLoad %f32 %inloc2\n"
1297 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
1298
1299 "%result = ${OPCODE} %bool %inval1 %inval2\n"
1300 "%int_res = OpSelect %i32 %result %consti1 %zero\n"
1301 " OpStore %outloc %int_res\n"
1302
1303 " OpReturn\n"
1304 " OpFunctionEnd\n");
1305
1306 ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1307 ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1308 ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1309 ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1310 ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1311 ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1312
1313 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1314 {
1315 map<string, string> specializations;
1316 ComputeShaderSpec spec;
1317 const float NaN = std::numeric_limits<float>::quiet_NaN();
1318 vector<float> inputFloats1 (numElements, 0);
1319 vector<float> inputFloats2 (numElements, 0);
1320 vector<deInt32> expectedInts (numElements, 0);
1321
1322 specializations["OPCODE"] = cases[caseNdx].opCode;
1323 spec.assembly = shaderTemplate.specialize(specializations);
1324
1325 fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1326 for (size_t ndx = 0; ndx < numElements; ++ndx)
1327 {
1328 switch (ndx % 6)
1329 {
1330 case 0: inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1331 case 1: inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1332 case 2: inputFloats2[ndx] = inputFloats1[ndx]; break;
1333 case 3: inputFloats2[ndx] = NaN; break;
1334 case 4: inputFloats2[ndx] = inputFloats1[ndx]; inputFloats1[ndx] = NaN; break;
1335 case 5: inputFloats2[ndx] = NaN; inputFloats1[ndx] = NaN; break;
1336 }
1337 expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1338 }
1339
1340 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1341 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1342 spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1343 spec.numWorkGroups = IVec3(numElements, 1, 1);
1344 spec.verifyIO = testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1345
1346 if (testWithNan)
1347 {
1348 spec.extensions.push_back("VK_KHR_shader_float_controls");
1349 spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1350 }
1351
1352 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1353 }
1354
1355 return group.release();
1356 }
1357
1358 struct OpAtomicCase
1359 {
1360 const char* name;
1361 const char* assembly;
1362 const char* retValAssembly;
1363 OpAtomicType opAtomic;
1364 deInt32 numOutputElements;
1365
OpAtomicCasevkt::SpirVAssembly::__anon043fb9e60111::OpAtomicCase1366 OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1367 : name (_name)
1368 , assembly (_assembly)
1369 , retValAssembly (_retValAssembly)
1370 , opAtomic (_opAtomic)
1371 , numOutputElements (_numOutputElements) {}
1372 };
1373
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1374 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1375 {
1376 std::string groupName ("opatomic");
1377 if (useStorageBuffer)
1378 groupName += "_storage_buffer";
1379 if (verifyReturnValues)
1380 groupName += "_return_values";
1381 if (volatileAtomic)
1382 groupName += "_volatile";
1383 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1384 vector<OpAtomicCase> cases;
1385
1386 const StringTemplate shaderTemplate (
1387
1388 string("OpCapability Shader\n") +
1389 (volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1390 (useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1391 (volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1392 (volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1393 "OpEntryPoint GLCompute %main \"main\" %id\n"
1394 "OpExecutionMode %main LocalSize 1 1 1\n" +
1395
1396 "OpSource GLSL 430\n"
1397 "OpName %main \"main\"\n"
1398 "OpName %id \"gl_GlobalInvocationID\"\n"
1399
1400 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1401
1402 "OpDecorate %buf ${BLOCK_DECORATION}\n"
1403 "OpDecorate %indata DescriptorSet 0\n"
1404 "OpDecorate %indata Binding 0\n"
1405 "OpDecorate %i32arr ArrayStride 4\n"
1406 "OpMemberDecorate %buf 0 Offset 0\n"
1407
1408 "OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1409 "OpDecorate %sum DescriptorSet 0\n"
1410 "OpDecorate %sum Binding 1\n"
1411 "OpMemberDecorate %sumbuf 0 Offset 0\n"
1412
1413 "${RETVAL_BUF_DECORATE}"
1414
1415 + getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1416
1417 "%buf = OpTypeStruct %i32arr\n"
1418 "%bufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1419 "%indata = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1420
1421 "%sumbuf = OpTypeStruct %i32arr\n"
1422 "%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1423 "%sum = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1424
1425 "${RETVAL_BUF_DECL}"
1426
1427 "%id = OpVariable %uvec3ptr Input\n"
1428 "%minusone = OpConstant %i32 -1\n"
1429 "%zero = OpConstant %i32 0\n"
1430 "%one = OpConstant %u32 1\n"
1431 "%two = OpConstant %i32 2\n"
1432 "%five = OpConstant %i32 5\n"
1433 "%volbit = OpConstant %i32 32768\n"
1434
1435 "%main = OpFunction %void None %voidf\n"
1436 "%label = OpLabel\n"
1437 "%idval = OpLoad %uvec3 %id\n"
1438 "%x = OpCompositeExtract %u32 %idval 0\n"
1439
1440 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
1441 "%inval = OpLoad %i32 %inloc\n"
1442
1443 "%outloc = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1444 "${INSTRUCTION}"
1445 "${RETVAL_ASSEMBLY}"
1446
1447 " OpReturn\n"
1448 " OpFunctionEnd\n");
1449
1450 #define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1451 do { \
1452 DE_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
1453 cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1454 } while (deGetFalse())
1455 #define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1456 #define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1457
1458 ADD_OPATOMIC_CASE_1(iadd, "%retv = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1459 " OpStore %retloc %retv\n", OPATOMIC_IADD );
1460 ADD_OPATOMIC_CASE_1(isub, "%retv = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1461 " OpStore %retloc %retv\n", OPATOMIC_ISUB );
1462 ADD_OPATOMIC_CASE_1(iinc, "%retv = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1463 " OpStore %retloc %retv\n", OPATOMIC_IINC );
1464 ADD_OPATOMIC_CASE_1(idec, "%retv = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1465 " OpStore %retloc %retv\n", OPATOMIC_IDEC );
1466 if (!verifyReturnValues)
1467 {
1468 ADD_OPATOMIC_CASE_N(load, "%inval2 = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1469 " OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1470 ADD_OPATOMIC_CASE_N(store, " OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1471 }
1472
1473 ADD_OPATOMIC_CASE_N(compex, "%even = OpSMod %i32 %inval %two\n"
1474 " OpStore %outloc %even\n"
1475 "%retv = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1476 " OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1477
1478
1479 #undef ADD_OPATOMIC_CASE
1480 #undef ADD_OPATOMIC_CASE_1
1481 #undef ADD_OPATOMIC_CASE_N
1482
1483 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1484 {
1485 map<string, string> specializations;
1486 ComputeShaderSpec spec;
1487 vector<deInt32> inputInts (numElements, 0);
1488 vector<deInt32> expected (cases[caseNdx].numOutputElements, -1);
1489
1490 if (volatileAtomic)
1491 {
1492 spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1493 spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1494
1495 // volatile, queuefamily scope
1496 specializations["SEMANTICS"] = "%volbit";
1497 specializations["SCOPE"] = "%five";
1498 }
1499 else
1500 {
1501 // non-volatile, device scope
1502 specializations["SEMANTICS"] = "%zero";
1503 specializations["SCOPE"] = "%one";
1504 }
1505 specializations["INDEX"] = (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1506 specializations["INSTRUCTION"] = cases[caseNdx].assembly;
1507 specializations["BLOCK_DECORATION"] = useStorageBuffer ? "Block" : "BufferBlock";
1508 specializations["BLOCK_POINTER_TYPE"] = useStorageBuffer ? "StorageBuffer" : "Uniform";
1509
1510 if (verifyReturnValues)
1511 {
1512 const StringTemplate blockDecoration (
1513 "\n"
1514 "OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1515 "OpDecorate %ret DescriptorSet 0\n"
1516 "OpDecorate %ret Binding 2\n"
1517 "OpMemberDecorate %retbuf 0 Offset 0\n\n");
1518
1519 const StringTemplate blockDeclaration (
1520 "\n"
1521 "%retbuf = OpTypeStruct %i32arr\n"
1522 "%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1523 "%ret = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1524
1525 specializations["RETVAL_ASSEMBLY"] =
1526 "%retloc = OpAccessChain %i32ptr %ret %zero %x\n"
1527 + std::string(cases[caseNdx].retValAssembly);
1528
1529 specializations["RETVAL_BUF_DECORATE"] = blockDecoration.specialize(specializations);
1530 specializations["RETVAL_BUF_DECL"] = blockDeclaration.specialize(specializations);
1531 }
1532 else
1533 {
1534 specializations["RETVAL_ASSEMBLY"] = "";
1535 specializations["RETVAL_BUF_DECORATE"] = "";
1536 specializations["RETVAL_BUF_DECL"] = "";
1537 }
1538
1539 spec.assembly = shaderTemplate.specialize(specializations);
1540
1541 // Specialize one more time, to catch things that were in a template parameter
1542 const StringTemplate assemblyTemplate(spec.assembly);
1543 spec.assembly = assemblyTemplate.specialize(specializations);
1544
1545 if (useStorageBuffer)
1546 spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1547
1548 spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1549 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1550 if (verifyReturnValues)
1551 spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1552 spec.numWorkGroups = IVec3(numElements, 1, 1);
1553
1554 if (verifyReturnValues)
1555 {
1556 switch (cases[caseNdx].opAtomic)
1557 {
1558 case OPATOMIC_IADD:
1559 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1560 break;
1561 case OPATOMIC_ISUB:
1562 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1563 break;
1564 case OPATOMIC_IINC:
1565 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1566 break;
1567 case OPATOMIC_IDEC:
1568 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1569 break;
1570 case OPATOMIC_COMPEX:
1571 spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1572 break;
1573 default:
1574 DE_FATAL("Unsupported OpAtomic type for return value verification");
1575 }
1576 }
1577 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1578 }
1579
1580 return group.release();
1581 }
1582
createOpLineGroup(tcu::TestContext & testCtx)1583 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1584 {
1585 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1586 ComputeShaderSpec spec;
1587 de::Random rnd (deStringHash(group->getName()));
1588 const int numElements = 100;
1589 vector<float> positiveFloats (numElements, 0);
1590 vector<float> negativeFloats (numElements, 0);
1591
1592 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1593
1594 for (size_t ndx = 0; ndx < numElements; ++ndx)
1595 negativeFloats[ndx] = -positiveFloats[ndx];
1596
1597 spec.assembly =
1598 string(getComputeAsmShaderPreamble()) +
1599
1600 "%fname1 = OpString \"negateInputs.comp\"\n"
1601 "%fname2 = OpString \"negateInputs\"\n"
1602
1603 "OpSource GLSL 430\n"
1604 "OpName %main \"main\"\n"
1605 "OpName %id \"gl_GlobalInvocationID\"\n"
1606
1607 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1608
1609 + string(getComputeAsmInputOutputBufferTraits()) +
1610
1611 "OpLine %fname1 0 0\n" // At the earliest possible position
1612
1613 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1614
1615 "OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1616 "OpLine %fname2 1 0\n" // Different filenames
1617 "OpLine %fname1 1000 100000\n"
1618
1619 "%id = OpVariable %uvec3ptr Input\n"
1620 "%zero = OpConstant %i32 0\n"
1621
1622 "OpLine %fname1 1 1\n" // Before a function
1623
1624 "%main = OpFunction %void None %voidf\n"
1625 "%label = OpLabel\n"
1626
1627 "OpLine %fname1 1 1\n" // In a function
1628
1629 "%idval = OpLoad %uvec3 %id\n"
1630 "%x = OpCompositeExtract %u32 %idval 0\n"
1631 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1632 "%inval = OpLoad %f32 %inloc\n"
1633 "%neg = OpFNegate %f32 %inval\n"
1634 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1635 " OpStore %outloc %neg\n"
1636 " OpReturn\n"
1637 " OpFunctionEnd\n";
1638 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1639 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1640 spec.numWorkGroups = IVec3(numElements, 1, 1);
1641
1642 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1643
1644 return group.release();
1645 }
1646
veryfiBinaryShader(const ProgramBinary & binary)1647 bool veryfiBinaryShader (const ProgramBinary& binary)
1648 {
1649 const size_t paternCount = 3u;
1650 bool paternsCheck[paternCount] =
1651 {
1652 false, false, false
1653 };
1654 const string patersns[paternCount] =
1655 {
1656 "VULKAN CTS",
1657 "Negative values",
1658 "Date: 2017/09/21"
1659 };
1660 size_t paternNdx = 0u;
1661
1662 for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1663 {
1664 if (false == paternsCheck[paternNdx] &&
1665 patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1666 deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1667 {
1668 paternsCheck[paternNdx]= true;
1669 paternNdx++;
1670 if (paternNdx == paternCount)
1671 break;
1672 }
1673 }
1674
1675 for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1676 {
1677 if (!paternsCheck[ndx])
1678 return false;
1679 }
1680
1681 return true;
1682 }
1683
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1684 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1685 {
1686 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1687 ComputeShaderSpec spec;
1688 de::Random rnd (deStringHash(group->getName()));
1689 const int numElements = 10;
1690 vector<float> positiveFloats (numElements, 0);
1691 vector<float> negativeFloats (numElements, 0);
1692
1693 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1694
1695 for (size_t ndx = 0; ndx < numElements; ++ndx)
1696 negativeFloats[ndx] = -positiveFloats[ndx];
1697
1698 spec.assembly =
1699 string(getComputeAsmShaderPreamble()) +
1700 "%fname = OpString \"negateInputs.comp\"\n"
1701
1702 "OpSource GLSL 430\n"
1703 "OpName %main \"main\"\n"
1704 "OpName %id \"gl_GlobalInvocationID\"\n"
1705 "OpModuleProcessed \"VULKAN CTS\"\n" //OpModuleProcessed;
1706 "OpModuleProcessed \"Negative values\"\n"
1707 "OpModuleProcessed \"Date: 2017/09/21\"\n"
1708 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1709
1710 + string(getComputeAsmInputOutputBufferTraits())
1711
1712 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1713
1714 "OpLine %fname 0 1\n"
1715
1716 "OpLine %fname 1000 1\n"
1717
1718 "%id = OpVariable %uvec3ptr Input\n"
1719 "%zero = OpConstant %i32 0\n"
1720 "%main = OpFunction %void None %voidf\n"
1721
1722 "%label = OpLabel\n"
1723 "%idval = OpLoad %uvec3 %id\n"
1724 "%x = OpCompositeExtract %u32 %idval 0\n"
1725
1726 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1727 "%inval = OpLoad %f32 %inloc\n"
1728 "%neg = OpFNegate %f32 %inval\n"
1729 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1730 " OpStore %outloc %neg\n"
1731 " OpReturn\n"
1732 " OpFunctionEnd\n";
1733 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1734 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1735 spec.numWorkGroups = IVec3(numElements, 1, 1);
1736 spec.verifyBinary = veryfiBinaryShader;
1737 spec.spirvVersion = SPIRV_VERSION_1_3;
1738
1739 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1740
1741 return group.release();
1742 }
1743
createOpNoLineGroup(tcu::TestContext & testCtx)1744 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1745 {
1746 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1747 ComputeShaderSpec spec;
1748 de::Random rnd (deStringHash(group->getName()));
1749 const int numElements = 100;
1750 vector<float> positiveFloats (numElements, 0);
1751 vector<float> negativeFloats (numElements, 0);
1752
1753 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1754
1755 for (size_t ndx = 0; ndx < numElements; ++ndx)
1756 negativeFloats[ndx] = -positiveFloats[ndx];
1757
1758 spec.assembly =
1759 string(getComputeAsmShaderPreamble()) +
1760
1761 "%fname = OpString \"negateInputs.comp\"\n"
1762
1763 "OpSource GLSL 430\n"
1764 "OpName %main \"main\"\n"
1765 "OpName %id \"gl_GlobalInvocationID\"\n"
1766
1767 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1768
1769 + string(getComputeAsmInputOutputBufferTraits()) +
1770
1771 "OpNoLine\n" // At the earliest possible position, without preceding OpLine
1772
1773 + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1774
1775 "OpLine %fname 0 1\n"
1776 "OpNoLine\n" // Immediately following a preceding OpLine
1777
1778 "OpLine %fname 1000 1\n"
1779
1780 "%id = OpVariable %uvec3ptr Input\n"
1781 "%zero = OpConstant %i32 0\n"
1782
1783 "OpNoLine\n" // Contents after the previous OpLine
1784
1785 "%main = OpFunction %void None %voidf\n"
1786 "%label = OpLabel\n"
1787 "%idval = OpLoad %uvec3 %id\n"
1788 "%x = OpCompositeExtract %u32 %idval 0\n"
1789
1790 "OpNoLine\n" // Multiple OpNoLine
1791 "OpNoLine\n"
1792 "OpNoLine\n"
1793
1794 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
1795 "%inval = OpLoad %f32 %inloc\n"
1796 "%neg = OpFNegate %f32 %inval\n"
1797 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1798 " OpStore %outloc %neg\n"
1799 " OpReturn\n"
1800 " OpFunctionEnd\n";
1801 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1802 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1803 spec.numWorkGroups = IVec3(numElements, 1, 1);
1804
1805 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1806
1807 return group.release();
1808 }
1809
1810 // Compare instruction for the contraction compute case.
1811 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1812 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1813 {
1814 if (outputAllocs.size() != 1)
1815 return false;
1816
1817 // Only size is needed because we are not comparing the exact values.
1818 size_t byteSize = expectedOutputs[0].getByteSize();
1819
1820 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1821
1822 for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1823 if (outputAsFloat[i] != 0.f &&
1824 outputAsFloat[i] != -ldexp(1, -24)) {
1825 return false;
1826 }
1827 }
1828
1829 return true;
1830 }
1831
createNoContractionGroup(tcu::TestContext & testCtx)1832 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1833 {
1834 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1835 vector<CaseParameter> cases;
1836 const int numElements = 100;
1837 vector<float> inputFloats1 (numElements, 0);
1838 vector<float> inputFloats2 (numElements, 0);
1839 vector<float> outputFloats (numElements, 0);
1840 const StringTemplate shaderTemplate (
1841 string(getComputeAsmShaderPreamble()) +
1842
1843 "OpName %main \"main\"\n"
1844 "OpName %id \"gl_GlobalInvocationID\"\n"
1845
1846 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1847
1848 "${DECORATION}\n"
1849
1850 "OpDecorate %buf BufferBlock\n"
1851 "OpDecorate %indata1 DescriptorSet 0\n"
1852 "OpDecorate %indata1 Binding 0\n"
1853 "OpDecorate %indata2 DescriptorSet 0\n"
1854 "OpDecorate %indata2 Binding 1\n"
1855 "OpDecorate %outdata DescriptorSet 0\n"
1856 "OpDecorate %outdata Binding 2\n"
1857 "OpDecorate %f32arr ArrayStride 4\n"
1858 "OpMemberDecorate %buf 0 Offset 0\n"
1859
1860 + string(getComputeAsmCommonTypes()) +
1861
1862 "%buf = OpTypeStruct %f32arr\n"
1863 "%bufptr = OpTypePointer Uniform %buf\n"
1864 "%indata1 = OpVariable %bufptr Uniform\n"
1865 "%indata2 = OpVariable %bufptr Uniform\n"
1866 "%outdata = OpVariable %bufptr Uniform\n"
1867
1868 "%id = OpVariable %uvec3ptr Input\n"
1869 "%zero = OpConstant %i32 0\n"
1870 "%c_f_m1 = OpConstant %f32 -1.\n"
1871
1872 "%main = OpFunction %void None %voidf\n"
1873 "%label = OpLabel\n"
1874 "%idval = OpLoad %uvec3 %id\n"
1875 "%x = OpCompositeExtract %u32 %idval 0\n"
1876 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1877 "%inval1 = OpLoad %f32 %inloc1\n"
1878 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1879 "%inval2 = OpLoad %f32 %inloc2\n"
1880 "%mul = OpFMul %f32 %inval1 %inval2\n"
1881 "%add = OpFAdd %f32 %mul %c_f_m1\n"
1882 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1883 " OpStore %outloc %add\n"
1884 " OpReturn\n"
1885 " OpFunctionEnd\n");
1886
1887 cases.push_back(CaseParameter("multiplication", "OpDecorate %mul NoContraction"));
1888 cases.push_back(CaseParameter("addition", "OpDecorate %add NoContraction"));
1889 cases.push_back(CaseParameter("both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1890
1891 for (size_t ndx = 0; ndx < numElements; ++ndx)
1892 {
1893 inputFloats1[ndx] = 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1894 inputFloats2[ndx] = 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1895 // Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1896 // conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1897 // So the final result will be 0.f or 0x1p-24.
1898 // If the operation is combined into a precise fused multiply-add, then the result would be
1899 // 2^-46 (0xa8800000).
1900 outputFloats[ndx] = 0.f;
1901 }
1902
1903 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1904 {
1905 map<string, string> specializations;
1906 ComputeShaderSpec spec;
1907
1908 specializations["DECORATION"] = cases[caseNdx].param;
1909 spec.assembly = shaderTemplate.specialize(specializations);
1910 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1911 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1912 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1913 spec.numWorkGroups = IVec3(numElements, 1, 1);
1914 // Check against the two possible answers based on rounding mode.
1915 spec.verifyIO = &compareNoContractCase;
1916
1917 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1918 }
1919 return group.release();
1920 }
1921
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1922 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1923 {
1924 if (outputAllocs.size() != 1)
1925 return false;
1926
1927 vector<deUint8> expectedBytes;
1928 expectedOutputs[0].getBytes(expectedBytes);
1929
1930 const float* expectedOutputAsFloat = reinterpret_cast<const float*>(&expectedBytes.front());
1931 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
1932
1933 for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1934 {
1935 const float f0 = expectedOutputAsFloat[idx];
1936 const float f1 = outputAsFloat[idx];
1937 // \todo relative error needs to be fairly high because FRem may be implemented as
1938 // (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1939 if (deFloatAbs((f1 - f0) / f0) > 0.02)
1940 return false;
1941 }
1942
1943 return true;
1944 }
1945
createOpFRemGroup(tcu::TestContext & testCtx)1946 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1947 {
1948 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1949 ComputeShaderSpec spec;
1950 de::Random rnd (deStringHash(group->getName()));
1951 const int numElements = 200;
1952 vector<float> inputFloats1 (numElements, 0);
1953 vector<float> inputFloats2 (numElements, 0);
1954 vector<float> outputFloats (numElements, 0);
1955
1956 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1957 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1958
1959 for (size_t ndx = 0; ndx < numElements; ++ndx)
1960 {
1961 // Guard against divisors near zero.
1962 if (std::fabs(inputFloats2[ndx]) < 1e-3)
1963 inputFloats2[ndx] = 8.f;
1964
1965 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1966 outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1967 }
1968
1969 spec.assembly =
1970 string(getComputeAsmShaderPreamble()) +
1971
1972 "OpName %main \"main\"\n"
1973 "OpName %id \"gl_GlobalInvocationID\"\n"
1974
1975 "OpDecorate %id BuiltIn GlobalInvocationId\n"
1976
1977 "OpDecorate %buf BufferBlock\n"
1978 "OpDecorate %indata1 DescriptorSet 0\n"
1979 "OpDecorate %indata1 Binding 0\n"
1980 "OpDecorate %indata2 DescriptorSet 0\n"
1981 "OpDecorate %indata2 Binding 1\n"
1982 "OpDecorate %outdata DescriptorSet 0\n"
1983 "OpDecorate %outdata Binding 2\n"
1984 "OpDecorate %f32arr ArrayStride 4\n"
1985 "OpMemberDecorate %buf 0 Offset 0\n"
1986
1987 + string(getComputeAsmCommonTypes()) +
1988
1989 "%buf = OpTypeStruct %f32arr\n"
1990 "%bufptr = OpTypePointer Uniform %buf\n"
1991 "%indata1 = OpVariable %bufptr Uniform\n"
1992 "%indata2 = OpVariable %bufptr Uniform\n"
1993 "%outdata = OpVariable %bufptr Uniform\n"
1994
1995 "%id = OpVariable %uvec3ptr Input\n"
1996 "%zero = OpConstant %i32 0\n"
1997
1998 "%main = OpFunction %void None %voidf\n"
1999 "%label = OpLabel\n"
2000 "%idval = OpLoad %uvec3 %id\n"
2001 "%x = OpCompositeExtract %u32 %idval 0\n"
2002 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2003 "%inval1 = OpLoad %f32 %inloc1\n"
2004 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2005 "%inval2 = OpLoad %f32 %inloc2\n"
2006 "%rem = OpFRem %f32 %inval1 %inval2\n"
2007 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2008 " OpStore %outloc %rem\n"
2009 " OpReturn\n"
2010 " OpFunctionEnd\n";
2011
2012 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2013 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2014 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2015 spec.numWorkGroups = IVec3(numElements, 1, 1);
2016 spec.verifyIO = &compareFRem;
2017
2018 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2019
2020 return group.release();
2021 }
2022
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2023 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2024 {
2025 if (outputAllocs.size() != 1)
2026 return false;
2027
2028 const BufferSp& expectedOutput (expectedOutputs[0].getBuffer());
2029 std::vector<deUint8> data;
2030 expectedOutput->getBytes(data);
2031
2032 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2033 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2034
2035 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2036 {
2037 const float f0 = expectedOutputAsFloat[idx];
2038 const float f1 = outputAsFloat[idx];
2039
2040 // For NMin, we accept NaN as output if both inputs were NaN.
2041 // Otherwise the NaN is the wrong choise, as on architectures that
2042 // do not handle NaN, those are huge values.
2043 if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2044 return false;
2045 }
2046
2047 return true;
2048 }
2049
createOpNMinGroup(tcu::TestContext & testCtx)2050 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2051 {
2052 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2053 ComputeShaderSpec spec;
2054 de::Random rnd (deStringHash(group->getName()));
2055 const int numElements = 200;
2056 vector<float> inputFloats1 (numElements, 0);
2057 vector<float> inputFloats2 (numElements, 0);
2058 vector<float> outputFloats (numElements, 0);
2059
2060 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2061 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2062
2063 // Make the first case a full-NAN case.
2064 inputFloats1[0] = TCU_NAN;
2065 inputFloats2[0] = TCU_NAN;
2066
2067 for (size_t ndx = 0; ndx < numElements; ++ndx)
2068 {
2069 // By default, pick the smallest
2070 outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2071
2072 // Make half of the cases NaN cases
2073 if ((ndx & 1) == 0)
2074 {
2075 // Alternate between the NaN operand
2076 if ((ndx & 2) == 0)
2077 {
2078 outputFloats[ndx] = inputFloats2[ndx];
2079 inputFloats1[ndx] = TCU_NAN;
2080 }
2081 else
2082 {
2083 outputFloats[ndx] = inputFloats1[ndx];
2084 inputFloats2[ndx] = TCU_NAN;
2085 }
2086 }
2087 }
2088
2089 spec.assembly =
2090 "OpCapability Shader\n"
2091 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2092 "OpMemoryModel Logical GLSL450\n"
2093 "OpEntryPoint GLCompute %main \"main\" %id\n"
2094 "OpExecutionMode %main LocalSize 1 1 1\n"
2095
2096 "OpName %main \"main\"\n"
2097 "OpName %id \"gl_GlobalInvocationID\"\n"
2098
2099 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2100
2101 "OpDecorate %buf BufferBlock\n"
2102 "OpDecorate %indata1 DescriptorSet 0\n"
2103 "OpDecorate %indata1 Binding 0\n"
2104 "OpDecorate %indata2 DescriptorSet 0\n"
2105 "OpDecorate %indata2 Binding 1\n"
2106 "OpDecorate %outdata DescriptorSet 0\n"
2107 "OpDecorate %outdata Binding 2\n"
2108 "OpDecorate %f32arr ArrayStride 4\n"
2109 "OpMemberDecorate %buf 0 Offset 0\n"
2110
2111 + string(getComputeAsmCommonTypes()) +
2112
2113 "%buf = OpTypeStruct %f32arr\n"
2114 "%bufptr = OpTypePointer Uniform %buf\n"
2115 "%indata1 = OpVariable %bufptr Uniform\n"
2116 "%indata2 = OpVariable %bufptr Uniform\n"
2117 "%outdata = OpVariable %bufptr Uniform\n"
2118
2119 "%id = OpVariable %uvec3ptr Input\n"
2120 "%zero = OpConstant %i32 0\n"
2121
2122 "%main = OpFunction %void None %voidf\n"
2123 "%label = OpLabel\n"
2124 "%idval = OpLoad %uvec3 %id\n"
2125 "%x = OpCompositeExtract %u32 %idval 0\n"
2126 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2127 "%inval1 = OpLoad %f32 %inloc1\n"
2128 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2129 "%inval2 = OpLoad %f32 %inloc2\n"
2130 "%rem = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2131 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2132 " OpStore %outloc %rem\n"
2133 " OpReturn\n"
2134 " OpFunctionEnd\n";
2135
2136 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2137 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2138 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2139 spec.numWorkGroups = IVec3(numElements, 1, 1);
2140 spec.verifyIO = &compareNMin;
2141
2142 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2143
2144 return group.release();
2145 }
2146
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2147 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2148 {
2149 if (outputAllocs.size() != 1)
2150 return false;
2151
2152 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2153 std::vector<deUint8> data;
2154 expectedOutput->getBytes(data);
2155
2156 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2157 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2158
2159 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2160 {
2161 const float f0 = expectedOutputAsFloat[idx];
2162 const float f1 = outputAsFloat[idx];
2163
2164 // For NMax, NaN is considered acceptable result, since in
2165 // architectures that do not handle NaNs, those are huge values.
2166 if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2167 return false;
2168 }
2169
2170 return true;
2171 }
2172
createOpNMaxGroup(tcu::TestContext & testCtx)2173 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2174 {
2175 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2176 ComputeShaderSpec spec;
2177 de::Random rnd (deStringHash(group->getName()));
2178 const int numElements = 200;
2179 vector<float> inputFloats1 (numElements, 0);
2180 vector<float> inputFloats2 (numElements, 0);
2181 vector<float> outputFloats (numElements, 0);
2182
2183 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2184 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2185
2186 // Make the first case a full-NAN case.
2187 inputFloats1[0] = TCU_NAN;
2188 inputFloats2[0] = TCU_NAN;
2189
2190 for (size_t ndx = 0; ndx < numElements; ++ndx)
2191 {
2192 // By default, pick the biggest
2193 outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2194
2195 // Make half of the cases NaN cases
2196 if ((ndx & 1) == 0)
2197 {
2198 // Alternate between the NaN operand
2199 if ((ndx & 2) == 0)
2200 {
2201 outputFloats[ndx] = inputFloats2[ndx];
2202 inputFloats1[ndx] = TCU_NAN;
2203 }
2204 else
2205 {
2206 outputFloats[ndx] = inputFloats1[ndx];
2207 inputFloats2[ndx] = TCU_NAN;
2208 }
2209 }
2210 }
2211
2212 spec.assembly =
2213 "OpCapability Shader\n"
2214 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2215 "OpMemoryModel Logical GLSL450\n"
2216 "OpEntryPoint GLCompute %main \"main\" %id\n"
2217 "OpExecutionMode %main LocalSize 1 1 1\n"
2218
2219 "OpName %main \"main\"\n"
2220 "OpName %id \"gl_GlobalInvocationID\"\n"
2221
2222 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2223
2224 "OpDecorate %buf BufferBlock\n"
2225 "OpDecorate %indata1 DescriptorSet 0\n"
2226 "OpDecorate %indata1 Binding 0\n"
2227 "OpDecorate %indata2 DescriptorSet 0\n"
2228 "OpDecorate %indata2 Binding 1\n"
2229 "OpDecorate %outdata DescriptorSet 0\n"
2230 "OpDecorate %outdata Binding 2\n"
2231 "OpDecorate %f32arr ArrayStride 4\n"
2232 "OpMemberDecorate %buf 0 Offset 0\n"
2233
2234 + string(getComputeAsmCommonTypes()) +
2235
2236 "%buf = OpTypeStruct %f32arr\n"
2237 "%bufptr = OpTypePointer Uniform %buf\n"
2238 "%indata1 = OpVariable %bufptr Uniform\n"
2239 "%indata2 = OpVariable %bufptr Uniform\n"
2240 "%outdata = OpVariable %bufptr Uniform\n"
2241
2242 "%id = OpVariable %uvec3ptr Input\n"
2243 "%zero = OpConstant %i32 0\n"
2244
2245 "%main = OpFunction %void None %voidf\n"
2246 "%label = OpLabel\n"
2247 "%idval = OpLoad %uvec3 %id\n"
2248 "%x = OpCompositeExtract %u32 %idval 0\n"
2249 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2250 "%inval1 = OpLoad %f32 %inloc1\n"
2251 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2252 "%inval2 = OpLoad %f32 %inloc2\n"
2253 "%rem = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2254 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2255 " OpStore %outloc %rem\n"
2256 " OpReturn\n"
2257 " OpFunctionEnd\n";
2258
2259 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2260 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2261 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2262 spec.numWorkGroups = IVec3(numElements, 1, 1);
2263 spec.verifyIO = &compareNMax;
2264
2265 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2266
2267 return group.release();
2268 }
2269
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2270 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2271 {
2272 if (outputAllocs.size() != 1)
2273 return false;
2274
2275 const BufferSp& expectedOutput = expectedOutputs[0].getBuffer();
2276 std::vector<deUint8> data;
2277 expectedOutput->getBytes(data);
2278
2279 const float* const expectedOutputAsFloat = reinterpret_cast<const float*>(&data.front());
2280 const float* const outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
2281
2282 for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2283 {
2284 const float e0 = expectedOutputAsFloat[idx * 2];
2285 const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2286 const float res = outputAsFloat[idx];
2287
2288 // For NClamp, we have two possible outcomes based on
2289 // whether NaNs are handled or not.
2290 // If either min or max value is NaN, the result is undefined,
2291 // so this test doesn't stress those. If the clamped value is
2292 // NaN, and NaNs are handled, the result is min; if NaNs are not
2293 // handled, they are big values that result in max.
2294 // If all three parameters are NaN, the result should be NaN.
2295 if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2296 (deFloatAbs(e0 - res) < 0.00001f) ||
2297 (deFloatAbs(e1 - res) < 0.00001f)))
2298 return false;
2299 }
2300
2301 return true;
2302 }
2303
createOpNClampGroup(tcu::TestContext & testCtx)2304 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2305 {
2306 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2307 ComputeShaderSpec spec;
2308 de::Random rnd (deStringHash(group->getName()));
2309 const int numElements = 200;
2310 vector<float> inputFloats1 (numElements, 0);
2311 vector<float> inputFloats2 (numElements, 0);
2312 vector<float> inputFloats3 (numElements, 0);
2313 vector<float> outputFloats (numElements * 2, 0);
2314
2315 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2316 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2317 fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2318
2319 for (size_t ndx = 0; ndx < numElements; ++ndx)
2320 {
2321 // Results are only defined if max value is bigger than min value.
2322 if (inputFloats2[ndx] > inputFloats3[ndx])
2323 {
2324 float t = inputFloats2[ndx];
2325 inputFloats2[ndx] = inputFloats3[ndx];
2326 inputFloats3[ndx] = t;
2327 }
2328
2329 // By default, do the clamp, setting both possible answers
2330 float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2331
2332 float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2333 float maxResB = maxResA;
2334
2335 // Alternate between the NaN cases
2336 if (ndx & 1)
2337 {
2338 inputFloats1[ndx] = TCU_NAN;
2339 // If NaN is handled, the result should be same as the clamp minimum.
2340 // If NaN is not handled, the result should clamp to the clamp maximum.
2341 maxResA = inputFloats2[ndx];
2342 maxResB = inputFloats3[ndx];
2343 }
2344 else
2345 {
2346 // Not a NaN case - only one legal result.
2347 maxResA = defaultRes;
2348 maxResB = defaultRes;
2349 }
2350
2351 outputFloats[ndx * 2] = maxResA;
2352 outputFloats[ndx * 2 + 1] = maxResB;
2353 }
2354
2355 // Make the first case a full-NAN case.
2356 inputFloats1[0] = TCU_NAN;
2357 inputFloats2[0] = TCU_NAN;
2358 inputFloats3[0] = TCU_NAN;
2359 outputFloats[0] = TCU_NAN;
2360 outputFloats[1] = TCU_NAN;
2361
2362 spec.assembly =
2363 "OpCapability Shader\n"
2364 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2365 "OpMemoryModel Logical GLSL450\n"
2366 "OpEntryPoint GLCompute %main \"main\" %id\n"
2367 "OpExecutionMode %main LocalSize 1 1 1\n"
2368
2369 "OpName %main \"main\"\n"
2370 "OpName %id \"gl_GlobalInvocationID\"\n"
2371
2372 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2373
2374 "OpDecorate %buf BufferBlock\n"
2375 "OpDecorate %indata1 DescriptorSet 0\n"
2376 "OpDecorate %indata1 Binding 0\n"
2377 "OpDecorate %indata2 DescriptorSet 0\n"
2378 "OpDecorate %indata2 Binding 1\n"
2379 "OpDecorate %indata3 DescriptorSet 0\n"
2380 "OpDecorate %indata3 Binding 2\n"
2381 "OpDecorate %outdata DescriptorSet 0\n"
2382 "OpDecorate %outdata Binding 3\n"
2383 "OpDecorate %f32arr ArrayStride 4\n"
2384 "OpMemberDecorate %buf 0 Offset 0\n"
2385
2386 + string(getComputeAsmCommonTypes()) +
2387
2388 "%buf = OpTypeStruct %f32arr\n"
2389 "%bufptr = OpTypePointer Uniform %buf\n"
2390 "%indata1 = OpVariable %bufptr Uniform\n"
2391 "%indata2 = OpVariable %bufptr Uniform\n"
2392 "%indata3 = OpVariable %bufptr Uniform\n"
2393 "%outdata = OpVariable %bufptr Uniform\n"
2394
2395 "%id = OpVariable %uvec3ptr Input\n"
2396 "%zero = OpConstant %i32 0\n"
2397
2398 "%main = OpFunction %void None %voidf\n"
2399 "%label = OpLabel\n"
2400 "%idval = OpLoad %uvec3 %id\n"
2401 "%x = OpCompositeExtract %u32 %idval 0\n"
2402 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
2403 "%inval1 = OpLoad %f32 %inloc1\n"
2404 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
2405 "%inval2 = OpLoad %f32 %inloc2\n"
2406 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
2407 "%inval3 = OpLoad %f32 %inloc3\n"
2408 "%rem = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2409 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
2410 " OpStore %outloc %rem\n"
2411 " OpReturn\n"
2412 " OpFunctionEnd\n";
2413
2414 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2415 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2416 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2417 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2418 spec.numWorkGroups = IVec3(numElements, 1, 1);
2419 spec.verifyIO = &compareNClamp;
2420
2421 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2422
2423 return group.release();
2424 }
2425
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2426 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2427 {
2428 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2429 de::Random rnd (deStringHash(group->getName()));
2430 const int numElements = 200;
2431
2432 const struct CaseParams
2433 {
2434 const char* name;
2435 const char* failMessage; // customized status message
2436 qpTestResult failResult; // override status on failure
2437 int op1Min, op1Max; // operand ranges
2438 int op2Min, op2Max;
2439 } cases[] =
2440 {
2441 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2442 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2443 };
2444 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2445
2446 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2447 {
2448 const CaseParams& params = cases[caseNdx];
2449 ComputeShaderSpec spec;
2450 vector<deInt32> inputInts1 (numElements, 0);
2451 vector<deInt32> inputInts2 (numElements, 0);
2452 vector<deInt32> outputInts (numElements, 0);
2453
2454 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2455 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2456
2457 for (int ndx = 0; ndx < numElements; ++ndx)
2458 {
2459 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2460 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2461 }
2462
2463 spec.assembly =
2464 string(getComputeAsmShaderPreamble()) +
2465
2466 "OpName %main \"main\"\n"
2467 "OpName %id \"gl_GlobalInvocationID\"\n"
2468
2469 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2470
2471 "OpDecorate %buf BufferBlock\n"
2472 "OpDecorate %indata1 DescriptorSet 0\n"
2473 "OpDecorate %indata1 Binding 0\n"
2474 "OpDecorate %indata2 DescriptorSet 0\n"
2475 "OpDecorate %indata2 Binding 1\n"
2476 "OpDecorate %outdata DescriptorSet 0\n"
2477 "OpDecorate %outdata Binding 2\n"
2478 "OpDecorate %i32arr ArrayStride 4\n"
2479 "OpMemberDecorate %buf 0 Offset 0\n"
2480
2481 + string(getComputeAsmCommonTypes()) +
2482
2483 "%buf = OpTypeStruct %i32arr\n"
2484 "%bufptr = OpTypePointer Uniform %buf\n"
2485 "%indata1 = OpVariable %bufptr Uniform\n"
2486 "%indata2 = OpVariable %bufptr Uniform\n"
2487 "%outdata = OpVariable %bufptr Uniform\n"
2488
2489 "%id = OpVariable %uvec3ptr Input\n"
2490 "%zero = OpConstant %i32 0\n"
2491
2492 "%main = OpFunction %void None %voidf\n"
2493 "%label = OpLabel\n"
2494 "%idval = OpLoad %uvec3 %id\n"
2495 "%x = OpCompositeExtract %u32 %idval 0\n"
2496 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2497 "%inval1 = OpLoad %i32 %inloc1\n"
2498 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2499 "%inval2 = OpLoad %i32 %inloc2\n"
2500 "%rem = OpSRem %i32 %inval1 %inval2\n"
2501 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2502 " OpStore %outloc %rem\n"
2503 " OpReturn\n"
2504 " OpFunctionEnd\n";
2505
2506 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2507 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2508 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
2509 spec.numWorkGroups = IVec3(numElements, 1, 1);
2510 spec.failResult = params.failResult;
2511 spec.failMessage = params.failMessage;
2512
2513 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2514 }
2515
2516 return group.release();
2517 }
2518
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2519 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2520 {
2521 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2522 de::Random rnd (deStringHash(group->getName()));
2523 const int numElements = 200;
2524
2525 const struct CaseParams
2526 {
2527 const char* name;
2528 const char* failMessage; // customized status message
2529 qpTestResult failResult; // override status on failure
2530 bool positive;
2531 } cases[] =
2532 {
2533 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2534 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2535 };
2536 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2537
2538 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2539 {
2540 const CaseParams& params = cases[caseNdx];
2541 ComputeShaderSpec spec;
2542 vector<deInt64> inputInts1 (numElements, 0);
2543 vector<deInt64> inputInts2 (numElements, 0);
2544 vector<deInt64> outputInts (numElements, 0);
2545
2546 if (params.positive)
2547 {
2548 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2549 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2550 }
2551 else
2552 {
2553 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2554 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2555 }
2556
2557 for (int ndx = 0; ndx < numElements; ++ndx)
2558 {
2559 // The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2560 outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2561 }
2562
2563 spec.assembly =
2564 "OpCapability Int64\n"
2565
2566 + string(getComputeAsmShaderPreamble()) +
2567
2568 "OpName %main \"main\"\n"
2569 "OpName %id \"gl_GlobalInvocationID\"\n"
2570
2571 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2572
2573 "OpDecorate %buf BufferBlock\n"
2574 "OpDecorate %indata1 DescriptorSet 0\n"
2575 "OpDecorate %indata1 Binding 0\n"
2576 "OpDecorate %indata2 DescriptorSet 0\n"
2577 "OpDecorate %indata2 Binding 1\n"
2578 "OpDecorate %outdata DescriptorSet 0\n"
2579 "OpDecorate %outdata Binding 2\n"
2580 "OpDecorate %i64arr ArrayStride 8\n"
2581 "OpMemberDecorate %buf 0 Offset 0\n"
2582
2583 + string(getComputeAsmCommonTypes())
2584 + string(getComputeAsmCommonInt64Types()) +
2585
2586 "%buf = OpTypeStruct %i64arr\n"
2587 "%bufptr = OpTypePointer Uniform %buf\n"
2588 "%indata1 = OpVariable %bufptr Uniform\n"
2589 "%indata2 = OpVariable %bufptr Uniform\n"
2590 "%outdata = OpVariable %bufptr Uniform\n"
2591
2592 "%id = OpVariable %uvec3ptr Input\n"
2593 "%zero = OpConstant %i64 0\n"
2594
2595 "%main = OpFunction %void None %voidf\n"
2596 "%label = OpLabel\n"
2597 "%idval = OpLoad %uvec3 %id\n"
2598 "%x = OpCompositeExtract %u32 %idval 0\n"
2599 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2600 "%inval1 = OpLoad %i64 %inloc1\n"
2601 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2602 "%inval2 = OpLoad %i64 %inloc2\n"
2603 "%rem = OpSRem %i64 %inval1 %inval2\n"
2604 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2605 " OpStore %outloc %rem\n"
2606 " OpReturn\n"
2607 " OpFunctionEnd\n";
2608
2609 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2610 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2611 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
2612 spec.numWorkGroups = IVec3(numElements, 1, 1);
2613 spec.failResult = params.failResult;
2614 spec.failMessage = params.failMessage;
2615
2616 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2617
2618 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2619 }
2620
2621 return group.release();
2622 }
2623
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2624 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2625 {
2626 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2627 de::Random rnd (deStringHash(group->getName()));
2628 const int numElements = 200;
2629
2630 const struct CaseParams
2631 {
2632 const char* name;
2633 const char* failMessage; // customized status message
2634 qpTestResult failResult; // override status on failure
2635 int op1Min, op1Max; // operand ranges
2636 int op2Min, op2Max;
2637 } cases[] =
2638 {
2639 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, 0, 65536, 0, 100 },
2640 { "all", "Inconsistent results, but within specification", negFailResult, -65536, 65536, -100, 100 }, // see below
2641 };
2642 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2643
2644 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2645 {
2646 const CaseParams& params = cases[caseNdx];
2647
2648 ComputeShaderSpec spec;
2649 vector<deInt32> inputInts1 (numElements, 0);
2650 vector<deInt32> inputInts2 (numElements, 0);
2651 vector<deInt32> outputInts (numElements, 0);
2652
2653 fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2654 fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2655
2656 for (int ndx = 0; ndx < numElements; ++ndx)
2657 {
2658 deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2659 if (rem == 0)
2660 {
2661 outputInts[ndx] = 0;
2662 }
2663 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2664 {
2665 // They have the same sign
2666 outputInts[ndx] = rem;
2667 }
2668 else
2669 {
2670 // They have opposite sign. The remainder operation takes the
2671 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2672 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2673 // the result has the correct sign and that it is still
2674 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2675 //
2676 // See also http://mathforum.org/library/drmath/view/52343.html
2677 outputInts[ndx] = rem + inputInts2[ndx];
2678 }
2679 }
2680
2681 spec.assembly =
2682 string(getComputeAsmShaderPreamble()) +
2683
2684 "OpName %main \"main\"\n"
2685 "OpName %id \"gl_GlobalInvocationID\"\n"
2686
2687 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2688
2689 "OpDecorate %buf BufferBlock\n"
2690 "OpDecorate %indata1 DescriptorSet 0\n"
2691 "OpDecorate %indata1 Binding 0\n"
2692 "OpDecorate %indata2 DescriptorSet 0\n"
2693 "OpDecorate %indata2 Binding 1\n"
2694 "OpDecorate %outdata DescriptorSet 0\n"
2695 "OpDecorate %outdata Binding 2\n"
2696 "OpDecorate %i32arr ArrayStride 4\n"
2697 "OpMemberDecorate %buf 0 Offset 0\n"
2698
2699 + string(getComputeAsmCommonTypes()) +
2700
2701 "%buf = OpTypeStruct %i32arr\n"
2702 "%bufptr = OpTypePointer Uniform %buf\n"
2703 "%indata1 = OpVariable %bufptr Uniform\n"
2704 "%indata2 = OpVariable %bufptr Uniform\n"
2705 "%outdata = OpVariable %bufptr Uniform\n"
2706
2707 "%id = OpVariable %uvec3ptr Input\n"
2708 "%zero = OpConstant %i32 0\n"
2709
2710 "%main = OpFunction %void None %voidf\n"
2711 "%label = OpLabel\n"
2712 "%idval = OpLoad %uvec3 %id\n"
2713 "%x = OpCompositeExtract %u32 %idval 0\n"
2714 "%inloc1 = OpAccessChain %i32ptr %indata1 %zero %x\n"
2715 "%inval1 = OpLoad %i32 %inloc1\n"
2716 "%inloc2 = OpAccessChain %i32ptr %indata2 %zero %x\n"
2717 "%inval2 = OpLoad %i32 %inloc2\n"
2718 "%rem = OpSMod %i32 %inval1 %inval2\n"
2719 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
2720 " OpStore %outloc %rem\n"
2721 " OpReturn\n"
2722 " OpFunctionEnd\n";
2723
2724 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts1)));
2725 spec.inputs.push_back (BufferSp(new Int32Buffer(inputInts2)));
2726 spec.outputs.push_back (BufferSp(new Int32Buffer(outputInts)));
2727 spec.numWorkGroups = IVec3(numElements, 1, 1);
2728 spec.failResult = params.failResult;
2729 spec.failMessage = params.failMessage;
2730
2731 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2732 }
2733
2734 return group.release();
2735 }
2736
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2737 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2738 {
2739 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2740 de::Random rnd (deStringHash(group->getName()));
2741 const int numElements = 200;
2742
2743 const struct CaseParams
2744 {
2745 const char* name;
2746 const char* failMessage; // customized status message
2747 qpTestResult failResult; // override status on failure
2748 bool positive;
2749 } cases[] =
2750 {
2751 { "positive", "Output doesn't match with expected", QP_TEST_RESULT_FAIL, true },
2752 { "all", "Inconsistent results, but within specification", negFailResult, false }, // see below
2753 };
2754 // If either operand is negative the result is undefined. Some implementations may still return correct values.
2755
2756 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2757 {
2758 const CaseParams& params = cases[caseNdx];
2759
2760 ComputeShaderSpec spec;
2761 vector<deInt64> inputInts1 (numElements, 0);
2762 vector<deInt64> inputInts2 (numElements, 0);
2763 vector<deInt64> outputInts (numElements, 0);
2764
2765
2766 if (params.positive)
2767 {
2768 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2769 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2770 }
2771 else
2772 {
2773 fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2774 fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2775 }
2776
2777 for (int ndx = 0; ndx < numElements; ++ndx)
2778 {
2779 deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2780 if (rem == 0)
2781 {
2782 outputInts[ndx] = 0;
2783 }
2784 else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2785 {
2786 // They have the same sign
2787 outputInts[ndx] = rem;
2788 }
2789 else
2790 {
2791 // They have opposite sign. The remainder operation takes the
2792 // sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2793 // of inputInts2[ndx]. Adding inputInts2[ndx] will ensure that
2794 // the result has the correct sign and that it is still
2795 // congruent to inputInts1[ndx] modulo inputInts2[ndx]
2796 //
2797 // See also http://mathforum.org/library/drmath/view/52343.html
2798 outputInts[ndx] = rem + inputInts2[ndx];
2799 }
2800 }
2801
2802 spec.assembly =
2803 "OpCapability Int64\n"
2804
2805 + string(getComputeAsmShaderPreamble()) +
2806
2807 "OpName %main \"main\"\n"
2808 "OpName %id \"gl_GlobalInvocationID\"\n"
2809
2810 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2811
2812 "OpDecorate %buf BufferBlock\n"
2813 "OpDecorate %indata1 DescriptorSet 0\n"
2814 "OpDecorate %indata1 Binding 0\n"
2815 "OpDecorate %indata2 DescriptorSet 0\n"
2816 "OpDecorate %indata2 Binding 1\n"
2817 "OpDecorate %outdata DescriptorSet 0\n"
2818 "OpDecorate %outdata Binding 2\n"
2819 "OpDecorate %i64arr ArrayStride 8\n"
2820 "OpMemberDecorate %buf 0 Offset 0\n"
2821
2822 + string(getComputeAsmCommonTypes())
2823 + string(getComputeAsmCommonInt64Types()) +
2824
2825 "%buf = OpTypeStruct %i64arr\n"
2826 "%bufptr = OpTypePointer Uniform %buf\n"
2827 "%indata1 = OpVariable %bufptr Uniform\n"
2828 "%indata2 = OpVariable %bufptr Uniform\n"
2829 "%outdata = OpVariable %bufptr Uniform\n"
2830
2831 "%id = OpVariable %uvec3ptr Input\n"
2832 "%zero = OpConstant %i64 0\n"
2833
2834 "%main = OpFunction %void None %voidf\n"
2835 "%label = OpLabel\n"
2836 "%idval = OpLoad %uvec3 %id\n"
2837 "%x = OpCompositeExtract %u32 %idval 0\n"
2838 "%inloc1 = OpAccessChain %i64ptr %indata1 %zero %x\n"
2839 "%inval1 = OpLoad %i64 %inloc1\n"
2840 "%inloc2 = OpAccessChain %i64ptr %indata2 %zero %x\n"
2841 "%inval2 = OpLoad %i64 %inloc2\n"
2842 "%rem = OpSMod %i64 %inval1 %inval2\n"
2843 "%outloc = OpAccessChain %i64ptr %outdata %zero %x\n"
2844 " OpStore %outloc %rem\n"
2845 " OpReturn\n"
2846 " OpFunctionEnd\n";
2847
2848 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts1)));
2849 spec.inputs.push_back (BufferSp(new Int64Buffer(inputInts2)));
2850 spec.outputs.push_back (BufferSp(new Int64Buffer(outputInts)));
2851 spec.numWorkGroups = IVec3(numElements, 1, 1);
2852 spec.failResult = params.failResult;
2853 spec.failMessage = params.failMessage;
2854
2855 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2856
2857 group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2858 }
2859
2860 return group.release();
2861 }
2862
2863 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2864 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2865 {
2866 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2867 de::Random rnd (deStringHash(group->getName()));
2868 const int numElements = 100;
2869
2870 // The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2871 ComputeShaderSpec spec1;
2872 vector<Vec4> inputFloats1 (numElements);
2873 vector<Vec4> outputFloats1 (numElements);
2874
2875 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2876
2877 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2878 floorAll(inputFloats1);
2879
2880 for (size_t ndx = 0; ndx < numElements; ++ndx)
2881 outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2882
2883 spec1.assembly =
2884 string(getComputeAsmShaderPreamble()) +
2885
2886 "OpName %main \"main\"\n"
2887 "OpName %id \"gl_GlobalInvocationID\"\n"
2888
2889 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2890 "OpDecorate %vec4arr ArrayStride 16\n"
2891
2892 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2893
2894 "%vec4 = OpTypeVector %f32 4\n"
2895 "%vec4ptr_u = OpTypePointer Uniform %vec4\n"
2896 "%vec4ptr_f = OpTypePointer Function %vec4\n"
2897 "%vec4arr = OpTypeRuntimeArray %vec4\n"
2898 "%buf = OpTypeStruct %vec4arr\n"
2899 "%bufptr = OpTypePointer Uniform %buf\n"
2900 "%indata = OpVariable %bufptr Uniform\n"
2901 "%outdata = OpVariable %bufptr Uniform\n"
2902
2903 "%id = OpVariable %uvec3ptr Input\n"
2904 "%zero = OpConstant %i32 0\n"
2905 "%c_f_0 = OpConstant %f32 0.\n"
2906 "%c_f_0_5 = OpConstant %f32 0.5\n"
2907 "%c_f_1_5 = OpConstant %f32 1.5\n"
2908 "%c_f_2_5 = OpConstant %f32 2.5\n"
2909 "%c_vec4 = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2910
2911 "%main = OpFunction %void None %voidf\n"
2912 "%label = OpLabel\n"
2913 "%v_vec4 = OpVariable %vec4ptr_f Function\n"
2914 "%idval = OpLoad %uvec3 %id\n"
2915 "%x = OpCompositeExtract %u32 %idval 0\n"
2916 "%inloc = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2917 "%outloc = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2918 " OpCopyMemory %v_vec4 %inloc\n"
2919 "%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2920 "%add = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2921 " OpStore %outloc %add\n"
2922 " OpReturn\n"
2923 " OpFunctionEnd\n";
2924
2925 spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2926 spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2927 spec1.numWorkGroups = IVec3(numElements, 1, 1);
2928
2929 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2930
2931 // The following case copies a float[100] variable from the input buffer to the output buffer.
2932 ComputeShaderSpec spec2;
2933 vector<float> inputFloats2 (numElements);
2934 vector<float> outputFloats2 (numElements);
2935
2936 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2937
2938 for (size_t ndx = 0; ndx < numElements; ++ndx)
2939 outputFloats2[ndx] = inputFloats2[ndx];
2940
2941 spec2.assembly =
2942 string(getComputeAsmShaderPreamble()) +
2943
2944 "OpName %main \"main\"\n"
2945 "OpName %id \"gl_GlobalInvocationID\"\n"
2946
2947 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2948 "OpDecorate %f32arr100 ArrayStride 4\n"
2949
2950 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2951
2952 "%hundred = OpConstant %u32 100\n"
2953 "%f32arr100 = OpTypeArray %f32 %hundred\n"
2954 "%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2955 "%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2956 "%buf = OpTypeStruct %f32arr100\n"
2957 "%bufptr = OpTypePointer Uniform %buf\n"
2958 "%indata = OpVariable %bufptr Uniform\n"
2959 "%outdata = OpVariable %bufptr Uniform\n"
2960
2961 "%id = OpVariable %uvec3ptr Input\n"
2962 "%zero = OpConstant %i32 0\n"
2963
2964 "%main = OpFunction %void None %voidf\n"
2965 "%label = OpLabel\n"
2966 "%var = OpVariable %f32arr100ptr_f Function\n"
2967 "%inarr = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2968 "%outarr = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2969 " OpCopyMemory %var %inarr\n"
2970 " OpCopyMemory %outarr %var\n"
2971 " OpReturn\n"
2972 " OpFunctionEnd\n";
2973
2974 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2975 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2976 spec2.numWorkGroups = IVec3(1, 1, 1);
2977
2978 group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2979
2980 // The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2981 ComputeShaderSpec spec3;
2982 vector<float> inputFloats3 (16);
2983 vector<float> outputFloats3 (16);
2984
2985 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2986
2987 for (size_t ndx = 0; ndx < 16; ++ndx)
2988 outputFloats3[ndx] = inputFloats3[ndx];
2989
2990 spec3.assembly =
2991 string(getComputeAsmShaderPreamble()) +
2992
2993 "OpName %main \"main\"\n"
2994 "OpName %id \"gl_GlobalInvocationID\"\n"
2995
2996 "OpDecorate %id BuiltIn GlobalInvocationId\n"
2997 //"OpMemberDecorate %buf 0 Offset 0\n" - exists in getComputeAsmInputOutputBufferTraits
2998 "OpMemberDecorate %buf 1 Offset 16\n"
2999 "OpMemberDecorate %buf 2 Offset 32\n"
3000 "OpMemberDecorate %buf 3 Offset 48\n"
3001
3002 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3003
3004 "%vec4 = OpTypeVector %f32 4\n"
3005 "%buf = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3006 "%bufptr = OpTypePointer Uniform %buf\n"
3007 "%indata = OpVariable %bufptr Uniform\n"
3008 "%outdata = OpVariable %bufptr Uniform\n"
3009 "%vec4stptr = OpTypePointer Function %buf\n"
3010
3011 "%id = OpVariable %uvec3ptr Input\n"
3012 "%zero = OpConstant %i32 0\n"
3013
3014 "%main = OpFunction %void None %voidf\n"
3015 "%label = OpLabel\n"
3016 "%var = OpVariable %vec4stptr Function\n"
3017 " OpCopyMemory %var %indata\n"
3018 " OpCopyMemory %outdata %var\n"
3019 " OpReturn\n"
3020 " OpFunctionEnd\n";
3021
3022 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3023 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3024 spec3.numWorkGroups = IVec3(1, 1, 1);
3025
3026 group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
3027
3028 // The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3029 ComputeShaderSpec spec4;
3030 vector<float> inputFloats4 (numElements);
3031 vector<float> outputFloats4 (numElements);
3032
3033 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3034
3035 for (size_t ndx = 0; ndx < numElements; ++ndx)
3036 outputFloats4[ndx] = -inputFloats4[ndx];
3037
3038 spec4.assembly =
3039 string(getComputeAsmShaderPreamble()) +
3040
3041 "OpName %main \"main\"\n"
3042 "OpName %id \"gl_GlobalInvocationID\"\n"
3043
3044 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3045
3046 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3047
3048 "%f32ptr_f = OpTypePointer Function %f32\n"
3049 "%id = OpVariable %uvec3ptr Input\n"
3050 "%zero = OpConstant %i32 0\n"
3051
3052 "%main = OpFunction %void None %voidf\n"
3053 "%label = OpLabel\n"
3054 "%var = OpVariable %f32ptr_f Function\n"
3055 "%idval = OpLoad %uvec3 %id\n"
3056 "%x = OpCompositeExtract %u32 %idval 0\n"
3057 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3058 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3059 " OpCopyMemory %var %inloc\n"
3060 "%val = OpLoad %f32 %var\n"
3061 "%neg = OpFNegate %f32 %val\n"
3062 " OpStore %outloc %neg\n"
3063 " OpReturn\n"
3064 " OpFunctionEnd\n";
3065
3066 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3067 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3068 spec4.numWorkGroups = IVec3(numElements, 1, 1);
3069
3070 group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3071
3072 return group.release();
3073 }
3074
createOpCopyObjectGroup(tcu::TestContext & testCtx)3075 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3076 {
3077 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3078 ComputeShaderSpec spec;
3079 de::Random rnd (deStringHash(group->getName()));
3080 const int numElements = 100;
3081 vector<float> inputFloats (numElements, 0);
3082 vector<float> outputFloats (numElements, 0);
3083
3084 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3085
3086 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3087 floorAll(inputFloats);
3088
3089 for (size_t ndx = 0; ndx < numElements; ++ndx)
3090 outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3091
3092 spec.assembly =
3093 string(getComputeAsmShaderPreamble()) +
3094
3095 "OpName %main \"main\"\n"
3096 "OpName %id \"gl_GlobalInvocationID\"\n"
3097
3098 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3099
3100 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3101
3102 "%fmat = OpTypeMatrix %fvec3 3\n"
3103 "%three = OpConstant %u32 3\n"
3104 "%farr = OpTypeArray %f32 %three\n"
3105 "%fst = OpTypeStruct %f32 %f32\n"
3106
3107 + string(getComputeAsmInputOutputBuffer()) +
3108
3109 "%id = OpVariable %uvec3ptr Input\n"
3110 "%zero = OpConstant %i32 0\n"
3111 "%c_f = OpConstant %f32 1.5\n"
3112 "%c_fvec3 = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3113 "%c_fmat = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3114 "%c_farr = OpConstantComposite %farr %c_f %c_f %c_f\n"
3115 "%c_fst = OpConstantComposite %fst %c_f %c_f\n"
3116
3117 "%main = OpFunction %void None %voidf\n"
3118 "%label = OpLabel\n"
3119 "%c_f_copy = OpCopyObject %f32 %c_f\n"
3120 "%c_fvec3_copy = OpCopyObject %fvec3 %c_fvec3\n"
3121 "%c_fmat_copy = OpCopyObject %fmat %c_fmat\n"
3122 "%c_farr_copy = OpCopyObject %farr %c_farr\n"
3123 "%c_fst_copy = OpCopyObject %fst %c_fst\n"
3124 "%fvec3_elem = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3125 "%fmat_elem = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3126 "%farr_elem = OpCompositeExtract %f32 %c_farr_copy 2\n"
3127 "%fst_elem = OpCompositeExtract %f32 %c_fst_copy 1\n"
3128 // Add up. 1.5 * 5 = 7.5.
3129 "%add1 = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3130 "%add2 = OpFAdd %f32 %add1 %fmat_elem\n"
3131 "%add3 = OpFAdd %f32 %add2 %farr_elem\n"
3132 "%add4 = OpFAdd %f32 %add3 %fst_elem\n"
3133
3134 "%idval = OpLoad %uvec3 %id\n"
3135 "%x = OpCompositeExtract %u32 %idval 0\n"
3136 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3137 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3138 "%inval = OpLoad %f32 %inloc\n"
3139 "%add = OpFAdd %f32 %add4 %inval\n"
3140 " OpStore %outloc %add\n"
3141 " OpReturn\n"
3142 " OpFunctionEnd\n";
3143 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3144 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3145 spec.numWorkGroups = IVec3(numElements, 1, 1);
3146
3147 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3148
3149 return group.release();
3150 }
3151 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3152 //
3153 // #version 430
3154 //
3155 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3156 // float elements[];
3157 // } input_data;
3158 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3159 // float elements[];
3160 // } output_data;
3161 //
3162 // void not_called_func() {
3163 // // place OpUnreachable here
3164 // }
3165 //
3166 // uint modulo4(uint val) {
3167 // switch (val % uint(4)) {
3168 // case 0: return 3;
3169 // case 1: return 2;
3170 // case 2: return 1;
3171 // case 3: return 0;
3172 // default: return 100; // place OpUnreachable here
3173 // }
3174 // }
3175 //
3176 // uint const5() {
3177 // return 5;
3178 // // place OpUnreachable here
3179 // }
3180 //
3181 // void main() {
3182 // uint x = gl_GlobalInvocationID.x;
3183 // if (const5() > modulo4(1000)) {
3184 // output_data.elements[x] = -input_data.elements[x];
3185 // } else {
3186 // // place OpUnreachable here
3187 // output_data.elements[x] = input_data.elements[x];
3188 // }
3189 // }
3190
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3191 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3192 {
3193 static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3194
3195 struct Case
3196 {
3197 string name;
3198 string desc;
3199 };
3200
3201 static const Case cases[] =
3202 {
3203 { "unreachable-switch-merge-in-loop", "Test containing an unreachable switch merge block inside an infinite loop" },
3204 };
3205
3206 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3207 {
3208 const string fileName = cases[i].name + ".amber";
3209 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3210 }
3211 }
3212
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3213 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3214 {
3215 static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3216
3217 struct Case
3218 {
3219 string name;
3220 string desc;
3221 };
3222
3223 static const Case cases[] =
3224 {
3225 { "switch-case-to-merge-block", "Test switch containing a case that jumps directly to the merge block" },
3226 };
3227
3228 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3229 {
3230 const string fileName = cases[i].name + ".amber";
3231 group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3232 }
3233 }
3234
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3235 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3236 {
3237 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
3238 static const char dataDir[] = "spirv_assembly/instruction/compute/arraylength";
3239
3240 struct Case
3241 {
3242 string name;
3243 string desc;
3244 };
3245
3246 static const Case cases[] =
3247 {
3248 { "array-stride-larger-than-element-size", "Test using an unsized array with stride larger than the element size" }
3249 };
3250
3251 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3252 {
3253 const string fileName = cases[i].name + ".amber";
3254 group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3255 }
3256
3257 return group.release();
3258 }
3259
createOpUnreachableGroup(tcu::TestContext & testCtx)3260 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3261 {
3262 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3263 ComputeShaderSpec spec;
3264 de::Random rnd (deStringHash(group->getName()));
3265 const int numElements = 100;
3266 vector<float> positiveFloats (numElements, 0);
3267 vector<float> negativeFloats (numElements, 0);
3268
3269 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3270
3271 for (size_t ndx = 0; ndx < numElements; ++ndx)
3272 negativeFloats[ndx] = -positiveFloats[ndx];
3273
3274 spec.assembly =
3275 string(getComputeAsmShaderPreamble()) +
3276
3277 "OpSource GLSL 430\n"
3278 "OpName %main \"main\"\n"
3279 "OpName %func_not_called_func \"not_called_func(\"\n"
3280 "OpName %func_modulo4 \"modulo4(u1;\"\n"
3281 "OpName %func_const5 \"const5(\"\n"
3282 "OpName %id \"gl_GlobalInvocationID\"\n"
3283
3284 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3285
3286 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3287
3288 "%u32ptr = OpTypePointer Function %u32\n"
3289 "%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3290 "%unitf = OpTypeFunction %u32\n"
3291
3292 "%id = OpVariable %uvec3ptr Input\n"
3293 "%zero = OpConstant %u32 0\n"
3294 "%one = OpConstant %u32 1\n"
3295 "%two = OpConstant %u32 2\n"
3296 "%three = OpConstant %u32 3\n"
3297 "%four = OpConstant %u32 4\n"
3298 "%five = OpConstant %u32 5\n"
3299 "%hundred = OpConstant %u32 100\n"
3300 "%thousand = OpConstant %u32 1000\n"
3301
3302 + string(getComputeAsmInputOutputBuffer()) +
3303
3304 // Main()
3305 "%main = OpFunction %void None %voidf\n"
3306 "%main_entry = OpLabel\n"
3307 "%v_thousand = OpVariable %u32ptr Function %thousand\n"
3308 "%idval = OpLoad %uvec3 %id\n"
3309 "%x = OpCompositeExtract %u32 %idval 0\n"
3310 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
3311 "%inval = OpLoad %f32 %inloc\n"
3312 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3313 "%ret_const5 = OpFunctionCall %u32 %func_const5\n"
3314 "%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3315 "%cmp_gt = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3316 " OpSelectionMerge %if_end None\n"
3317 " OpBranchConditional %cmp_gt %if_true %if_false\n"
3318 "%if_true = OpLabel\n"
3319 "%negate = OpFNegate %f32 %inval\n"
3320 " OpStore %outloc %negate\n"
3321 " OpBranch %if_end\n"
3322 "%if_false = OpLabel\n"
3323 " OpUnreachable\n" // Unreachable else branch for if statement
3324 "%if_end = OpLabel\n"
3325 " OpReturn\n"
3326 " OpFunctionEnd\n"
3327
3328 // not_called_function()
3329 "%func_not_called_func = OpFunction %void None %voidf\n"
3330 "%not_called_func_entry = OpLabel\n"
3331 " OpUnreachable\n" // Unreachable entry block in not called static function
3332 " OpFunctionEnd\n"
3333
3334 // modulo4()
3335 "%func_modulo4 = OpFunction %u32 None %uintfuint\n"
3336 "%valptr = OpFunctionParameter %u32ptr\n"
3337 "%modulo4_entry = OpLabel\n"
3338 "%val = OpLoad %u32 %valptr\n"
3339 "%modulo = OpUMod %u32 %val %four\n"
3340 " OpSelectionMerge %switch_merge None\n"
3341 " OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3342 "%case0 = OpLabel\n"
3343 " OpReturnValue %three\n"
3344 "%case1 = OpLabel\n"
3345 " OpReturnValue %two\n"
3346 "%case2 = OpLabel\n"
3347 " OpReturnValue %one\n"
3348 "%case3 = OpLabel\n"
3349 " OpReturnValue %zero\n"
3350 "%default = OpLabel\n"
3351 " OpUnreachable\n" // Unreachable default case for switch statement
3352 "%switch_merge = OpLabel\n"
3353 " OpUnreachable\n" // Unreachable merge block for switch statement
3354 " OpFunctionEnd\n"
3355
3356 // const5()
3357 "%func_const5 = OpFunction %u32 None %unitf\n"
3358 "%const5_entry = OpLabel\n"
3359 " OpReturnValue %five\n"
3360 "%unreachable = OpLabel\n"
3361 " OpUnreachable\n" // Unreachable block in function
3362 " OpFunctionEnd\n";
3363 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3364 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3365 spec.numWorkGroups = IVec3(numElements, 1, 1);
3366
3367 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3368
3369 addOpUnreachableAmberTests(*group, testCtx);
3370
3371 return group.release();
3372 }
3373
3374 // Assembly code used for testing decoration group is based on GLSL source code:
3375 //
3376 // #version 430
3377 //
3378 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3379 // float elements[];
3380 // } input_data0;
3381 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3382 // float elements[];
3383 // } input_data1;
3384 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3385 // float elements[];
3386 // } input_data2;
3387 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3388 // float elements[];
3389 // } input_data3;
3390 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3391 // float elements[];
3392 // } input_data4;
3393 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3394 // float elements[];
3395 // } output_data;
3396 //
3397 // void main() {
3398 // uint x = gl_GlobalInvocationID.x;
3399 // output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3400 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3401 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3402 {
3403 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3404 ComputeShaderSpec spec;
3405 de::Random rnd (deStringHash(group->getName()));
3406 const int numElements = 100;
3407 vector<float> inputFloats0 (numElements, 0);
3408 vector<float> inputFloats1 (numElements, 0);
3409 vector<float> inputFloats2 (numElements, 0);
3410 vector<float> inputFloats3 (numElements, 0);
3411 vector<float> inputFloats4 (numElements, 0);
3412 vector<float> outputFloats (numElements, 0);
3413
3414 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3415 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3416 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3417 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3418 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3419
3420 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3421 floorAll(inputFloats0);
3422 floorAll(inputFloats1);
3423 floorAll(inputFloats2);
3424 floorAll(inputFloats3);
3425 floorAll(inputFloats4);
3426
3427 for (size_t ndx = 0; ndx < numElements; ++ndx)
3428 outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3429
3430 spec.assembly =
3431 string(getComputeAsmShaderPreamble()) +
3432
3433 "OpSource GLSL 430\n"
3434 "OpName %main \"main\"\n"
3435 "OpName %id \"gl_GlobalInvocationID\"\n"
3436
3437 // Not using group decoration on variable.
3438 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3439 // Not using group decoration on type.
3440 "OpDecorate %f32arr ArrayStride 4\n"
3441
3442 "OpDecorate %groups BufferBlock\n"
3443 "OpDecorate %groupm Offset 0\n"
3444 "%groups = OpDecorationGroup\n"
3445 "%groupm = OpDecorationGroup\n"
3446
3447 // Group decoration on multiple structs.
3448 "OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3449 // Group decoration on multiple struct members.
3450 "OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3451
3452 "OpDecorate %group1 DescriptorSet 0\n"
3453 "OpDecorate %group3 DescriptorSet 0\n"
3454 "OpDecorate %group3 NonWritable\n"
3455 "OpDecorate %group3 Restrict\n"
3456 "%group0 = OpDecorationGroup\n"
3457 "%group1 = OpDecorationGroup\n"
3458 "%group3 = OpDecorationGroup\n"
3459
3460 // Applying the same decoration group multiple times.
3461 "OpGroupDecorate %group1 %outdata\n"
3462 "OpGroupDecorate %group1 %outdata\n"
3463 "OpGroupDecorate %group1 %outdata\n"
3464 "OpDecorate %outdata DescriptorSet 0\n"
3465 "OpDecorate %outdata Binding 5\n"
3466 // Applying decoration group containing nothing.
3467 "OpGroupDecorate %group0 %indata0\n"
3468 "OpDecorate %indata0 DescriptorSet 0\n"
3469 "OpDecorate %indata0 Binding 0\n"
3470 // Applying decoration group containing one decoration.
3471 "OpGroupDecorate %group1 %indata1\n"
3472 "OpDecorate %indata1 Binding 1\n"
3473 // Applying decoration group containing multiple decorations.
3474 "OpGroupDecorate %group3 %indata2 %indata3\n"
3475 "OpDecorate %indata2 Binding 2\n"
3476 "OpDecorate %indata3 Binding 3\n"
3477 // Applying multiple decoration groups (with overlapping).
3478 "OpGroupDecorate %group0 %indata4\n"
3479 "OpGroupDecorate %group1 %indata4\n"
3480 "OpGroupDecorate %group3 %indata4\n"
3481 "OpDecorate %indata4 Binding 4\n"
3482
3483 + string(getComputeAsmCommonTypes()) +
3484
3485 "%id = OpVariable %uvec3ptr Input\n"
3486 "%zero = OpConstant %i32 0\n"
3487
3488 "%outbuf = OpTypeStruct %f32arr\n"
3489 "%outbufptr = OpTypePointer Uniform %outbuf\n"
3490 "%outdata = OpVariable %outbufptr Uniform\n"
3491 "%inbuf0 = OpTypeStruct %f32arr\n"
3492 "%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3493 "%indata0 = OpVariable %inbuf0ptr Uniform\n"
3494 "%inbuf1 = OpTypeStruct %f32arr\n"
3495 "%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3496 "%indata1 = OpVariable %inbuf1ptr Uniform\n"
3497 "%inbuf2 = OpTypeStruct %f32arr\n"
3498 "%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3499 "%indata2 = OpVariable %inbuf2ptr Uniform\n"
3500 "%inbuf3 = OpTypeStruct %f32arr\n"
3501 "%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3502 "%indata3 = OpVariable %inbuf3ptr Uniform\n"
3503 "%inbuf4 = OpTypeStruct %f32arr\n"
3504 "%inbufptr = OpTypePointer Uniform %inbuf4\n"
3505 "%indata4 = OpVariable %inbufptr Uniform\n"
3506
3507 "%main = OpFunction %void None %voidf\n"
3508 "%label = OpLabel\n"
3509 "%idval = OpLoad %uvec3 %id\n"
3510 "%x = OpCompositeExtract %u32 %idval 0\n"
3511 "%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3512 "%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3513 "%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3514 "%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3515 "%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3516 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3517 "%inval0 = OpLoad %f32 %inloc0\n"
3518 "%inval1 = OpLoad %f32 %inloc1\n"
3519 "%inval2 = OpLoad %f32 %inloc2\n"
3520 "%inval3 = OpLoad %f32 %inloc3\n"
3521 "%inval4 = OpLoad %f32 %inloc4\n"
3522 "%add0 = OpFAdd %f32 %inval0 %inval1\n"
3523 "%add1 = OpFAdd %f32 %add0 %inval2\n"
3524 "%add2 = OpFAdd %f32 %add1 %inval3\n"
3525 "%add = OpFAdd %f32 %add2 %inval4\n"
3526 " OpStore %outloc %add\n"
3527 " OpReturn\n"
3528 " OpFunctionEnd\n";
3529 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3530 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3531 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3532 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3533 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3534 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3535 spec.numWorkGroups = IVec3(numElements, 1, 1);
3536
3537 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3538
3539 return group.release();
3540 }
3541
// Tag identifying which member of SpecConstantValue::ValueUnion holds the value.
enum SpecConstantType
{
	// Integer types, signed and unsigned, by increasing width.
	SC_INT8 = 0,
	SC_UINT8,
	SC_INT16,
	SC_UINT16,
	SC_INT32,
	SC_UINT32,
	SC_INT64,
	SC_UINT64,

	// Floating point types by increasing width.
	SC_FLOAT16,
	SC_FLOAT32,
	SC_FLOAT64,
};
3556
3557 struct SpecConstantValue
3558 {
3559 SpecConstantType type;
3560 union ValueUnion {
3561 deInt8 i8;
3562 deUint8 u8;
3563 deInt16 i16;
3564 deUint16 u16;
3565 deInt32 i32;
3566 deUint32 u32;
3567 deInt64 i64;
3568 deUint64 u64;
3569 tcu::Float16 f16;
3570 tcu::Float32 f32;
3571 tcu::Float64 f64;
3572
ValueUnion(deInt8 v)3573 ValueUnion (deInt8 v) : i8(v) {}
ValueUnion(deUint8 v)3574 ValueUnion (deUint8 v) : u8(v) {}
ValueUnion(deInt16 v)3575 ValueUnion (deInt16 v) : i16(v) {}
ValueUnion(deUint16 v)3576 ValueUnion (deUint16 v) : u16(v) {}
ValueUnion(deInt32 v)3577 ValueUnion (deInt32 v) : i32(v) {}
ValueUnion(deUint32 v)3578 ValueUnion (deUint32 v) : u32(v) {}
ValueUnion(deInt64 v)3579 ValueUnion (deInt64 v) : i64(v) {}
ValueUnion(deUint64 v)3580 ValueUnion (deUint64 v) : u64(v) {}
ValueUnion(tcu::Float16 v)3581 ValueUnion (tcu::Float16 v) : f16(v) {}
ValueUnion(tcu::Float32 v)3582 ValueUnion (tcu::Float32 v) : f32(v) {}
ValueUnion(tcu::Float64 v)3583 ValueUnion (tcu::Float64 v) : f64(v) {}
3584 } value;
3585
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3586 SpecConstantValue (deInt8 v) : type(SC_INT8) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3587 SpecConstantValue (deUint8 v) : type(SC_UINT8) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3588 SpecConstantValue (deInt16 v) : type(SC_INT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3589 SpecConstantValue (deUint16 v) : type(SC_UINT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3590 SpecConstantValue (deInt32 v) : type(SC_INT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3591 SpecConstantValue (deUint32 v) : type(SC_UINT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3592 SpecConstantValue (deInt64 v) : type(SC_INT64) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3593 SpecConstantValue (deUint64 v) : type(SC_UINT64) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3594 SpecConstantValue (tcu::Float16 v) : type(SC_FLOAT16) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3595 SpecConstantValue (tcu::Float32 v) : type(SC_FLOAT32) , value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3596 SpecConstantValue (tcu::Float64 v) : type(SC_FLOAT64) , value(v) {}
3597
appendTovkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3598 void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3599 {
3600 switch (type)
3601 {
3602 case SC_INT8: specConstants.append(value.i8); break;
3603 case SC_UINT8: specConstants.append(value.u8); break;
3604 case SC_INT16: specConstants.append(value.i16); break;
3605 case SC_UINT16: specConstants.append(value.u16); break;
3606 case SC_INT32: specConstants.append(value.i32); break;
3607 case SC_UINT32: specConstants.append(value.u32); break;
3608 case SC_INT64: specConstants.append(value.i64); break;
3609 case SC_UINT64: specConstants.append(value.u64); break;
3610 case SC_FLOAT16: specConstants.append(value.f16); break;
3611 case SC_FLOAT32: specConstants.append(value.f32); break;
3612 case SC_FLOAT64: specConstants.append(value.f64); break;
3613 default:
3614 DE_ASSERT(false);
3615 }
3616 }
3617 };
3618
3619 enum CaseFlagBits
3620 {
3621 FLAG_NONE = 0,
3622 FLAG_CONVERT = 1,
3623 FLAG_I8 = (1<<1),
3624 FLAG_I16 = (1<<2),
3625 FLAG_I64 = (1<<3),
3626 FLAG_F16 = (1<<4),
3627 FLAG_F64 = (1<<5),
3628 };
3629 using CaseFlags = deUint32;
3630
3631 struct SpecConstantTwoValCase
3632 {
3633 const std::string caseName;
3634 const std::string scDefinition0;
3635 const std::string scDefinition1;
3636 const std::string scResultType;
3637 const std::string scOperation;
3638 SpecConstantValue scActualValue0;
3639 SpecConstantValue scActualValue1;
3640 const std::string resultOperation;
3641 vector<deInt32> expectedOutput;
3642 CaseFlags caseFlags;
3643
SpecConstantTwoValCasevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantTwoValCase3644 SpecConstantTwoValCase (const std::string& name,
3645 const std::string& definition0,
3646 const std::string& definition1,
3647 const std::string& resultType,
3648 const std::string& operation,
3649 SpecConstantValue value0,
3650 SpecConstantValue value1,
3651 const std::string& resultOp,
3652 const vector<deInt32>& output,
3653 CaseFlags flags = FLAG_NONE)
3654 : caseName (name)
3655 , scDefinition0 (definition0)
3656 , scDefinition1 (definition1)
3657 , scResultType (resultType)
3658 , scOperation (operation)
3659 , scActualValue0 (value0)
3660 , scActualValue1 (value1)
3661 , resultOperation (resultOp)
3662 , expectedOutput (output)
3663 , caseFlags (flags)
3664 {}
3665 };
3666
// Returns the SPIR-V declarations of the i32 constants 0..3 plus the composite types built on
// them: a 3-element i32 array, a 3x3 array of arrays and a struct wrapping the latter.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	std::string declarations;

	// Scalar constants; %three doubles as the array length below.
	declarations +=
		"%zero = OpConstant %i32 0\n"
		"%one = OpConstant %i32 1\n"
		"%two = OpConstant %i32 2\n"
		"%three = OpConstant %i32 3\n";

	// Composite types, innermost first.
	declarations +=
		"%iarr3 = OpTypeArray %i32 %three\n"
		"%imat3 = OpTypeArray %iarr3 %three\n"
		"%struct = OpTypeStruct %imat3\n";

	return declarations;
}
3679
// Returns the SPIR-V declarations of all-zero constants for the array, array-of-arrays and
// struct types, each one composed from the previous.
std::string getSpecConstantOpStructComposites ()
{
	static const char* const zeroComposites[] =
	{
		"%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero\n",
		"%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n",
		"%struct_0 = OpConstantComposite %struct %imat3_0\n",
	};

	std::string declarations;

	for (const char* line : zeroComposites)
		declarations += line;

	return declarations;
}
3688
// Returns the OpSpecConstantOp sequence that builds three rotated arrays from %sc_0..%sc_2,
// packs them into the 3x3 array and the struct, extracts component pairs, compares them and
// selects %mustbe_0 / %mustbe_1 / %mustbe_2 from the comparison results.
std::string getSpecConstantOpStructConstBlock ()
{
	std::string block;

	// Compose (sc_0, sc_1, sc_2)
	block +=
		"%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0\n"
		"%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1\n"
		"%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2\n";

	// Compose (sc_1, sc_2, sc_0)
	block +=
		"%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0\n"
		"%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1\n"
		"%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2\n";

	// Compose (sc_2, sc_0, sc_1)
	block +=
		"%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0\n"
		"%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1\n"
		"%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2\n";

	// Matrix with the 3 previous arrays.
	block +=
		"%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0\n"
		"%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1\n"
		"%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2\n";

	// Save it in the struct.
	block +=
		"%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0\n";

	// Extract some component pairs to compare them.
	block +=
		"%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0\n"
		"%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0\n"

		"%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1\n"
		"%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2\n"

		"%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 0\n"
		"%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1\n";

	// Comparison results: false, true, true (in that order).
	block +=
		"%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0\n"
		"%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2\n"
		"%cmpres_2 = OpSpecConstantOp %bool IEqual %comp_2_0 %comp_1_1\n";

	// Must select 0, 1 and 2 respectively.
	block +=
		"%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero\n"
		"%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero\n"
		"%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one\n";

	return block;
}
3728
// Returns the SPIR-V function-body instructions that fold the %mustbe_* values into the final result:
//
//   %factor   = (1 - mustbe_0) * mustbe_1 * (mustbe_2 - 1)
//   %sc_final = %factor * %sc_factor
//
// When %mustbe_0, %mustbe_1 and %mustbe_2 hold 0, 1 and 2 the factor is 1, so %sc_final equals %sc_factor.
std::string getSpecConstantOpStructInstructions ()
{
	// (1 - mustbe_0) * mustbe_1
	std::string instructions =
		"%subf_a = OpISub %i32 %one %mustbe_0\n"
		"%subf_b = OpIMul %i32 %subf_a %mustbe_1\n";

	// ... * (mustbe_2 - 1)
	instructions +=
		"%subf_c = OpISub %i32 %mustbe_2 %one\n"
		"%factor = OpIMul %i32 %subf_b %subf_c\n";

	// Scale the spec-constant result by the factor.
	instructions +=
		"%sc_final = OpIMul %i32 %factor %sc_factor\n";

	return instructions;
}
3741
createSpecConstantGroup(tcu::TestContext & testCtx)3742 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3743 {
3744 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3745 vector<SpecConstantTwoValCase> cases;
3746 de::Random rnd (deStringHash(group->getName()));
3747 const int numElements = 100;
3748 vector<deInt32> inputInts (numElements, 0);
3749 vector<deInt32> outputInts1 (numElements, 0);
3750 vector<deInt32> outputInts2 (numElements, 0);
3751 vector<deInt32> outputInts3 (numElements, 0);
3752 vector<deInt32> outputInts4 (numElements, 0);
3753 vector<deInt32> outputInts5 (numElements, 0);
3754 const StringTemplate shaderTemplate (
3755 "${CAPABILITIES:opt}"
3756 + string(getComputeAsmShaderPreamble()) +
3757
3758 "OpName %main \"main\"\n"
3759 "OpName %id \"gl_GlobalInvocationID\"\n"
3760
3761 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3762 "OpDecorate %sc_0 SpecId 0\n"
3763 "OpDecorate %sc_1 SpecId 1\n"
3764 "OpDecorate %i32arr ArrayStride 4\n"
3765
3766 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3767
3768 "${OPTYPE_DEFINITIONS:opt}"
3769 "%buf = OpTypeStruct %i32arr\n"
3770 "%bufptr = OpTypePointer Uniform %buf\n"
3771 "%indata = OpVariable %bufptr Uniform\n"
3772 "%outdata = OpVariable %bufptr Uniform\n"
3773
3774 "%id = OpVariable %uvec3ptr Input\n"
3775 "%zero = OpConstant %i32 0\n"
3776
3777 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
3778 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
3779 "%sc_final = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3780
3781 "%main = OpFunction %void None %voidf\n"
3782 "%label = OpLabel\n"
3783 "${TYPE_CONVERT:opt}"
3784 "%idval = OpLoad %uvec3 %id\n"
3785 "%x = OpCompositeExtract %u32 %idval 0\n"
3786 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
3787 "%inval = OpLoad %i32 %inloc\n"
3788 "%final = ${GEN_RESULT}\n"
3789 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
3790 " OpStore %outloc %final\n"
3791 " OpReturn\n"
3792 " OpFunctionEnd\n");
3793
3794 fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3795
3796 for (size_t ndx = 0; ndx < numElements; ++ndx)
3797 {
3798 outputInts1[ndx] = inputInts[ndx] + 42;
3799 outputInts2[ndx] = inputInts[ndx];
3800 outputInts3[ndx] = inputInts[ndx] - 11200;
3801 outputInts4[ndx] = inputInts[ndx] + 1;
3802 outputInts5[ndx] = inputInts[ndx] - 42;
3803 }
3804
3805 const char addScToInput[] = "OpIAdd %i32 %inval %sc_final";
3806 const char addSc32ToInput[] = "OpIAdd %i32 %inval %sc_final32";
3807 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_final %inval %zero";
3808 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_final %zero %inval";
3809
3810 cases.push_back(SpecConstantTwoValCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 62, -20, addScToInput, outputInts1));
3811 cases.push_back(SpecConstantTwoValCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 100, 58, addScToInput, outputInts1));
3812 cases.push_back(SpecConstantTwoValCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -2, -21, addScToInput, outputInts1));
3813 cases.push_back(SpecConstantTwoValCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, -3, addScToInput, outputInts1));
3814 cases.push_back(SpecConstantTwoValCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 3, addScToInput, outputInts1));
3815 cases.push_back(SpecConstantTwoValCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3816 cases.push_back(SpecConstantTwoValCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 7, 3, addScToInput, outputInts4));
3817 cases.push_back(SpecConstantTwoValCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 342, 50, addScToInput, outputInts1));
3818 cases.push_back(SpecConstantTwoValCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 42, 63, addScToInput, outputInts1));
3819 cases.push_back(SpecConstantTwoValCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 34, 8, addScToInput, outputInts1));
3820 cases.push_back(SpecConstantTwoValCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 18, 56, addScToInput, outputInts1));
3821 cases.push_back(SpecConstantTwoValCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, 2, addScToInput, outputInts1));
3822 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, 2, addScToInput, outputInts5));
3823 cases.push_back(SpecConstantTwoValCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, 1, addScToInput, outputInts1));
3824
3825 // Shifts for other integer sizes.
3826 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{168}, deInt64{2}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3827 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-168}, deInt64{2}, addSc32ToInput, outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3828 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{21}, deInt64{1}, addSc32ToInput, outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3829 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{168}, deInt16{2}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3830 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-168}, deInt16{2}, addSc32ToInput, outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3831 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{21}, deInt16{1}, addSc32ToInput, outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3832 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{84}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3833 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-84}, deInt8{1}, addSc32ToInput, outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3834 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{21}, deInt8{1}, addSc32ToInput, outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3835
3836 // Shifts for other integer sizes but only in the shift amount.
3837 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt64{2}, addScToInput, outputInts1, (FLAG_I64)));
3838 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt64{2}, addScToInput, outputInts5, (FLAG_I64)));
3839 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt64{1}, addScToInput, outputInts1, (FLAG_I64)));
3840 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 168, deInt16{2}, addScToInput, outputInts1, (FLAG_I16)));
3841 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -168, deInt16{2}, addScToInput, outputInts5, (FLAG_I16)));
3842 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt16{1}, addScToInput, outputInts1, (FLAG_I16)));
3843 cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 84, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3844 cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -84, deInt8{1}, addScToInput, outputInts5, (FLAG_I8)));
3845 cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 21, deInt8{1}, addScToInput, outputInts1, (FLAG_I8)));
3846
3847 cases.push_back(SpecConstantTwoValCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputInts2));
3848 cases.push_back(SpecConstantTwoValCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputInts2));
3849 cases.push_back(SpecConstantTwoValCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3850 cases.push_back(SpecConstantTwoValCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputInts2));
3851 cases.push_back(SpecConstantTwoValCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputInts2));
3852 cases.push_back(SpecConstantTwoValCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputInts2));
3853 cases.push_back(SpecConstantTwoValCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputInts2));
3854 cases.push_back(SpecConstantTwoValCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputInts2));
3855 cases.push_back(SpecConstantTwoValCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputInts2));
3856 cases.push_back(SpecConstantTwoValCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputInts2));
3857 cases.push_back(SpecConstantTwoValCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3858 cases.push_back(SpecConstantTwoValCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3859 cases.push_back(SpecConstantTwoValCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputInts2));
3860 cases.push_back(SpecConstantTwoValCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputInts2));
3861 cases.push_back(SpecConstantTwoValCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -42, 0, addScToInput, outputInts1));
3862 cases.push_back(SpecConstantTwoValCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -43, 0, addScToInput, outputInts1));
3863 cases.push_back(SpecConstantTwoValCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputInts2));
3864 cases.push_back(SpecConstantTwoValCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %zero", 1, 42, addScToInput, outputInts1));
3865 cases.push_back(SpecConstantTwoValCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -11200, 0, addSc32ToInput, outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3866 cases.push_back(SpecConstantTwoValCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32{-11200.0}, tcu::Float32{0.0}, addSc32ToInput, outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3867 cases.push_back(SpecConstantTwoValCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16{1.0}, tcu::Float16{0.0}, addSc32ToInput, outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3868
3869 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3870 {
3871 map<string, string> specializations;
3872 ComputeShaderSpec spec;
3873
3874 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
3875 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
3876 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
3877 specializations["SC_OP"] = cases[caseNdx].scOperation;
3878 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
3879
3880 // Special SPIR-V code when using 16-bit integers.
3881 if (cases[caseNdx].caseFlags & FLAG_I16)
3882 {
3883 spec.requestedVulkanFeatures.coreFeatures.shaderInt16 = VK_TRUE;
3884 specializations["CAPABILITIES"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
3885 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
3886 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3887 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 16-bit integer to 32-bit integer
3888 }
3889
3890 // Special SPIR-V code when using 64-bit integers.
3891 if (cases[caseNdx].caseFlags & FLAG_I64)
3892 {
3893 spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
3894 specializations["CAPABILITIES"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
3895 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
3896 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3897 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 64-bit integer to 32-bit integer
3898 }
3899
3900 // Special SPIR-V code when using 64-bit floats.
3901 if (cases[caseNdx].caseFlags & FLAG_F64)
3902 {
3903 spec.requestedVulkanFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
3904 specializations["CAPABILITIES"] += "OpCapability Float64\n"; // Adds 64-bit float capability
3905 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
3906 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3907 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 64-bit float to 32-bit integer
3908 }
3909
3910 // Extension needed for float16 and int8.
3911 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3912 spec.extensions.push_back("VK_KHR_shader_float16_int8");
3913
3914 // Special SPIR-V code when using 16-bit floats.
3915 if (cases[caseNdx].caseFlags & FLAG_F16)
3916 {
3917 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3918 specializations["CAPABILITIES"] += "OpCapability Float16\n"; // Adds 16-bit float capability
3919 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
3920 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3921 specializations["TYPE_CONVERT"] += "%sc_final32 = OpConvertFToS %i32 %sc_final\n"; // Converts 16-bit float to 32-bit integer
3922 }
3923
3924 // Special SPIR-V code when using 8-bit integers.
3925 if (cases[caseNdx].caseFlags & FLAG_I8)
3926 {
3927 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3928 specializations["CAPABILITIES"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
3929 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
3930 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3931 specializations["TYPE_CONVERT"] += "%sc_final32 = OpSConvert %i32 %sc_final\n"; // Converts 8-bit integer to 32-bit integer
3932 }
3933
3934 spec.assembly = shaderTemplate.specialize(specializations);
3935 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3936 spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3937 spec.numWorkGroups = IVec3(numElements, 1, 1);
3938 cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3939 cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3940
3941 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), cases[caseNdx].caseName.c_str(), spec));
3942 }
3943
3944 ComputeShaderSpec spec;
3945
3946 spec.assembly =
3947 string(getComputeAsmShaderPreamble()) +
3948
3949 "OpName %main \"main\"\n"
3950 "OpName %id \"gl_GlobalInvocationID\"\n"
3951
3952 "OpDecorate %id BuiltIn GlobalInvocationId\n"
3953 "OpDecorate %sc_0 SpecId 0\n"
3954 "OpDecorate %sc_1 SpecId 1\n"
3955 "OpDecorate %sc_2 SpecId 2\n"
3956 "OpDecorate %i32arr ArrayStride 4\n"
3957
3958 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3959
3960 "%ivec3 = OpTypeVector %i32 3\n"
3961
3962 + getSpecConstantOpStructConstantsAndTypes() +
3963
3964 "%buf = OpTypeStruct %i32arr\n"
3965 "%bufptr = OpTypePointer Uniform %buf\n"
3966 "%indata = OpVariable %bufptr Uniform\n"
3967 "%outdata = OpVariable %bufptr Uniform\n"
3968
3969 "%id = OpVariable %uvec3ptr Input\n"
3970 "%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero\n"
3971 "%vec3_undef = OpUndef %ivec3\n"
3972
3973 + getSpecConstantOpStructComposites () +
3974
3975 "%sc_0 = OpSpecConstant %i32 0\n"
3976 "%sc_1 = OpSpecConstant %i32 0\n"
3977 "%sc_2 = OpSpecConstant %i32 0\n"
3978
3979 + getSpecConstantOpStructConstBlock () +
3980
3981 "%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0\n" // (sc_0, 0, 0)
3982 "%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1\n" // (0, sc_1, 0)
3983 "%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2\n" // (0, 0, sc_2)
3984 "%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
3985 "%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
3986 "%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
3987 "%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
3988 "%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
3989 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
3990 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
3991 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
3992 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
3993 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n" // (sc_2 - sc_0) * sc_1
3994
3995 "%main = OpFunction %void None %voidf\n"
3996 "%label = OpLabel\n"
3997
3998 + getSpecConstantOpStructInstructions() +
3999
4000 "%idval = OpLoad %uvec3 %id\n"
4001 "%x = OpCompositeExtract %u32 %idval 0\n"
4002 "%inloc = OpAccessChain %i32ptr %indata %zero %x\n"
4003 "%inval = OpLoad %i32 %inloc\n"
4004 "%final = OpIAdd %i32 %inval %sc_final\n"
4005 "%outloc = OpAccessChain %i32ptr %outdata %zero %x\n"
4006 " OpStore %outloc %final\n"
4007 " OpReturn\n"
4008 " OpFunctionEnd\n";
4009 spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4010 spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4011 spec.numWorkGroups = IVec3(numElements, 1, 1);
4012 spec.specConstants.append<deInt32>(123);
4013 spec.specConstants.append<deInt32>(56);
4014 spec.specConstants.append<deInt32>(-77);
4015
4016 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
4017
4018 return group.release();
4019 }
4020
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4021 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
4022 {
4023 ComputeShaderSpec specInt;
4024 ComputeShaderSpec specFloat;
4025 ComputeShaderSpec specFloat16;
4026 ComputeShaderSpec specVec3;
4027 ComputeShaderSpec specMat4;
4028 ComputeShaderSpec specArray;
4029 ComputeShaderSpec specStruct;
4030 de::Random rnd (deStringHash(group->getName()));
4031 const int numElements = 100;
4032 vector<float> inputFloats (numElements, 0);
4033 vector<float> outputFloats (numElements, 0);
4034 vector<deUint32> inputUints (numElements, 0);
4035 vector<deUint32> outputUints (numElements, 0);
4036
4037 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4038
4039 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4040 floorAll(inputFloats);
4041
4042 for (size_t ndx = 0; ndx < numElements; ++ndx)
4043 {
4044 // Just check if the value is positive or not
4045 outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4046 }
4047
4048 for (size_t ndx = 0; ndx < numElements; ++ndx)
4049 {
4050 inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4051 outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4052 }
4053
4054 // All of the tests are of the form:
4055 //
4056 // testtype r
4057 //
4058 // if (inputdata > 0)
4059 // r = 1
4060 // else
4061 // r = -1
4062 //
4063 // return (float)r
4064
4065 specFloat.assembly =
4066 string(getComputeAsmShaderPreamble()) +
4067
4068 "OpSource GLSL 430\n"
4069 "OpName %main \"main\"\n"
4070 "OpName %id \"gl_GlobalInvocationID\"\n"
4071
4072 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4073
4074 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4075
4076 "%id = OpVariable %uvec3ptr Input\n"
4077 "%zero = OpConstant %i32 0\n"
4078 "%float_0 = OpConstant %f32 0.0\n"
4079 "%float_1 = OpConstant %f32 1.0\n"
4080 "%float_n1 = OpConstant %f32 -1.0\n"
4081
4082 "%main = OpFunction %void None %voidf\n"
4083 "%entry = OpLabel\n"
4084 "%idval = OpLoad %uvec3 %id\n"
4085 "%x = OpCompositeExtract %u32 %idval 0\n"
4086 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4087 "%inval = OpLoad %f32 %inloc\n"
4088
4089 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4090 " OpSelectionMerge %cm None\n"
4091 " OpBranchConditional %comp %tb %fb\n"
4092 "%tb = OpLabel\n"
4093 " OpBranch %cm\n"
4094 "%fb = OpLabel\n"
4095 " OpBranch %cm\n"
4096 "%cm = OpLabel\n"
4097 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4098
4099 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4100 " OpStore %outloc %res\n"
4101 " OpReturn\n"
4102
4103 " OpFunctionEnd\n";
4104 specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4105 specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4106 specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4107
4108 specFloat16.assembly =
4109 "OpCapability Shader\n"
4110 "OpCapability Float16\n"
4111 "OpMemoryModel Logical GLSL450\n"
4112 "OpEntryPoint GLCompute %main \"main\" %id\n"
4113 "OpExecutionMode %main LocalSize 1 1 1\n"
4114
4115 "OpSource GLSL 430\n"
4116 "OpName %main \"main\"\n"
4117 "OpName %id \"gl_GlobalInvocationID\"\n"
4118
4119 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4120
4121 "OpDecorate %buf BufferBlock\n"
4122 "OpDecorate %indata DescriptorSet 0\n"
4123 "OpDecorate %indata Binding 0\n"
4124 "OpDecorate %outdata DescriptorSet 0\n"
4125 "OpDecorate %outdata Binding 1\n"
4126 "OpDecorate %u32arr ArrayStride 4\n"
4127 "OpMemberDecorate %buf 0 Offset 0\n"
4128
4129 + string(getComputeAsmCommonTypes()) +
4130
4131 "%f16 = OpTypeFloat 16\n"
4132 "%f16vec2 = OpTypeVector %f16 2\n"
4133 "%fvec2 = OpTypeVector %f32 2\n"
4134 "%u32ptr = OpTypePointer Uniform %u32\n"
4135 "%u32arr = OpTypeRuntimeArray %u32\n"
4136 "%f16_0 = OpConstant %f16 0.0\n"
4137
4138
4139 "%buf = OpTypeStruct %u32arr\n"
4140 "%bufptr = OpTypePointer Uniform %buf\n"
4141 "%indata = OpVariable %bufptr Uniform\n"
4142 "%outdata = OpVariable %bufptr Uniform\n"
4143
4144 "%id = OpVariable %uvec3ptr Input\n"
4145 "%zero = OpConstant %i32 0\n"
4146 "%float_0 = OpConstant %f32 0.0\n"
4147 "%float_1 = OpConstant %f32 1.0\n"
4148 "%float_n1 = OpConstant %f32 -1.0\n"
4149
4150 "%main = OpFunction %void None %voidf\n"
4151 "%entry = OpLabel\n"
4152 "%idval = OpLoad %uvec3 %id\n"
4153 "%x = OpCompositeExtract %u32 %idval 0\n"
4154 "%inloc = OpAccessChain %u32ptr %indata %zero %x\n"
4155 "%inval = OpLoad %u32 %inloc\n"
4156 "%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4157 "%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4158 "%f32_inval = OpFConvert %f32 %f16_inval\n"
4159
4160 "%comp = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4161 " OpSelectionMerge %cm None\n"
4162 " OpBranchConditional %comp %tb %fb\n"
4163 "%tb = OpLabel\n"
4164 " OpBranch %cm\n"
4165 "%fb = OpLabel\n"
4166 " OpBranch %cm\n"
4167 "%cm = OpLabel\n"
4168 "%res = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4169 "%f16_res = OpFConvert %f16 %res\n"
4170
4171 "%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4172 "%u32_res = OpBitcast %u32 %f16vec2_res\n"
4173
4174 "%outloc = OpAccessChain %u32ptr %outdata %zero %x\n"
4175 " OpStore %outloc %u32_res\n"
4176 " OpReturn\n"
4177
4178 " OpFunctionEnd\n";
4179
4180 specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4181 specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4182 specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4183 specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4184
4185 specMat4.assembly =
4186 string(getComputeAsmShaderPreamble()) +
4187
4188 "OpSource GLSL 430\n"
4189 "OpName %main \"main\"\n"
4190 "OpName %id \"gl_GlobalInvocationID\"\n"
4191
4192 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4193
4194 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4195
4196 "%id = OpVariable %uvec3ptr Input\n"
4197 "%v4f32 = OpTypeVector %f32 4\n"
4198 "%mat4v4f32 = OpTypeMatrix %v4f32 4\n"
4199 "%zero = OpConstant %i32 0\n"
4200 "%float_0 = OpConstant %f32 0.0\n"
4201 "%float_1 = OpConstant %f32 1.0\n"
4202 "%float_n1 = OpConstant %f32 -1.0\n"
4203 "%m11 = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4204 "%m12 = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4205 "%m13 = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4206 "%m14 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4207 "%m1 = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4208 "%m21 = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4209 "%m22 = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4210 "%m23 = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4211 "%m24 = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4212 "%m2 = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4213
4214 "%main = OpFunction %void None %voidf\n"
4215 "%entry = OpLabel\n"
4216 "%idval = OpLoad %uvec3 %id\n"
4217 "%x = OpCompositeExtract %u32 %idval 0\n"
4218 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4219 "%inval = OpLoad %f32 %inloc\n"
4220
4221 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4222 " OpSelectionMerge %cm None\n"
4223 " OpBranchConditional %comp %tb %fb\n"
4224 "%tb = OpLabel\n"
4225 " OpBranch %cm\n"
4226 "%fb = OpLabel\n"
4227 " OpBranch %cm\n"
4228 "%cm = OpLabel\n"
4229 "%mres = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4230 "%res = OpCompositeExtract %f32 %mres 2 2\n"
4231
4232 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4233 " OpStore %outloc %res\n"
4234 " OpReturn\n"
4235
4236 " OpFunctionEnd\n";
4237 specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4238 specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4239 specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4240
4241 specVec3.assembly =
4242 string(getComputeAsmShaderPreamble()) +
4243
4244 "OpSource GLSL 430\n"
4245 "OpName %main \"main\"\n"
4246 "OpName %id \"gl_GlobalInvocationID\"\n"
4247
4248 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4249
4250 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4251
4252 "%id = OpVariable %uvec3ptr Input\n"
4253 "%zero = OpConstant %i32 0\n"
4254 "%float_0 = OpConstant %f32 0.0\n"
4255 "%float_1 = OpConstant %f32 1.0\n"
4256 "%float_n1 = OpConstant %f32 -1.0\n"
4257 "%v1 = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4258 "%v2 = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4259
4260 "%main = OpFunction %void None %voidf\n"
4261 "%entry = OpLabel\n"
4262 "%idval = OpLoad %uvec3 %id\n"
4263 "%x = OpCompositeExtract %u32 %idval 0\n"
4264 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4265 "%inval = OpLoad %f32 %inloc\n"
4266
4267 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4268 " OpSelectionMerge %cm None\n"
4269 " OpBranchConditional %comp %tb %fb\n"
4270 "%tb = OpLabel\n"
4271 " OpBranch %cm\n"
4272 "%fb = OpLabel\n"
4273 " OpBranch %cm\n"
4274 "%cm = OpLabel\n"
4275 "%vres = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4276 "%res = OpCompositeExtract %f32 %vres 2\n"
4277
4278 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4279 " OpStore %outloc %res\n"
4280 " OpReturn\n"
4281
4282 " OpFunctionEnd\n";
4283 specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4284 specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4285 specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4286
4287 specInt.assembly =
4288 string(getComputeAsmShaderPreamble()) +
4289
4290 "OpSource GLSL 430\n"
4291 "OpName %main \"main\"\n"
4292 "OpName %id \"gl_GlobalInvocationID\"\n"
4293
4294 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4295
4296 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4297
4298 "%id = OpVariable %uvec3ptr Input\n"
4299 "%zero = OpConstant %i32 0\n"
4300 "%float_0 = OpConstant %f32 0.0\n"
4301 "%i1 = OpConstant %i32 1\n"
4302 "%i2 = OpConstant %i32 -1\n"
4303
4304 "%main = OpFunction %void None %voidf\n"
4305 "%entry = OpLabel\n"
4306 "%idval = OpLoad %uvec3 %id\n"
4307 "%x = OpCompositeExtract %u32 %idval 0\n"
4308 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4309 "%inval = OpLoad %f32 %inloc\n"
4310
4311 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4312 " OpSelectionMerge %cm None\n"
4313 " OpBranchConditional %comp %tb %fb\n"
4314 "%tb = OpLabel\n"
4315 " OpBranch %cm\n"
4316 "%fb = OpLabel\n"
4317 " OpBranch %cm\n"
4318 "%cm = OpLabel\n"
4319 "%ires = OpPhi %i32 %i1 %tb %i2 %fb\n"
4320 "%res = OpConvertSToF %f32 %ires\n"
4321
4322 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4323 " OpStore %outloc %res\n"
4324 " OpReturn\n"
4325
4326 " OpFunctionEnd\n";
4327 specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4328 specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4329 specInt.numWorkGroups = IVec3(numElements, 1, 1);
4330
4331 specArray.assembly =
4332 string(getComputeAsmShaderPreamble()) +
4333
4334 "OpSource GLSL 430\n"
4335 "OpName %main \"main\"\n"
4336 "OpName %id \"gl_GlobalInvocationID\"\n"
4337
4338 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4339
4340 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4341
4342 "%id = OpVariable %uvec3ptr Input\n"
4343 "%zero = OpConstant %i32 0\n"
4344 "%u7 = OpConstant %u32 7\n"
4345 "%float_0 = OpConstant %f32 0.0\n"
4346 "%float_1 = OpConstant %f32 1.0\n"
4347 "%float_n1 = OpConstant %f32 -1.0\n"
4348 "%f32a7 = OpTypeArray %f32 %u7\n"
4349 "%a1 = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4350 "%a2 = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4351 "%main = OpFunction %void None %voidf\n"
4352 "%entry = OpLabel\n"
4353 "%idval = OpLoad %uvec3 %id\n"
4354 "%x = OpCompositeExtract %u32 %idval 0\n"
4355 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4356 "%inval = OpLoad %f32 %inloc\n"
4357
4358 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4359 " OpSelectionMerge %cm None\n"
4360 " OpBranchConditional %comp %tb %fb\n"
4361 "%tb = OpLabel\n"
4362 " OpBranch %cm\n"
4363 "%fb = OpLabel\n"
4364 " OpBranch %cm\n"
4365 "%cm = OpLabel\n"
4366 "%ares = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4367 "%res = OpCompositeExtract %f32 %ares 5\n"
4368
4369 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4370 " OpStore %outloc %res\n"
4371 " OpReturn\n"
4372
4373 " OpFunctionEnd\n";
4374 specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4375 specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4376 specArray.numWorkGroups = IVec3(numElements, 1, 1);
4377
4378 specStruct.assembly =
4379 string(getComputeAsmShaderPreamble()) +
4380
4381 "OpSource GLSL 430\n"
4382 "OpName %main \"main\"\n"
4383 "OpName %id \"gl_GlobalInvocationID\"\n"
4384
4385 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4386
4387 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4388
4389 "%id = OpVariable %uvec3ptr Input\n"
4390 "%zero = OpConstant %i32 0\n"
4391 "%float_0 = OpConstant %f32 0.0\n"
4392 "%float_1 = OpConstant %f32 1.0\n"
4393 "%float_n1 = OpConstant %f32 -1.0\n"
4394
4395 "%v2f32 = OpTypeVector %f32 2\n"
4396 "%Data2 = OpTypeStruct %f32 %v2f32\n"
4397 "%Data = OpTypeStruct %Data2 %f32\n"
4398
4399 "%in1a = OpConstantComposite %v2f32 %float_1 %float_1\n"
4400 "%in1b = OpConstantComposite %Data2 %float_1 %in1a\n"
4401 "%s1 = OpConstantComposite %Data %in1b %float_1\n"
4402 "%in2a = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4403 "%in2b = OpConstantComposite %Data2 %float_n1 %in2a\n"
4404 "%s2 = OpConstantComposite %Data %in2b %float_n1\n"
4405
4406 "%main = OpFunction %void None %voidf\n"
4407 "%entry = OpLabel\n"
4408 "%idval = OpLoad %uvec3 %id\n"
4409 "%x = OpCompositeExtract %u32 %idval 0\n"
4410 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4411 "%inval = OpLoad %f32 %inloc\n"
4412
4413 "%comp = OpFOrdGreaterThan %bool %inval %float_0\n"
4414 " OpSelectionMerge %cm None\n"
4415 " OpBranchConditional %comp %tb %fb\n"
4416 "%tb = OpLabel\n"
4417 " OpBranch %cm\n"
4418 "%fb = OpLabel\n"
4419 " OpBranch %cm\n"
4420 "%cm = OpLabel\n"
4421 "%sres = OpPhi %Data %s1 %tb %s2 %fb\n"
4422 "%res = OpCompositeExtract %f32 %sres 0 0\n"
4423
4424 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4425 " OpStore %outloc %res\n"
4426 " OpReturn\n"
4427
4428 " OpFunctionEnd\n";
4429 specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4430 specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4431 specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4432
4433 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4434 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4435 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4436 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4437 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4438 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4439 group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4440 }
4441
// Builds SPIR-V assembly declaring `count` 32-bit float constants.
// Constant number n (starting at 0) is named %cf<10n+5> and has the value <10n+5>.0,
// i.e. %cf5, %cf15, %cf25, ... The declarations are followed by one empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream defs;

	for (int constNdx = 0; constNdx < count; ++constNdx)
	{
		const int value = constNdx * 10 + 5;
		defs << "%cf" << value << " = OpConstant %f32 " << value << ".0\n";
	}

	defs << "\n";
	return defs.str();
}
4450
// Builds the operand list that follows the default label of an OpSwitch:
// for every n in [0, count) the pair " n %case<n>" is appended, and the
// line is terminated with a newline.
string generateSwitchCases (int count)
{
	std::ostringstream operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		operands << " " << caseNdx;
		operands << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4459
// Builds `count` switch target blocks. Block n consists of the label
// %case<n> and an unconditional branch to %phi. An empty line is
// appended after the last block.
string generateSwitchTargets (int count)
{
	std::ostringstream blocks;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		blocks << "%case" << caseNdx << " = OpLabel\n OpBranch %phi\n";
	}

	blocks << "\n";
	return blocks.str();
}
4468
// Builds the (value, parent block) operand pairs of an OpPhi with `count`
// incoming edges: pair n is " %cf<10n+5> %case<n>". The line is terminated
// with a newline.
string generateOpPhiParams (int count)
{
	std::ostringstream operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		const int constantId = caseNdx * 10 + 5;
		operands << " %cf" << constantId << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4477
// Formats an integer as decimal text so it can be spliced into assembly source.
string generateIntWidth (int value)
{
	std::ostringstream text;

	text << value;

	return text.str();
}
4484
// Returns a copy of s in which "ABC" has been inserted after some of the
// characters. For every input character, acc is increased by add; if acc
// then exceeds the threshold (parameter treshold), the threshold is
// subtracted from acc and "ABC" is emitted right after that character.
// acc is passed by reference, so its final value is visible to the caller
// and can be fed into the next call; this keeps the injections non-uniform
// and the result much shorter than injecting after every character.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	string expanded;

	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor)
	{
		expanded += *cursor;
		acc += add;

		// Below or at the threshold: no injection after this character.
		if (acc <= treshold)
			continue;

		acc -= treshold;
		expanded += "ABC";
	}

	return expanded;
}
4507
4508 // Calculate expected result based on the code string
calcOpPhiCase5(float val,const string & s)4509 float calcOpPhiCase5 (float val, const string& s)
4510 {
4511 const char* p = s.c_str();
4512 float x[8];
4513 bool b[8];
4514 const float tv[8] = { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4515 const float v = deFloatAbs(val);
4516 float res = 0;
4517 int depth = -1;
4518 int skip = 0;
4519
4520 for (int i = 7; i >= 0; --i)
4521 x[i] = std::fmod((float)v, (float)(2 << i));
4522 for (int i = 7; i >= 0; --i)
4523 b[i] = x[i] > tv[i];
4524
4525 while (*p)
4526 {
4527 if (*p == 'A')
4528 {
4529 depth++;
4530 if (skip == 0 && b[depth])
4531 {
4532 res++;
4533 }
4534 else
4535 skip++;
4536 }
4537 if (*p == 'B')
4538 {
4539 if (skip)
4540 skip--;
4541 if (b[depth] || skip)
4542 skip++;
4543 }
4544 if (*p == 'C')
4545 {
4546 depth--;
4547 if (skip)
4548 skip--;
4549 }
4550 p++;
4551 }
4552 return res;
4553 }
4554
// Translates a code string into SPIR-V assembly for nested selections.
//
// Meaning of the letters:
//
//   A: open an if on the condition of the current nesting depth and
//      increment the running result in its true branch
//        if (certain bit is set) { result++;
//   B: close the true branch and open the else branch
//        } else {
//   C: close the else branch
//        }
//
// Examples:
//   AABCBC gives if(){r++;if(){r++;}else{}}else{}
//   ABABCC gives if(){r++;}else{if(){r++;}else{}}
//   ABCABC gives if(){r++;}else{}if(){r++;}else{}
//
// Every 'A' uses its character position in the string as the numeric id of
// its labels (%t<id>, %f<id>, %m<id>) and values (%rt<id>, %rm<id>), and
// tests %b<depth>. Else branches produce no new value, so the generator
// tracks, per nesting level, the most recent value together with the block
// it arrives from; each merge block combines both sides with an OpPhi. The
// OpPhi emitted for the final character of the string is named %res.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>			openIds;			// ids of the currently open ifs, -1 at the bottom
	std::stack<std::string>	liveValue;			// current result value per level
	std::stack<std::string>	liveValueAndBlock;	// same value followed by the block it comes from
	std::stack<std::string>	thenIncoming;		// pending OpPhi operands from true branches
	std::stack<std::string>	elseIncoming;		// pending OpPhi operands from else branches
	std::ostringstream		code;
	int						nesting		= -1;
	int						activeId	= 0;
	int						position	= 0;

	openIds.push(-1);
	liveValue.push("%f32_0");
	liveValueAndBlock.push("%f32_0 %entry");

	for (const char* cursor = s.c_str(); *cursor; ++cursor, ++position)
	{
		switch (*cursor)
		{
			case 'A':
			{
				++nesting;
				activeId = position;
				openIds.push(activeId);

				code << "\tOpSelectionMerge %m" << activeId << " None\n";
				code << "\tOpBranchConditional %b" << nesting << " %t" << activeId << " %f" << activeId << "\n";
				code << "%t" << activeId << " = OpLabel\n";
				code << "%rt" << activeId << " = OpFAdd %f32 " << liveValue.top() << " %f32_1\n";

				// The incremented value becomes the live value inside the true branch.
				std::ostringstream name;
				name << "%rt" << activeId;
				liveValue.push(name.str());
				name << " %t" << activeId;
				liveValueAndBlock.push(name.str());
				break;
			}

			case 'B':
			{
				// Remember what the true branch delivers, then drop back to the
				// value that was live before the if.
				thenIncoming.push(liveValueAndBlock.top());
				liveValue.pop();
				liveValueAndBlock.pop();

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%f" << activeId << " = OpLabel\n";

				// In the else branch the old value now arrives from the %f block.
				std::ostringstream name;
				name << liveValue.top() << " %f" << activeId;
				liveValueAndBlock.pop();
				liveValueAndBlock.push(name.str());
				break;
			}

			case 'C':
			{
				elseIncoming.push(liveValueAndBlock.top());

				code << "\tOpBranch %m" << activeId << "\n";
				code << "%m" << activeId << " = OpLabel\n";

				if (cursor[1] == 0)
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << activeId;

				code << " = OpPhi %f32 " << thenIncoming.top() << " " << elseIncoming.top() << "\n";

				// The merged value replaces the live value of the enclosing level.
				std::ostringstream name;
				name << "%rm" << activeId;
				liveValue.pop();
				liveValue.push(name.str());
				name << " %m" << activeId;
				liveValueAndBlock.pop();
				liveValueAndBlock.push(name.str());

				thenIncoming.pop();
				elseIncoming.pop();

				--nesting;
				openIds.pop();
				activeId = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4653
createOpPhiGroup(tcu::TestContext & testCtx)4654 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4655 {
4656 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4657 ComputeShaderSpec spec1;
4658 ComputeShaderSpec spec2;
4659 ComputeShaderSpec spec3;
4660 ComputeShaderSpec spec4;
4661 ComputeShaderSpec spec5;
4662 de::Random rnd (deStringHash(group->getName()));
4663 const int numElements = 100;
4664 vector<float> inputFloats (numElements, 0);
4665 vector<float> outputFloats1 (numElements, 0);
4666 vector<float> outputFloats2 (numElements, 0);
4667 vector<float> outputFloats3 (numElements, 0);
4668 vector<float> outputFloats4 (numElements, 0);
4669 vector<float> outputFloats5 (numElements, 0);
4670 std::string codestring = "ABC";
4671 const int test4Width = 512;
4672
4673 // Build case 5 code string. Each iteration makes the hierarchy more complicated.
4674 // 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4675 // shader code.
4676 for (int i = 0, acc = 0; i < 9; i++)
4677 codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4678
4679 fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4680
4681 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4682 floorAll(inputFloats);
4683
4684 for (size_t ndx = 0; ndx < numElements; ++ndx)
4685 {
4686 switch (ndx % 3)
4687 {
4688 case 0: outputFloats1[ndx] = inputFloats[ndx] + 5.5f; break;
4689 case 1: outputFloats1[ndx] = inputFloats[ndx] + 20.5f; break;
4690 case 2: outputFloats1[ndx] = inputFloats[ndx] + 1.75f; break;
4691 default: break;
4692 }
4693 outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4694 outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4695
4696 int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4697 outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4698
4699 outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4700 }
4701
4702 spec1.assembly =
4703 string(getComputeAsmShaderPreamble()) +
4704
4705 "OpSource GLSL 430\n"
4706 "OpName %main \"main\"\n"
4707 "OpName %id \"gl_GlobalInvocationID\"\n"
4708
4709 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4710
4711 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4712
4713 "%id = OpVariable %uvec3ptr Input\n"
4714 "%zero = OpConstant %i32 0\n"
4715 "%three = OpConstant %u32 3\n"
4716 "%constf5p5 = OpConstant %f32 5.5\n"
4717 "%constf20p5 = OpConstant %f32 20.5\n"
4718 "%constf1p75 = OpConstant %f32 1.75\n"
4719 "%constf8p5 = OpConstant %f32 8.5\n"
4720 "%constf6p5 = OpConstant %f32 6.5\n"
4721
4722 "%main = OpFunction %void None %voidf\n"
4723 "%entry = OpLabel\n"
4724 "%idval = OpLoad %uvec3 %id\n"
4725 "%x = OpCompositeExtract %u32 %idval 0\n"
4726 "%selector = OpUMod %u32 %x %three\n"
4727 " OpSelectionMerge %phi None\n"
4728 " OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4729
4730 // Case 1 before OpPhi.
4731 "%case1 = OpLabel\n"
4732 " OpBranch %phi\n"
4733
4734 "%default = OpLabel\n"
4735 " OpUnreachable\n"
4736
4737 "%phi = OpLabel\n"
4738 "%operand = OpPhi %f32 %constf1p75 %case2 %constf20p5 %case1 %constf5p5 %case0\n" // not in the order of blocks
4739 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4740 "%inval = OpLoad %f32 %inloc\n"
4741 "%add = OpFAdd %f32 %inval %operand\n"
4742 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4743 " OpStore %outloc %add\n"
4744 " OpReturn\n"
4745
4746 // Case 0 after OpPhi.
4747 "%case0 = OpLabel\n"
4748 " OpBranch %phi\n"
4749
4750
4751 // Case 2 after OpPhi.
4752 "%case2 = OpLabel\n"
4753 " OpBranch %phi\n"
4754
4755 " OpFunctionEnd\n";
4756 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4757 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4758 spec1.numWorkGroups = IVec3(numElements, 1, 1);
4759
4760 group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4761
4762 spec2.assembly =
4763 string(getComputeAsmShaderPreamble()) +
4764
4765 "OpName %main \"main\"\n"
4766 "OpName %id \"gl_GlobalInvocationID\"\n"
4767
4768 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4769
4770 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4771
4772 "%id = OpVariable %uvec3ptr Input\n"
4773 "%zero = OpConstant %i32 0\n"
4774 "%one = OpConstant %i32 1\n"
4775 "%three = OpConstant %i32 3\n"
4776 "%constf6p5 = OpConstant %f32 6.5\n"
4777
4778 "%main = OpFunction %void None %voidf\n"
4779 "%entry = OpLabel\n"
4780 "%idval = OpLoad %uvec3 %id\n"
4781 "%x = OpCompositeExtract %u32 %idval 0\n"
4782 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4783 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4784 "%inval = OpLoad %f32 %inloc\n"
4785 " OpBranch %phi\n"
4786
4787 "%phi = OpLabel\n"
4788 "%step = OpPhi %i32 %zero %entry %step_next %phi\n"
4789 "%accum = OpPhi %f32 %inval %entry %accum_next %phi\n"
4790 "%step_next = OpIAdd %i32 %step %one\n"
4791 "%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4792 "%still_loop = OpSLessThan %bool %step %three\n"
4793 " OpLoopMerge %exit %phi None\n"
4794 " OpBranchConditional %still_loop %phi %exit\n"
4795
4796 "%exit = OpLabel\n"
4797 " OpStore %outloc %accum\n"
4798 " OpReturn\n"
4799 " OpFunctionEnd\n";
4800 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4801 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4802 spec2.numWorkGroups = IVec3(numElements, 1, 1);
4803
4804 group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4805
4806 spec3.assembly =
4807 string(getComputeAsmShaderPreamble()) +
4808
4809 "OpName %main \"main\"\n"
4810 "OpName %id \"gl_GlobalInvocationID\"\n"
4811
4812 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4813
4814 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4815
4816 "%f32ptr_f = OpTypePointer Function %f32\n"
4817 "%id = OpVariable %uvec3ptr Input\n"
4818 "%true = OpConstantTrue %bool\n"
4819 "%false = OpConstantFalse %bool\n"
4820 "%zero = OpConstant %i32 0\n"
4821 "%constf8p5 = OpConstant %f32 8.5\n"
4822
4823 "%main = OpFunction %void None %voidf\n"
4824 "%entry = OpLabel\n"
4825 "%b = OpVariable %f32ptr_f Function %constf8p5\n"
4826 "%idval = OpLoad %uvec3 %id\n"
4827 "%x = OpCompositeExtract %u32 %idval 0\n"
4828 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4829 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4830 "%a_init = OpLoad %f32 %inloc\n"
4831 "%b_init = OpLoad %f32 %b\n"
4832 " OpBranch %phi\n"
4833
4834 "%phi = OpLabel\n"
4835 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
4836 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
4837 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
4838 " OpLoopMerge %exit %phi None\n"
4839 " OpBranchConditional %still_loop %phi %exit\n"
4840
4841 "%exit = OpLabel\n"
4842 "%sub = OpFSub %f32 %a_next %b_next\n"
4843 " OpStore %outloc %sub\n"
4844 " OpReturn\n"
4845 " OpFunctionEnd\n";
4846 spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4847 spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4848 spec3.numWorkGroups = IVec3(numElements, 1, 1);
4849
4850 group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4851
4852 spec4.assembly =
4853 "OpCapability Shader\n"
4854 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4855 "OpMemoryModel Logical GLSL450\n"
4856 "OpEntryPoint GLCompute %main \"main\" %id\n"
4857 "OpExecutionMode %main LocalSize 1 1 1\n"
4858
4859 "OpSource GLSL 430\n"
4860 "OpName %main \"main\"\n"
4861 "OpName %id \"gl_GlobalInvocationID\"\n"
4862
4863 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4864
4865 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4866
4867 "%id = OpVariable %uvec3ptr Input\n"
4868 "%zero = OpConstant %i32 0\n"
4869 "%cimod = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4870
4871 + generateConstantDefinitions(test4Width) +
4872
4873 "%main = OpFunction %void None %voidf\n"
4874 "%entry = OpLabel\n"
4875 "%idval = OpLoad %uvec3 %id\n"
4876 "%x = OpCompositeExtract %u32 %idval 0\n"
4877 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4878 "%inval = OpLoad %f32 %inloc\n"
4879 "%xf = OpConvertUToF %f32 %x\n"
4880 "%xm = OpFMul %f32 %xf %inval\n"
4881 "%xa = OpExtInst %f32 %ext FAbs %xm\n"
4882 "%xi = OpConvertFToU %u32 %xa\n"
4883 "%selector = OpUMod %u32 %xi %cimod\n"
4884 " OpSelectionMerge %phi None\n"
4885 " OpSwitch %selector %default "
4886
4887 + generateSwitchCases(test4Width) +
4888
4889 "%default = OpLabel\n"
4890 " OpUnreachable\n"
4891
4892 + generateSwitchTargets(test4Width) +
4893
4894 "%phi = OpLabel\n"
4895 "%result = OpPhi %f32"
4896
4897 + generateOpPhiParams(test4Width) +
4898
4899 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4900 " OpStore %outloc %result\n"
4901 " OpReturn\n"
4902
4903 " OpFunctionEnd\n";
4904 spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4905 spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4906 spec4.numWorkGroups = IVec3(numElements, 1, 1);
4907
4908 group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4909
4910 spec5.assembly =
4911 "OpCapability Shader\n"
4912 "%ext = OpExtInstImport \"GLSL.std.450\"\n"
4913 "OpMemoryModel Logical GLSL450\n"
4914 "OpEntryPoint GLCompute %main \"main\" %id\n"
4915 "OpExecutionMode %main LocalSize 1 1 1\n"
4916 "%code = OpString \"" + codestring + "\"\n"
4917
4918 "OpSource GLSL 430\n"
4919 "OpName %main \"main\"\n"
4920 "OpName %id \"gl_GlobalInvocationID\"\n"
4921
4922 "OpDecorate %id BuiltIn GlobalInvocationId\n"
4923
4924 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4925
4926 "%id = OpVariable %uvec3ptr Input\n"
4927 "%zero = OpConstant %i32 0\n"
4928 "%f32_0 = OpConstant %f32 0.0\n"
4929 "%f32_0_5 = OpConstant %f32 0.5\n"
4930 "%f32_1 = OpConstant %f32 1.0\n"
4931 "%f32_1_5 = OpConstant %f32 1.5\n"
4932 "%f32_2 = OpConstant %f32 2.0\n"
4933 "%f32_3_5 = OpConstant %f32 3.5\n"
4934 "%f32_4 = OpConstant %f32 4.0\n"
4935 "%f32_7_5 = OpConstant %f32 7.5\n"
4936 "%f32_8 = OpConstant %f32 8.0\n"
4937 "%f32_15_5 = OpConstant %f32 15.5\n"
4938 "%f32_16 = OpConstant %f32 16.0\n"
4939 "%f32_31_5 = OpConstant %f32 31.5\n"
4940 "%f32_32 = OpConstant %f32 32.0\n"
4941 "%f32_63_5 = OpConstant %f32 63.5\n"
4942 "%f32_64 = OpConstant %f32 64.0\n"
4943 "%f32_127_5 = OpConstant %f32 127.5\n"
4944 "%f32_128 = OpConstant %f32 128.0\n"
4945 "%f32_256 = OpConstant %f32 256.0\n"
4946
4947 "%main = OpFunction %void None %voidf\n"
4948 "%entry = OpLabel\n"
4949 "%idval = OpLoad %uvec3 %id\n"
4950 "%x = OpCompositeExtract %u32 %idval 0\n"
4951 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
4952 "%inval = OpLoad %f32 %inloc\n"
4953
4954 "%xabs = OpExtInst %f32 %ext FAbs %inval\n"
4955 "%x8 = OpFMod %f32 %xabs %f32_256\n"
4956 "%x7 = OpFMod %f32 %xabs %f32_128\n"
4957 "%x6 = OpFMod %f32 %xabs %f32_64\n"
4958 "%x5 = OpFMod %f32 %xabs %f32_32\n"
4959 "%x4 = OpFMod %f32 %xabs %f32_16\n"
4960 "%x3 = OpFMod %f32 %xabs %f32_8\n"
4961 "%x2 = OpFMod %f32 %xabs %f32_4\n"
4962 "%x1 = OpFMod %f32 %xabs %f32_2\n"
4963
4964 "%b7 = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4965 "%b6 = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4966 "%b5 = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4967 "%b4 = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4968 "%b3 = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4969 "%b2 = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4970 "%b1 = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4971 "%b0 = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4972
4973 + generateOpPhiCase5(codestring) +
4974
4975 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
4976 " OpStore %outloc %res\n"
4977 " OpReturn\n"
4978
4979 " OpFunctionEnd\n";
4980 spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4981 spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4982 spec5.numWorkGroups = IVec3(numElements, 1, 1);
4983
4984 group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4985
4986 createOpPhiVartypeTests(group, testCtx);
4987
4988 return group.release();
4989 }
4990
4991 // Assembly code used for testing block order is based on GLSL source code:
4992 //
4993 // #version 430
4994 //
4995 // layout(std140, set = 0, binding = 0) readonly buffer Input {
4996 // float elements[];
4997 // } input_data;
4998 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
4999 // float elements[];
5000 // } output_data;
5001 //
5002 // void main() {
5003 // uint x = gl_GlobalInvocationID.x;
5004 // output_data.elements[x] = input_data.elements[x];
5005 // if (x > uint(50)) {
5006 // switch (x % uint(3)) {
5007 // case 0: output_data.elements[x] += 1.5f; break;
5008 // case 1: output_data.elements[x] += 42.f; break;
5009 // case 2: output_data.elements[x] -= 27.f; break;
5010 // default: break;
5011 // }
5012 // } else {
5013 // output_data.elements[x] = -input_data.elements[x];
5014 // }
5015 // }
createBlockOrderGroup(tcu::TestContext & testCtx)5016 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
5017 {
5018 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
5019 ComputeShaderSpec spec;
5020 de::Random rnd (deStringHash(group->getName()));
5021 const int numElements = 100;
5022 vector<float> inputFloats (numElements, 0);
5023 vector<float> outputFloats (numElements, 0);
5024
5025 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5026
5027 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5028 floorAll(inputFloats);
5029
5030 for (size_t ndx = 0; ndx <= 50; ++ndx)
5031 outputFloats[ndx] = -inputFloats[ndx];
5032
5033 for (size_t ndx = 51; ndx < numElements; ++ndx)
5034 {
5035 switch (ndx % 3)
5036 {
5037 case 0: outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
5038 case 1: outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
5039 case 2: outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
5040 default: break;
5041 }
5042 }
5043
5044 spec.assembly =
5045 string(getComputeAsmShaderPreamble()) +
5046
5047 "OpSource GLSL 430\n"
5048 "OpName %main \"main\"\n"
5049 "OpName %id \"gl_GlobalInvocationID\"\n"
5050
5051 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5052
5053 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5054
5055 "%u32ptr = OpTypePointer Function %u32\n"
5056 "%u32ptr_input = OpTypePointer Input %u32\n"
5057
5058 + string(getComputeAsmInputOutputBuffer()) +
5059
5060 "%id = OpVariable %uvec3ptr Input\n"
5061 "%zero = OpConstant %i32 0\n"
5062 "%const3 = OpConstant %u32 3\n"
5063 "%const50 = OpConstant %u32 50\n"
5064 "%constf1p5 = OpConstant %f32 1.5\n"
5065 "%constf27 = OpConstant %f32 27.0\n"
5066 "%constf42 = OpConstant %f32 42.0\n"
5067
5068 "%main = OpFunction %void None %voidf\n"
5069
5070 // entry block.
5071 "%entry = OpLabel\n"
5072
5073 // Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5074 "%xvar = OpVariable %u32ptr Function\n"
5075 "%xptr = OpAccessChain %u32ptr_input %id %zero\n"
5076 "%x = OpLoad %u32 %xptr\n"
5077 " OpStore %xvar %x\n"
5078
5079 "%cmp = OpUGreaterThan %bool %x %const50\n"
5080 " OpSelectionMerge %if_merge None\n"
5081 " OpBranchConditional %cmp %if_true %if_false\n"
5082
5083 // False branch for if-statement: placed in the middle of switch cases and before true branch.
5084 "%if_false = OpLabel\n"
5085 "%x_f = OpLoad %u32 %xvar\n"
5086 "%inloc_f = OpAccessChain %f32ptr %indata %zero %x_f\n"
5087 "%inval_f = OpLoad %f32 %inloc_f\n"
5088 "%negate = OpFNegate %f32 %inval_f\n"
5089 "%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5090 " OpStore %outloc_f %negate\n"
5091 " OpBranch %if_merge\n"
5092
5093 // Merge block for if-statement: placed in the middle of true and false branch.
5094 "%if_merge = OpLabel\n"
5095 " OpReturn\n"
5096
5097 // True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5098 "%if_true = OpLabel\n"
5099 "%xval_t = OpLoad %u32 %xvar\n"
5100 "%mod = OpUMod %u32 %xval_t %const3\n"
5101 " OpSelectionMerge %switch_merge None\n"
5102 " OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5103
5104 // Merge block for switch-statement: placed before the case
5105 // bodies. But it must follow OpSwitch which dominates it.
5106 "%switch_merge = OpLabel\n"
5107 " OpBranch %if_merge\n"
5108
5109 // Case 1 for switch-statement: placed before case 0.
5110 // It must follow the OpSwitch that dominates it.
5111 "%case1 = OpLabel\n"
5112 "%x_1 = OpLoad %u32 %xvar\n"
5113 "%inloc_1 = OpAccessChain %f32ptr %indata %zero %x_1\n"
5114 "%inval_1 = OpLoad %f32 %inloc_1\n"
5115 "%addf42 = OpFAdd %f32 %inval_1 %constf42\n"
5116 "%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5117 " OpStore %outloc_1 %addf42\n"
5118 " OpBranch %switch_merge\n"
5119
5120 // Case 2 for switch-statement.
5121 "%case2 = OpLabel\n"
5122 "%x_2 = OpLoad %u32 %xvar\n"
5123 "%inloc_2 = OpAccessChain %f32ptr %indata %zero %x_2\n"
5124 "%inval_2 = OpLoad %f32 %inloc_2\n"
5125 "%subf27 = OpFSub %f32 %inval_2 %constf27\n"
5126 "%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5127 " OpStore %outloc_2 %subf27\n"
5128 " OpBranch %switch_merge\n"
5129
5130 // Default case for switch-statement: placed in the middle of normal cases.
5131 "%default = OpLabel\n"
5132 " OpBranch %switch_merge\n"
5133
5134 // Case 0 for switch-statement: out of order.
5135 "%case0 = OpLabel\n"
5136 "%x_0 = OpLoad %u32 %xvar\n"
5137 "%inloc_0 = OpAccessChain %f32ptr %indata %zero %x_0\n"
5138 "%inval_0 = OpLoad %f32 %inloc_0\n"
5139 "%addf1p5 = OpFAdd %f32 %inval_0 %constf1p5\n"
5140 "%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5141 " OpStore %outloc_0 %addf1p5\n"
5142 " OpBranch %switch_merge\n"
5143
5144 " OpFunctionEnd\n";
5145 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5146 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5147 spec.numWorkGroups = IVec3(numElements, 1, 1);
5148
5149 group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
5150
5151 return group.release();
5152 }
5153
createMultipleShaderGroup(tcu::TestContext & testCtx)5154 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5155 {
5156 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5157 ComputeShaderSpec spec1;
5158 ComputeShaderSpec spec2;
5159 de::Random rnd (deStringHash(group->getName()));
5160 const int numElements = 100;
5161 vector<float> inputFloats (numElements, 0);
5162 vector<float> outputFloats1 (numElements, 0);
5163 vector<float> outputFloats2 (numElements, 0);
5164 fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5165
5166 for (size_t ndx = 0; ndx < numElements; ++ndx)
5167 {
5168 outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5169 outputFloats2[ndx] = -inputFloats[ndx];
5170 }
5171
5172 const string assembly(
5173 "OpCapability Shader\n"
5174 "OpMemoryModel Logical GLSL450\n"
5175 "OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5176 "OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5177 // A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5178 "OpEntryPoint Vertex %vert_main \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5179 "OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5180 "OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5181
5182 "OpName %comp_main1 \"entrypoint1\"\n"
5183 "OpName %comp_main2 \"entrypoint2\"\n"
5184 "OpName %vert_main \"entrypoint2\"\n"
5185 "OpName %id \"gl_GlobalInvocationID\"\n"
5186 "OpName %vert_builtin_st \"gl_PerVertex\"\n"
5187 "OpName %vertexIndex \"gl_VertexIndex\"\n"
5188 "OpName %instanceIndex \"gl_InstanceIndex\"\n"
5189 "OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5190 "OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5191 "OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5192
5193 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5194 "OpDecorate %vertexIndex BuiltIn VertexIndex\n"
5195 "OpDecorate %instanceIndex BuiltIn InstanceIndex\n"
5196 "OpDecorate %vert_builtin_st Block\n"
5197 "OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5198 "OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5199 "OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5200
5201 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5202
5203 "%zero = OpConstant %i32 0\n"
5204 "%one = OpConstant %u32 1\n"
5205 "%c_f32_1 = OpConstant %f32 1\n"
5206
5207 "%i32inputptr = OpTypePointer Input %i32\n"
5208 "%vec4 = OpTypeVector %f32 4\n"
5209 "%vec4ptr = OpTypePointer Output %vec4\n"
5210 "%f32arr1 = OpTypeArray %f32 %one\n"
5211 "%vert_builtin_st = OpTypeStruct %vec4 %f32 %f32arr1\n"
5212 "%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5213 "%vert_builtins = OpVariable %vert_builtin_st_ptr Output\n"
5214
5215 "%id = OpVariable %uvec3ptr Input\n"
5216 "%vertexIndex = OpVariable %i32inputptr Input\n"
5217 "%instanceIndex = OpVariable %i32inputptr Input\n"
5218 "%c_vec4_1 = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5219
5220 // gl_Position = vec4(1.);
5221 "%vert_main = OpFunction %void None %voidf\n"
5222 "%vert_entry = OpLabel\n"
5223 "%position = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5224 " OpStore %position %c_vec4_1\n"
5225 " OpReturn\n"
5226 " OpFunctionEnd\n"
5227
5228 // Double inputs.
5229 "%comp_main1 = OpFunction %void None %voidf\n"
5230 "%comp1_entry = OpLabel\n"
5231 "%idval1 = OpLoad %uvec3 %id\n"
5232 "%x1 = OpCompositeExtract %u32 %idval1 0\n"
5233 "%inloc1 = OpAccessChain %f32ptr %indata %zero %x1\n"
5234 "%inval1 = OpLoad %f32 %inloc1\n"
5235 "%add = OpFAdd %f32 %inval1 %inval1\n"
5236 "%outloc1 = OpAccessChain %f32ptr %outdata %zero %x1\n"
5237 " OpStore %outloc1 %add\n"
5238 " OpReturn\n"
5239 " OpFunctionEnd\n"
5240
5241 // Negate inputs.
5242 "%comp_main2 = OpFunction %void None %voidf\n"
5243 "%comp2_entry = OpLabel\n"
5244 "%idval2 = OpLoad %uvec3 %id\n"
5245 "%x2 = OpCompositeExtract %u32 %idval2 0\n"
5246 "%inloc2 = OpAccessChain %f32ptr %indata %zero %x2\n"
5247 "%inval2 = OpLoad %f32 %inloc2\n"
5248 "%neg = OpFNegate %f32 %inval2\n"
5249 "%outloc2 = OpAccessChain %f32ptr %outdata %zero %x2\n"
5250 " OpStore %outloc2 %neg\n"
5251 " OpReturn\n"
5252 " OpFunctionEnd\n");
5253
5254 spec1.assembly = assembly;
5255 spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5256 spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5257 spec1.numWorkGroups = IVec3(numElements, 1, 1);
5258 spec1.entryPoint = "entrypoint1";
5259
5260 spec2.assembly = assembly;
5261 spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5262 spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5263 spec2.numWorkGroups = IVec3(numElements, 1, 1);
5264 spec2.entryPoint = "entrypoint2";
5265
5266 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
5267 group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
5268
5269 return group.release();
5270 }
5271
// Returns a string made of num4ByteChars copies of a 4-byte UTF-8 sequence
// (the bytes F0 9F 8C 8D), i.e. a string of num4ByteChars * 4 bytes.
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// An example of a longest valid UTF-8 character. It is spelled out as a
	// plain char array so the element type is explicit: Microsoft compilers
	// can otherwise treat the literal as wide (16-bit) characters, and a
	// C++11 UTF-8 literal is avoided to keep older Microsoft compilers working.
	static const char	earthAfrica[]	= "\xF0\x9F\x8C\x8D";
	const size_t		bytesPerChar	= sizeof(earthAfrica) - 1; // exclude the terminating NUL

	std::string repeated;
	repeated.reserve(num4ByteChars * bytesPerChar);

	for (size_t remaining = num4ByteChars; remaining != 0; --remaining)
		repeated.append(earthAfrica, bytesPerChar);

	return repeated;
}
5288
createOpSourceGroup(tcu::TestContext & testCtx)5289 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5290 {
5291 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5292 vector<CaseParameter> cases;
5293 de::Random rnd (deStringHash(group->getName()));
5294 const int numElements = 100;
5295 vector<float> positiveFloats (numElements, 0);
5296 vector<float> negativeFloats (numElements, 0);
5297 const StringTemplate shaderTemplate (
5298 "OpCapability Shader\n"
5299 "OpMemoryModel Logical GLSL450\n"
5300
5301 "OpEntryPoint GLCompute %main \"main\" %id\n"
5302 "OpExecutionMode %main LocalSize 1 1 1\n"
5303
5304 "${SOURCE}\n"
5305
5306 "OpName %main \"main\"\n"
5307 "OpName %id \"gl_GlobalInvocationID\"\n"
5308
5309 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5310
5311 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5312
5313 "%id = OpVariable %uvec3ptr Input\n"
5314 "%zero = OpConstant %i32 0\n"
5315
5316 "%main = OpFunction %void None %voidf\n"
5317 "%label = OpLabel\n"
5318 "%idval = OpLoad %uvec3 %id\n"
5319 "%x = OpCompositeExtract %u32 %idval 0\n"
5320 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5321 "%inval = OpLoad %f32 %inloc\n"
5322 "%neg = OpFNegate %f32 %inval\n"
5323 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5324 " OpStore %outloc %neg\n"
5325 " OpReturn\n"
5326 " OpFunctionEnd\n");
5327
5328 cases.push_back(CaseParameter("unknown_source", "OpSource Unknown 0"));
5329 cases.push_back(CaseParameter("wrong_source", "OpSource OpenCL_C 210"));
5330 cases.push_back(CaseParameter("normal_filename", "%fname = OpString \"filename\"\n"
5331 "OpSource GLSL 430 %fname"));
5332 cases.push_back(CaseParameter("empty_filename", "%fname = OpString \"\"\n"
5333 "OpSource GLSL 430 %fname"));
5334 cases.push_back(CaseParameter("normal_source_code", "%fname = OpString \"filename\"\n"
5335 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5336 cases.push_back(CaseParameter("empty_source_code", "%fname = OpString \"filename\"\n"
5337 "OpSource GLSL 430 %fname \"\""));
5338 cases.push_back(CaseParameter("long_source_code", "%fname = OpString \"filename\"\n"
5339 "OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5340 cases.push_back(CaseParameter("utf8_source_code", "%fname = OpString \"filename\"\n"
5341 "OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5342 cases.push_back(CaseParameter("normal_sourcecontinued", "%fname = OpString \"filename\"\n"
5343 "OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5344 "OpSourceContinued \"id main() {}\""));
5345 cases.push_back(CaseParameter("empty_sourcecontinued", "%fname = OpString \"filename\"\n"
5346 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5347 "OpSourceContinued \"\""));
5348 cases.push_back(CaseParameter("long_sourcecontinued", "%fname = OpString \"filename\"\n"
5349 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5350 "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5351 cases.push_back(CaseParameter("utf8_sourcecontinued", "%fname = OpString \"filename\"\n"
5352 "OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5353 "OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5354 cases.push_back(CaseParameter("multi_sourcecontinued", "%fname = OpString \"filename\"\n"
5355 "OpSource GLSL 430 %fname \"#version 430\n\"\n"
5356 "OpSourceContinued \"void\"\n"
5357 "OpSourceContinued \"main()\"\n"
5358 "OpSourceContinued \"{}\""));
5359 cases.push_back(CaseParameter("empty_source_before_sourcecontinued", "%fname = OpString \"filename\"\n"
5360 "OpSource GLSL 430 %fname \"\"\n"
5361 "OpSourceContinued \"#version 430\nvoid main() {}\""));
5362
5363 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5364
5365 for (size_t ndx = 0; ndx < numElements; ++ndx)
5366 negativeFloats[ndx] = -positiveFloats[ndx];
5367
5368 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5369 {
5370 map<string, string> specializations;
5371 ComputeShaderSpec spec;
5372
5373 specializations["SOURCE"] = cases[caseNdx].param;
5374 spec.assembly = shaderTemplate.specialize(specializations);
5375 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5376 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5377 spec.numWorkGroups = IVec3(numElements, 1, 1);
5378
5379 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5380 }
5381
5382 return group.release();
5383 }
5384
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5385 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5386 {
5387 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5388 vector<CaseParameter> cases;
5389 de::Random rnd (deStringHash(group->getName()));
5390 const int numElements = 100;
5391 vector<float> inputFloats (numElements, 0);
5392 vector<float> outputFloats (numElements, 0);
5393 const StringTemplate shaderTemplate (
5394 string(getComputeAsmShaderPreamble()) +
5395
5396 "OpSourceExtension \"${EXTENSION}\"\n"
5397
5398 "OpName %main \"main\"\n"
5399 "OpName %id \"gl_GlobalInvocationID\"\n"
5400
5401 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5402
5403 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5404
5405 "%id = OpVariable %uvec3ptr Input\n"
5406 "%zero = OpConstant %i32 0\n"
5407
5408 "%main = OpFunction %void None %voidf\n"
5409 "%label = OpLabel\n"
5410 "%idval = OpLoad %uvec3 %id\n"
5411 "%x = OpCompositeExtract %u32 %idval 0\n"
5412 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5413 "%inval = OpLoad %f32 %inloc\n"
5414 "%neg = OpFNegate %f32 %inval\n"
5415 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5416 " OpStore %outloc %neg\n"
5417 " OpReturn\n"
5418 " OpFunctionEnd\n");
5419
5420 cases.push_back(CaseParameter("empty_extension", ""));
5421 cases.push_back(CaseParameter("real_extension", "GL_ARB_texture_rectangle"));
5422 cases.push_back(CaseParameter("fake_extension", "GL_ARB_im_the_ultimate_extension"));
5423 cases.push_back(CaseParameter("utf8_extension", "GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5424 cases.push_back(CaseParameter("long_extension", makeLongUTF8String(65533) + "ccc")); // word count: 65535
5425
5426 fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5427
5428 for (size_t ndx = 0; ndx < numElements; ++ndx)
5429 outputFloats[ndx] = -inputFloats[ndx];
5430
5431 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5432 {
5433 map<string, string> specializations;
5434 ComputeShaderSpec spec;
5435
5436 specializations["EXTENSION"] = cases[caseNdx].param;
5437 spec.assembly = shaderTemplate.specialize(specializations);
5438 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5439 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5440 spec.numWorkGroups = IVec3(numElements, 1, 1);
5441
5442 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5443 }
5444
5445 return group.release();
5446 }
5447
5448 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5449 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5450 {
5451 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5452 vector<CaseParameter> cases;
5453 de::Random rnd (deStringHash(group->getName()));
5454 const int numElements = 100;
5455 vector<float> positiveFloats (numElements, 0);
5456 vector<float> negativeFloats (numElements, 0);
5457 const StringTemplate shaderTemplate (
5458 string(getComputeAsmShaderPreamble()) +
5459
5460 "OpSource GLSL 430\n"
5461 "OpName %main \"main\"\n"
5462 "OpName %id \"gl_GlobalInvocationID\"\n"
5463
5464 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5465
5466 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5467 "%uvec2 = OpTypeVector %u32 2\n"
5468 "%bvec3 = OpTypeVector %bool 3\n"
5469 "%fvec4 = OpTypeVector %f32 4\n"
5470 "%fmat33 = OpTypeMatrix %fvec3 3\n"
5471 "%const100 = OpConstant %u32 100\n"
5472 "%uarr100 = OpTypeArray %i32 %const100\n"
5473 "%struct = OpTypeStruct %f32 %i32 %u32\n"
5474 "%pointer = OpTypePointer Function %i32\n"
5475 + string(getComputeAsmInputOutputBuffer()) +
5476
5477 "%null = OpConstantNull ${TYPE}\n"
5478
5479 "%id = OpVariable %uvec3ptr Input\n"
5480 "%zero = OpConstant %i32 0\n"
5481
5482 "%main = OpFunction %void None %voidf\n"
5483 "%label = OpLabel\n"
5484 "%idval = OpLoad %uvec3 %id\n"
5485 "%x = OpCompositeExtract %u32 %idval 0\n"
5486 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5487 "%inval = OpLoad %f32 %inloc\n"
5488 "%neg = OpFNegate %f32 %inval\n"
5489 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5490 " OpStore %outloc %neg\n"
5491 " OpReturn\n"
5492 " OpFunctionEnd\n");
5493
5494 cases.push_back(CaseParameter("bool", "%bool"));
5495 cases.push_back(CaseParameter("sint32", "%i32"));
5496 cases.push_back(CaseParameter("uint32", "%u32"));
5497 cases.push_back(CaseParameter("float32", "%f32"));
5498 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
5499 cases.push_back(CaseParameter("vec3bool", "%bvec3"));
5500 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
5501 cases.push_back(CaseParameter("matrix", "%fmat33"));
5502 cases.push_back(CaseParameter("array", "%uarr100"));
5503 cases.push_back(CaseParameter("struct", "%struct"));
5504 cases.push_back(CaseParameter("pointer", "%pointer"));
5505
5506 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5507
5508 for (size_t ndx = 0; ndx < numElements; ++ndx)
5509 negativeFloats[ndx] = -positiveFloats[ndx];
5510
5511 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5512 {
5513 map<string, string> specializations;
5514 ComputeShaderSpec spec;
5515
5516 specializations["TYPE"] = cases[caseNdx].param;
5517 spec.assembly = shaderTemplate.specialize(specializations);
5518 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5519 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5520 spec.numWorkGroups = IVec3(numElements, 1, 1);
5521
5522 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5523 }
5524
5525 return group.release();
5526 }
5527
5528 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5529 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5530 {
5531 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5532 vector<CaseParameter> cases;
5533 de::Random rnd (deStringHash(group->getName()));
5534 const int numElements = 100;
5535 vector<float> positiveFloats (numElements, 0);
5536 vector<float> negativeFloats (numElements, 0);
5537 const StringTemplate shaderTemplate (
5538 string(getComputeAsmShaderPreamble()) +
5539
5540 "OpSource GLSL 430\n"
5541 "OpName %main \"main\"\n"
5542 "OpName %id \"gl_GlobalInvocationID\"\n"
5543
5544 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5545
5546 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5547
5548 "%id = OpVariable %uvec3ptr Input\n"
5549 "%zero = OpConstant %i32 0\n"
5550
5551 "${CONSTANT}\n"
5552
5553 "%main = OpFunction %void None %voidf\n"
5554 "%label = OpLabel\n"
5555 "%idval = OpLoad %uvec3 %id\n"
5556 "%x = OpCompositeExtract %u32 %idval 0\n"
5557 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5558 "%inval = OpLoad %f32 %inloc\n"
5559 "%neg = OpFNegate %f32 %inval\n"
5560 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5561 " OpStore %outloc %neg\n"
5562 " OpReturn\n"
5563 " OpFunctionEnd\n");
5564
5565 cases.push_back(CaseParameter("vector", "%five = OpConstant %u32 5\n"
5566 "%const = OpConstantComposite %uvec3 %five %zero %five"));
5567 cases.push_back(CaseParameter("matrix", "%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5568 "%ten = OpConstant %f32 10.\n"
5569 "%fzero = OpConstant %f32 0.\n"
5570 "%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5571 "%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5572 cases.push_back(CaseParameter("struct", "%m2vec3 = OpTypeMatrix %fvec3 2\n"
5573 "%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5574 "%fzero = OpConstant %f32 0.\n"
5575 "%one = OpConstant %f32 1.\n"
5576 "%point5 = OpConstant %f32 0.5\n"
5577 "%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5578 "%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5579 "%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5580 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %u32 %f32\n"
5581 "%st2 = OpTypeStruct %i32 %i32\n"
5582 "%struct = OpTypeStruct %st1 %st2\n"
5583 "%point5 = OpConstant %f32 0.5\n"
5584 "%one = OpConstant %u32 1\n"
5585 "%ten = OpConstant %i32 10\n"
5586 "%st1val = OpConstantComposite %st1 %one %point5\n"
5587 "%st2val = OpConstantComposite %st2 %ten %ten\n"
5588 "%const = OpConstantComposite %struct %st1val %st2val"));
5589
5590 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5591
5592 for (size_t ndx = 0; ndx < numElements; ++ndx)
5593 negativeFloats[ndx] = -positiveFloats[ndx];
5594
5595 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5596 {
5597 map<string, string> specializations;
5598 ComputeShaderSpec spec;
5599
5600 specializations["CONSTANT"] = cases[caseNdx].param;
5601 spec.assembly = shaderTemplate.specialize(specializations);
5602 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5603 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5604 spec.numWorkGroups = IVec3(numElements, 1, 1);
5605
5606 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5607 }
5608
5609 return group.release();
5610 }
5611
5612 // Creates a floating point number with the given exponent, and significand
5613 // bits set. It can only create normalized numbers. Only the least significant
5614 // 24 bits of the significand will be examined. The final bit of the
5615 // significand will also be ignored. This allows alignment to be written
5616 // similarly to C99 hex-floats.
5617 // For example if you wanted to write 0x1.7f34p-12 you would call
5618 // constructNormalizedFloat(-12, 0x7f3400)
constructNormalizedFloat(deInt32 exponent,deUint32 significand)5619 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
5620 {
5621 float f = 1.0f;
5622
5623 for (deInt32 idx = 0; idx < 23; ++idx)
5624 {
5625 f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5626 significand <<= 1;
5627 }
5628
5629 return std::ldexp(f, exponent);
5630 }
5631
5632 // Compare instruction for the OpQuantizeF16 compute exact case.
5633 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5634 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5635 {
5636 if (outputAllocs.size() != 1)
5637 return false;
5638
5639 // Only size is needed because we cannot compare Nans.
5640 size_t byteSize = expectedOutputs[0].getByteSize();
5641
5642 const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5643
5644 if (byteSize != 4*sizeof(float)) {
5645 return false;
5646 }
5647
5648 if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5649 *outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5650 return false;
5651 }
5652 outputAsFloat++;
5653
5654 if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5655 *outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5656 return false;
5657 }
5658 outputAsFloat++;
5659
5660 if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5661 *outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5662 return false;
5663 }
5664 outputAsFloat++;
5665
5666 if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5667 *outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5668 return false;
5669 }
5670
5671 return true;
5672 }
5673
5674 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5675 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5676 {
5677 if (outputAllocs.size() != 1)
5678 return false;
5679
5680 // Only size is needed because we cannot compare Nans.
5681 size_t byteSize = expectedOutputs[0].getByteSize();
5682
5683 const float* const output_as_float = static_cast<const float*>(outputAllocs[0]->getHostPtr());
5684
5685 for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5686 {
5687 if (!deFloatIsNaN(output_as_float[idx]))
5688 {
5689 return false;
5690 }
5691 }
5692
5693 return true;
5694 }
5695
5696 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5697 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5698 {
5699 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5700
5701 const std::string shader (
5702 string(getComputeAsmShaderPreamble()) +
5703
5704 "OpSource GLSL 430\n"
5705 "OpName %main \"main\"\n"
5706 "OpName %id \"gl_GlobalInvocationID\"\n"
5707
5708 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5709
5710 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5711
5712 "%id = OpVariable %uvec3ptr Input\n"
5713 "%zero = OpConstant %i32 0\n"
5714
5715 "%main = OpFunction %void None %voidf\n"
5716 "%label = OpLabel\n"
5717 "%idval = OpLoad %uvec3 %id\n"
5718 "%x = OpCompositeExtract %u32 %idval 0\n"
5719 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
5720 "%inval = OpLoad %f32 %inloc\n"
5721 "%quant = OpQuantizeToF16 %f32 %inval\n"
5722 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5723 " OpStore %outloc %quant\n"
5724 " OpReturn\n"
5725 " OpFunctionEnd\n");
5726
5727 {
5728 ComputeShaderSpec spec;
5729 const deUint32 numElements = 100;
5730 vector<float> infinities;
5731 vector<float> results;
5732
5733 infinities.reserve(numElements);
5734 results.reserve(numElements);
5735
5736 for (size_t idx = 0; idx < numElements; ++idx)
5737 {
5738 switch(idx % 4)
5739 {
5740 case 0:
5741 infinities.push_back(std::numeric_limits<float>::infinity());
5742 results.push_back(std::numeric_limits<float>::infinity());
5743 break;
5744 case 1:
5745 infinities.push_back(-std::numeric_limits<float>::infinity());
5746 results.push_back(-std::numeric_limits<float>::infinity());
5747 break;
5748 case 2:
5749 infinities.push_back(std::ldexp(1.0f, 16));
5750 results.push_back(std::numeric_limits<float>::infinity());
5751 break;
5752 case 3:
5753 infinities.push_back(std::ldexp(-1.0f, 32));
5754 results.push_back(-std::numeric_limits<float>::infinity());
5755 break;
5756 }
5757 }
5758
5759 spec.assembly = shader;
5760 spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5761 spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5762 spec.numWorkGroups = IVec3(numElements, 1, 1);
5763
5764 group->addChild(new SpvAsmComputeShaderCase(
5765 testCtx, "infinities", "Check that infinities propagated and created", spec));
5766 }
5767
5768 {
5769 ComputeShaderSpec spec;
5770 vector<float> nans;
5771 const deUint32 numElements = 100;
5772
5773 nans.reserve(numElements);
5774
5775 for (size_t idx = 0; idx < numElements; ++idx)
5776 {
5777 if (idx % 2 == 0)
5778 {
5779 nans.push_back(std::numeric_limits<float>::quiet_NaN());
5780 }
5781 else
5782 {
5783 nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5784 }
5785 }
5786
5787 spec.assembly = shader;
5788 spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5789 spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5790 spec.numWorkGroups = IVec3(numElements, 1, 1);
5791 spec.verifyIO = &compareNan;
5792
5793 group->addChild(new SpvAsmComputeShaderCase(
5794 testCtx, "propagated_nans", "Check that nans are propagated", spec));
5795 }
5796
5797 {
5798 ComputeShaderSpec spec;
5799 vector<float> small;
5800 vector<float> zeros;
5801 const deUint32 numElements = 100;
5802
5803 small.reserve(numElements);
5804 zeros.reserve(numElements);
5805
5806 for (size_t idx = 0; idx < numElements; ++idx)
5807 {
5808 switch(idx % 6)
5809 {
5810 case 0:
5811 small.push_back(0.f);
5812 zeros.push_back(0.f);
5813 break;
5814 case 1:
5815 small.push_back(-0.f);
5816 zeros.push_back(-0.f);
5817 break;
5818 case 2:
5819 small.push_back(std::ldexp(1.0f, -16));
5820 zeros.push_back(0.f);
5821 break;
5822 case 3:
5823 small.push_back(std::ldexp(-1.0f, -32));
5824 zeros.push_back(-0.f);
5825 break;
5826 case 4:
5827 small.push_back(std::ldexp(1.0f, -127));
5828 zeros.push_back(0.f);
5829 break;
5830 case 5:
5831 small.push_back(-std::ldexp(1.0f, -128));
5832 zeros.push_back(-0.f);
5833 break;
5834 }
5835 }
5836
5837 spec.assembly = shader;
5838 spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5839 spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5840 spec.numWorkGroups = IVec3(numElements, 1, 1);
5841
5842 group->addChild(new SpvAsmComputeShaderCase(
5843 testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5844 }
5845
5846 {
5847 ComputeShaderSpec spec;
5848 vector<float> exact;
5849 const deUint32 numElements = 200;
5850
5851 exact.reserve(numElements);
5852
5853 for (size_t idx = 0; idx < numElements; ++idx)
5854 exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5855
5856 spec.assembly = shader;
5857 spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5858 spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5859 spec.numWorkGroups = IVec3(numElements, 1, 1);
5860
5861 group->addChild(new SpvAsmComputeShaderCase(
5862 testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5863 }
5864
5865 {
5866 ComputeShaderSpec spec;
5867 vector<float> inputs;
5868 const deUint32 numElements = 4;
5869
5870 inputs.push_back(constructNormalizedFloat(8, 0x300300));
5871 inputs.push_back(-constructNormalizedFloat(-7, 0x600800));
5872 inputs.push_back(constructNormalizedFloat(2, 0x01E000));
5873 inputs.push_back(constructNormalizedFloat(1, 0xFFE000));
5874
5875 spec.assembly = shader;
5876 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5877 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5878 spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5879 spec.numWorkGroups = IVec3(numElements, 1, 1);
5880
5881 group->addChild(new SpvAsmComputeShaderCase(
5882 testCtx, "rounded", "Check that are rounded when needed", spec));
5883 }
5884
5885 return group.release();
5886 }
5887
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)5888 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5889 {
5890 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5891
5892 const std::string shader (
5893 string(getComputeAsmShaderPreamble()) +
5894
5895 "OpName %main \"main\"\n"
5896 "OpName %id \"gl_GlobalInvocationID\"\n"
5897
5898 "OpDecorate %id BuiltIn GlobalInvocationId\n"
5899
5900 "OpDecorate %sc_0 SpecId 0\n"
5901 "OpDecorate %sc_1 SpecId 1\n"
5902 "OpDecorate %sc_2 SpecId 2\n"
5903 "OpDecorate %sc_3 SpecId 3\n"
5904 "OpDecorate %sc_4 SpecId 4\n"
5905 "OpDecorate %sc_5 SpecId 5\n"
5906
5907 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5908
5909 "%id = OpVariable %uvec3ptr Input\n"
5910 "%zero = OpConstant %i32 0\n"
5911 "%c_u32_6 = OpConstant %u32 6\n"
5912
5913 "%sc_0 = OpSpecConstant %f32 0.\n"
5914 "%sc_1 = OpSpecConstant %f32 0.\n"
5915 "%sc_2 = OpSpecConstant %f32 0.\n"
5916 "%sc_3 = OpSpecConstant %f32 0.\n"
5917 "%sc_4 = OpSpecConstant %f32 0.\n"
5918 "%sc_5 = OpSpecConstant %f32 0.\n"
5919
5920 "%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5921 "%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5922 "%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5923 "%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5924 "%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5925 "%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5926
5927 "%main = OpFunction %void None %voidf\n"
5928 "%label = OpLabel\n"
5929 "%idval = OpLoad %uvec3 %id\n"
5930 "%x = OpCompositeExtract %u32 %idval 0\n"
5931 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
5932 "%selector = OpUMod %u32 %x %c_u32_6\n"
5933 " OpSelectionMerge %exit None\n"
5934 " OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5935
5936 "%case0 = OpLabel\n"
5937 " OpStore %outloc %sc_0_quant\n"
5938 " OpBranch %exit\n"
5939
5940 "%case1 = OpLabel\n"
5941 " OpStore %outloc %sc_1_quant\n"
5942 " OpBranch %exit\n"
5943
5944 "%case2 = OpLabel\n"
5945 " OpStore %outloc %sc_2_quant\n"
5946 " OpBranch %exit\n"
5947
5948 "%case3 = OpLabel\n"
5949 " OpStore %outloc %sc_3_quant\n"
5950 " OpBranch %exit\n"
5951
5952 "%case4 = OpLabel\n"
5953 " OpStore %outloc %sc_4_quant\n"
5954 " OpBranch %exit\n"
5955
5956 "%case5 = OpLabel\n"
5957 " OpStore %outloc %sc_5_quant\n"
5958 " OpBranch %exit\n"
5959
5960 "%exit = OpLabel\n"
5961 " OpReturn\n"
5962
5963 " OpFunctionEnd\n");
5964
5965 {
5966 ComputeShaderSpec spec;
5967 const deUint8 numCases = 4;
5968 vector<float> inputs (numCases, 0.f);
5969 vector<float> outputs;
5970
5971 spec.assembly = shader;
5972 spec.numWorkGroups = IVec3(numCases, 1, 1);
5973
5974 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5975 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5976 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5977 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5978
5979 outputs.push_back(std::numeric_limits<float>::infinity());
5980 outputs.push_back(-std::numeric_limits<float>::infinity());
5981 outputs.push_back(std::numeric_limits<float>::infinity());
5982 outputs.push_back(-std::numeric_limits<float>::infinity());
5983
5984 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5985 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5986
5987 group->addChild(new SpvAsmComputeShaderCase(
5988 testCtx, "infinities", "Check that infinities propagated and created", spec));
5989 }
5990
5991 {
5992 ComputeShaderSpec spec;
5993 const deUint8 numCases = 2;
5994 vector<float> inputs (numCases, 0.f);
5995 vector<float> outputs;
5996
5997 spec.assembly = shader;
5998 spec.numWorkGroups = IVec3(numCases, 1, 1);
5999 spec.verifyIO = &compareNan;
6000
6001 outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6002 outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6003
6004 for (deUint8 idx = 0; idx < numCases; ++idx)
6005 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6006
6007 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6008 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6009
6010 group->addChild(new SpvAsmComputeShaderCase(
6011 testCtx, "propagated_nans", "Check that nans are propagated", spec));
6012 }
6013
6014 {
6015 ComputeShaderSpec spec;
6016 const deUint8 numCases = 6;
6017 vector<float> inputs (numCases, 0.f);
6018 vector<float> outputs;
6019
6020 spec.assembly = shader;
6021 spec.numWorkGroups = IVec3(numCases, 1, 1);
6022
6023 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6024 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6025 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6026 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6027 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6028 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6029
6030 outputs.push_back(0.f);
6031 outputs.push_back(-0.f);
6032 outputs.push_back(0.f);
6033 outputs.push_back(-0.f);
6034 outputs.push_back(0.f);
6035 outputs.push_back(-0.f);
6036
6037 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6038 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6039
6040 group->addChild(new SpvAsmComputeShaderCase(
6041 testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
6042 }
6043
6044 {
6045 ComputeShaderSpec spec;
6046 const deUint8 numCases = 6;
6047 vector<float> inputs (numCases, 0.f);
6048 vector<float> outputs;
6049
6050 spec.assembly = shader;
6051 spec.numWorkGroups = IVec3(numCases, 1, 1);
6052
6053 for (deUint8 idx = 0; idx < 6; ++idx)
6054 {
6055 const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6056 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6057 outputs.push_back(f);
6058 }
6059
6060 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6061 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6062
6063 group->addChild(new SpvAsmComputeShaderCase(
6064 testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
6065 }
6066
6067 {
6068 ComputeShaderSpec spec;
6069 const deUint8 numCases = 4;
6070 vector<float> inputs (numCases, 0.f);
6071 vector<float> outputs;
6072
6073 spec.assembly = shader;
6074 spec.numWorkGroups = IVec3(numCases, 1, 1);
6075 spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
6076
6077 outputs.push_back(constructNormalizedFloat(8, 0x300300));
6078 outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6079 outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6080 outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6081
6082 for (deUint8 idx = 0; idx < numCases; ++idx)
6083 spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6084
6085 spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6086 spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6087
6088 group->addChild(new SpvAsmComputeShaderCase(
6089 testCtx, "rounded", "Check that are rounded when needed", spec));
6090 }
6091
6092 return group.release();
6093 }
6094
6095 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6096 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6097 {
6098 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6099 ComputeShaderSpec spec;
6100 de::Random rnd (deStringHash(group->getName()));
6101 const int numElements = 100;
6102 vector<float> positiveFloats (numElements, 0);
6103 vector<float> negativeFloats (numElements, 0);
6104
6105 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6106
6107 for (size_t ndx = 0; ndx < numElements; ++ndx)
6108 negativeFloats[ndx] = -positiveFloats[ndx];
6109
6110 spec.assembly =
6111 "OpCapability Shader\n"
6112 "%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6113 "OpMemoryModel Logical GLSL450\n"
6114 "OpEntryPoint GLCompute %main \"main\" %id\n"
6115 "OpExecutionMode %main LocalSize 1 1 1\n"
6116
6117 "OpSource GLSL 430\n"
6118 "OpName %main \"main\"\n"
6119 "OpName %id \"gl_GlobalInvocationID\"\n"
6120
6121 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6122
6123 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6124
6125 "%fmat = OpTypeMatrix %fvec3 3\n"
6126 "%ten = OpConstant %u32 10\n"
6127 "%f32arr10 = OpTypeArray %f32 %ten\n"
6128 "%fst = OpTypeStruct %f32 %f32\n"
6129
6130 + string(getComputeAsmInputOutputBuffer()) +
6131
6132 "%id = OpVariable %uvec3ptr Input\n"
6133 "%zero = OpConstant %i32 0\n"
6134
6135 // Create a bunch of null values
6136 "%unull = OpConstantNull %u32\n"
6137 "%fnull = OpConstantNull %f32\n"
6138 "%vnull = OpConstantNull %fvec3\n"
6139 "%mnull = OpConstantNull %fmat\n"
6140 "%anull = OpConstantNull %f32arr10\n"
6141 "%snull = OpConstantComposite %fst %fnull %fnull\n"
6142
6143 "%main = OpFunction %void None %voidf\n"
6144 "%label = OpLabel\n"
6145 "%idval = OpLoad %uvec3 %id\n"
6146 "%x = OpCompositeExtract %u32 %idval 0\n"
6147 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6148 "%inval = OpLoad %f32 %inloc\n"
6149 "%neg = OpFNegate %f32 %inval\n"
6150
6151 // Get the abs() of (a certain element of) those null values
6152 "%unull_cov = OpConvertUToF %f32 %unull\n"
6153 "%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6154 "%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6155 "%vnull_0 = OpCompositeExtract %f32 %vnull 0\n"
6156 "%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6157 "%mnull_12 = OpCompositeExtract %f32 %mnull 1 2\n"
6158 "%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6159 "%anull_3 = OpCompositeExtract %f32 %anull 3\n"
6160 "%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6161 "%snull_1 = OpCompositeExtract %f32 %snull 1\n"
6162 "%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6163
6164 // Add them all
6165 "%add1 = OpFAdd %f32 %neg %unull_abs\n"
6166 "%add2 = OpFAdd %f32 %add1 %fnull_abs\n"
6167 "%add3 = OpFAdd %f32 %add2 %vnull_abs\n"
6168 "%add4 = OpFAdd %f32 %add3 %mnull_abs\n"
6169 "%add5 = OpFAdd %f32 %add4 %anull_abs\n"
6170 "%final = OpFAdd %f32 %add5 %snull_abs\n"
6171
6172 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6173 " OpStore %outloc %final\n" // write to output
6174 " OpReturn\n"
6175 " OpFunctionEnd\n";
6176 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6177 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6178 spec.numWorkGroups = IVec3(numElements, 1, 1);
6179
6180 group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
6181
6182 return group.release();
6183 }
6184
6185 // Assembly code used for testing loop control is based on GLSL source code:
6186 // #version 430
6187 //
6188 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6189 // float elements[];
6190 // } input_data;
6191 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6192 // float elements[];
6193 // } output_data;
6194 //
6195 // void main() {
6196 // uint x = gl_GlobalInvocationID.x;
6197 // output_data.elements[x] = input_data.elements[x];
6198 // for (uint i = 0; i < 4; ++i)
6199 // output_data.elements[x] += 1.f;
6200 // }
createLoopControlGroup(tcu::TestContext & testCtx)6201 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6202 {
6203 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6204 vector<CaseParameter> cases;
6205 de::Random rnd (deStringHash(group->getName()));
6206 const int numElements = 100;
6207 vector<float> inputFloats (numElements, 0);
6208 vector<float> outputFloats (numElements, 0);
6209 const StringTemplate shaderTemplate (
6210 string(getComputeAsmShaderPreamble()) +
6211
6212 "OpSource GLSL 430\n"
6213 "OpName %main \"main\"\n"
6214 "OpName %id \"gl_GlobalInvocationID\"\n"
6215
6216 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6217
6218 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6219
6220 "%u32ptr = OpTypePointer Function %u32\n"
6221
6222 "%id = OpVariable %uvec3ptr Input\n"
6223 "%zero = OpConstant %i32 0\n"
6224 "%uzero = OpConstant %u32 0\n"
6225 "%one = OpConstant %i32 1\n"
6226 "%constf1 = OpConstant %f32 1.0\n"
6227 "%four = OpConstant %u32 4\n"
6228
6229 "%main = OpFunction %void None %voidf\n"
6230 "%entry = OpLabel\n"
6231 "%i = OpVariable %u32ptr Function\n"
6232 " OpStore %i %uzero\n"
6233
6234 "%idval = OpLoad %uvec3 %id\n"
6235 "%x = OpCompositeExtract %u32 %idval 0\n"
6236 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6237 "%inval = OpLoad %f32 %inloc\n"
6238 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6239 " OpStore %outloc %inval\n"
6240 " OpBranch %loop_entry\n"
6241
6242 "%loop_entry = OpLabel\n"
6243 "%i_val = OpLoad %u32 %i\n"
6244 "%cmp_lt = OpULessThan %bool %i_val %four\n"
6245 " OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6246 " OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6247 "%loop_body = OpLabel\n"
6248 "%outval = OpLoad %f32 %outloc\n"
6249 "%addf1 = OpFAdd %f32 %outval %constf1\n"
6250 " OpStore %outloc %addf1\n"
6251 "%new_i = OpIAdd %u32 %i_val %one\n"
6252 " OpStore %i %new_i\n"
6253 " OpBranch %loop_entry\n"
6254 "%loop_merge = OpLabel\n"
6255 " OpReturn\n"
6256 " OpFunctionEnd\n");
6257
6258 cases.push_back(CaseParameter("none", "None"));
6259 cases.push_back(CaseParameter("unroll", "Unroll"));
6260 cases.push_back(CaseParameter("dont_unroll", "DontUnroll"));
6261
6262 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6263
6264 for (size_t ndx = 0; ndx < numElements; ++ndx)
6265 outputFloats[ndx] = inputFloats[ndx] + 4.f;
6266
6267 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6268 {
6269 map<string, string> specializations;
6270 ComputeShaderSpec spec;
6271
6272 specializations["CONTROL"] = cases[caseNdx].param;
6273 spec.assembly = shaderTemplate.specialize(specializations);
6274 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6275 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6276 spec.numWorkGroups = IVec3(numElements, 1, 1);
6277
6278 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6279 }
6280
6281 group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
6282 group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
6283
6284 return group.release();
6285 }
6286
6287 // Assembly code used for testing selection control is based on GLSL source code:
6288 // #version 430
6289 //
6290 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6291 // float elements[];
6292 // } input_data;
6293 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6294 // float elements[];
6295 // } output_data;
6296 //
6297 // void main() {
6298 // uint x = gl_GlobalInvocationID.x;
6299 // float val = input_data.elements[x];
6300 // if (val > 10.f)
6301 // output_data.elements[x] = val + 1.f;
6302 // else
6303 // output_data.elements[x] = val - 1.f;
6304 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6305 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6306 {
6307 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6308 vector<CaseParameter> cases;
6309 de::Random rnd (deStringHash(group->getName()));
6310 const int numElements = 100;
6311 vector<float> inputFloats (numElements, 0);
6312 vector<float> outputFloats (numElements, 0);
6313 const StringTemplate shaderTemplate (
6314 string(getComputeAsmShaderPreamble()) +
6315
6316 "OpSource GLSL 430\n"
6317 "OpName %main \"main\"\n"
6318 "OpName %id \"gl_GlobalInvocationID\"\n"
6319
6320 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6321
6322 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6323
6324 "%id = OpVariable %uvec3ptr Input\n"
6325 "%zero = OpConstant %i32 0\n"
6326 "%constf1 = OpConstant %f32 1.0\n"
6327 "%constf10 = OpConstant %f32 10.0\n"
6328
6329 "%main = OpFunction %void None %voidf\n"
6330 "%entry = OpLabel\n"
6331 "%idval = OpLoad %uvec3 %id\n"
6332 "%x = OpCompositeExtract %u32 %idval 0\n"
6333 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6334 "%inval = OpLoad %f32 %inloc\n"
6335 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6336 "%cmp_gt = OpFOrdGreaterThan %bool %inval %constf10\n"
6337
6338 " OpSelectionMerge %if_end ${CONTROL}\n"
6339 " OpBranchConditional %cmp_gt %if_true %if_false\n"
6340 "%if_true = OpLabel\n"
6341 "%addf1 = OpFAdd %f32 %inval %constf1\n"
6342 " OpStore %outloc %addf1\n"
6343 " OpBranch %if_end\n"
6344 "%if_false = OpLabel\n"
6345 "%subf1 = OpFSub %f32 %inval %constf1\n"
6346 " OpStore %outloc %subf1\n"
6347 " OpBranch %if_end\n"
6348 "%if_end = OpLabel\n"
6349 " OpReturn\n"
6350 " OpFunctionEnd\n");
6351
6352 cases.push_back(CaseParameter("none", "None"));
6353 cases.push_back(CaseParameter("flatten", "Flatten"));
6354 cases.push_back(CaseParameter("dont_flatten", "DontFlatten"));
6355 cases.push_back(CaseParameter("flatten_dont_flatten", "DontFlatten|Flatten"));
6356
6357 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6358
6359 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6360 floorAll(inputFloats);
6361
6362 for (size_t ndx = 0; ndx < numElements; ++ndx)
6363 outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6364
6365 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6366 {
6367 map<string, string> specializations;
6368 ComputeShaderSpec spec;
6369
6370 specializations["CONTROL"] = cases[caseNdx].param;
6371 spec.assembly = shaderTemplate.specialize(specializations);
6372 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6373 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6374 spec.numWorkGroups = IVec3(numElements, 1, 1);
6375
6376 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6377 }
6378
6379 return group.release();
6380 }
6381
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6382 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6383 {
6384 // Generate a long name.
6385 std::string longname;
6386 longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6387
6388 // Some bad names, abusing utf-8 encoding. This may also cause problems
6389 // with the logs.
6390 // 1. Various illegal code points in utf-8
6391 std::string utf8illegal =
6392 "Illegal bytes in UTF-8: "
6393 "\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6394 "illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6395
6396 // 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6397 std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6398
6399 // 3. Some overlong encodings
6400 std::string utf8overlong =
6401 "UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6402 "\xf0\x8f\xbf\xbf";
6403
6404 // 4. Internet "zalgo" meme "bleeding text"
6405 std::string utf8zalgo =
6406 "\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6407 "\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6408 "\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6409 "\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6410 "\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6411 "\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6412 "\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6413 "\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6414 "\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6415 "\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6416 "\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6417 "\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6418 "\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6419 "\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6420 "\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6421 "\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6422 "\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6423 "\x93\xcd\x96\xcc\x97\xff";
6424
6425 // General name abuses
6426 abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6427 abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6428 abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6429 abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6430 abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6431
6432 // GL keywords
6433 abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6434 abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6435 abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6436 abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6437 abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6438 abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6439 abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6440 abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6441 abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6442 abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6443 abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6444 abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6445 abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6446 abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6447 abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6448 abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6449 abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6450 abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6451 abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6452 abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6453 abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6454 }
6455
createOpNameGroup(tcu::TestContext & testCtx)6456 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6457 {
6458 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6459 de::MovePtr<tcu::TestCaseGroup> entryMainGroup (new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6460 de::MovePtr<tcu::TestCaseGroup> entryNotGroup (new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6461 de::MovePtr<tcu::TestCaseGroup> abuseGroup (new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6462 vector<CaseParameter> cases;
6463 vector<CaseParameter> abuseCases;
6464 vector<string> testFunc;
6465 de::Random rnd (deStringHash(group->getName()));
6466 const int numElements = 128;
6467 vector<float> inputFloats (numElements, 0);
6468 vector<float> outputFloats (numElements, 0);
6469
6470 getOpNameAbuseCases(abuseCases);
6471
6472 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6473
6474 for(size_t ndx = 0; ndx < numElements; ++ndx)
6475 outputFloats[ndx] = -inputFloats[ndx];
6476
6477 const string commonShaderHeader =
6478 "OpCapability Shader\n"
6479 "OpMemoryModel Logical GLSL450\n"
6480 "OpEntryPoint GLCompute %main \"main\" %id\n"
6481 "OpExecutionMode %main LocalSize 1 1 1\n";
6482
6483 const string commonShaderFooter =
6484 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6485
6486 + string(getComputeAsmInputOutputBufferTraits())
6487 + string(getComputeAsmCommonTypes())
6488 + string(getComputeAsmInputOutputBuffer()) +
6489
6490 "%id = OpVariable %uvec3ptr Input\n"
6491 "%zero = OpConstant %i32 0\n"
6492
6493 "%func = OpFunction %void None %voidf\n"
6494 "%5 = OpLabel\n"
6495 " OpReturn\n"
6496 " OpFunctionEnd\n"
6497
6498 "%main = OpFunction %void None %voidf\n"
6499 "%entry = OpLabel\n"
6500 "%7 = OpFunctionCall %void %func\n"
6501
6502 "%idval = OpLoad %uvec3 %id\n"
6503 "%x = OpCompositeExtract %u32 %idval 0\n"
6504
6505 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6506 "%inval = OpLoad %f32 %inloc\n"
6507 "%neg = OpFNegate %f32 %inval\n"
6508 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6509 " OpStore %outloc %neg\n"
6510
6511 " OpReturn\n"
6512 " OpFunctionEnd\n";
6513
6514 const StringTemplate shaderTemplate (
6515 "OpCapability Shader\n"
6516 "OpMemoryModel Logical GLSL450\n"
6517 "OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6518 "OpExecutionMode %main LocalSize 1 1 1\n"
6519 "OpName %${ID} \"${NAME}\"\n" +
6520 commonShaderFooter);
6521
6522 const std::string multipleNames =
6523 commonShaderHeader +
6524 "OpName %main \"to_be\"\n"
6525 "OpName %id \"or_not\"\n"
6526 "OpName %main \"to_be\"\n"
6527 "OpName %main \"makes_no\"\n"
6528 "OpName %func \"difference\"\n"
6529 "OpName %5 \"to_me\"\n" +
6530 commonShaderFooter;
6531
6532 {
6533 ComputeShaderSpec spec;
6534
6535 spec.assembly = multipleNames;
6536 spec.numWorkGroups = IVec3(numElements, 1, 1);
6537 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6538 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6539
6540 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6541 }
6542
6543 const std::string everythingNamed =
6544 commonShaderHeader +
6545 "OpName %main \"name1\"\n"
6546 "OpName %id \"name2\"\n"
6547 "OpName %zero \"name3\"\n"
6548 "OpName %entry \"name4\"\n"
6549 "OpName %func \"name5\"\n"
6550 "OpName %5 \"name6\"\n"
6551 "OpName %7 \"name7\"\n"
6552 "OpName %idval \"name8\"\n"
6553 "OpName %inloc \"name9\"\n"
6554 "OpName %inval \"name10\"\n"
6555 "OpName %neg \"name11\"\n"
6556 "OpName %outloc \"name12\"\n"+
6557 commonShaderFooter;
6558 {
6559 ComputeShaderSpec spec;
6560
6561 spec.assembly = everythingNamed;
6562 spec.numWorkGroups = IVec3(numElements, 1, 1);
6563 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6564 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6565
6566 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6567 }
6568
6569 const std::string everythingNamedTheSame =
6570 commonShaderHeader +
6571 "OpName %main \"the_same\"\n"
6572 "OpName %id \"the_same\"\n"
6573 "OpName %zero \"the_same\"\n"
6574 "OpName %entry \"the_same\"\n"
6575 "OpName %func \"the_same\"\n"
6576 "OpName %5 \"the_same\"\n"
6577 "OpName %7 \"the_same\"\n"
6578 "OpName %idval \"the_same\"\n"
6579 "OpName %inloc \"the_same\"\n"
6580 "OpName %inval \"the_same\"\n"
6581 "OpName %neg \"the_same\"\n"
6582 "OpName %outloc \"the_same\"\n"+
6583 commonShaderFooter;
6584 {
6585 ComputeShaderSpec spec;
6586
6587 spec.assembly = everythingNamedTheSame;
6588 spec.numWorkGroups = IVec3(numElements, 1, 1);
6589 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6590 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6591
6592 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6593 }
6594
6595 // main_is_...
6596 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6597 {
6598 map<string, string> specializations;
6599 ComputeShaderSpec spec;
6600
6601 specializations["ENTRY"] = "main";
6602 specializations["ID"] = "main";
6603 specializations["NAME"] = abuseCases[ndx].param;
6604 spec.assembly = shaderTemplate.specialize(specializations);
6605 spec.numWorkGroups = IVec3(numElements, 1, 1);
6606 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6607 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6608
6609 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6610 }
6611
6612 // x_is_....
6613 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6614 {
6615 map<string, string> specializations;
6616 ComputeShaderSpec spec;
6617
6618 specializations["ENTRY"] = "main";
6619 specializations["ID"] = "x";
6620 specializations["NAME"] = abuseCases[ndx].param;
6621 spec.assembly = shaderTemplate.specialize(specializations);
6622 spec.numWorkGroups = IVec3(numElements, 1, 1);
6623 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6624 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6625
6626 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6627 }
6628
6629 cases.push_back(CaseParameter("_is_main", "main"));
6630 cases.push_back(CaseParameter("_is_not_main", "not_main"));
6631 testFunc.push_back("main");
6632 testFunc.push_back("func");
6633
6634 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6635 {
6636 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6637 {
6638 map<string, string> specializations;
6639 ComputeShaderSpec spec;
6640
6641 specializations["ENTRY"] = "main";
6642 specializations["ID"] = testFunc[fNdx];
6643 specializations["NAME"] = cases[ndx].param;
6644 spec.assembly = shaderTemplate.specialize(specializations);
6645 spec.numWorkGroups = IVec3(numElements, 1, 1);
6646 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6647 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6648
6649 entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6650 }
6651 }
6652
6653 cases.push_back(CaseParameter("_is_entry", "rdc"));
6654
6655 for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6656 {
6657 for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6658 {
6659 map<string, string> specializations;
6660 ComputeShaderSpec spec;
6661
6662 specializations["ENTRY"] = "rdc";
6663 specializations["ID"] = testFunc[fNdx];
6664 specializations["NAME"] = cases[ndx].param;
6665 spec.assembly = shaderTemplate.specialize(specializations);
6666 spec.numWorkGroups = IVec3(numElements, 1, 1);
6667 spec.entryPoint = "rdc";
6668 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6669 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6670
6671 entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6672 }
6673 }
6674
6675 group->addChild(entryMainGroup.release());
6676 group->addChild(entryNotGroup.release());
6677 group->addChild(abuseGroup.release());
6678
6679 return group.release();
6680 }
6681
createOpMemberNameGroup(tcu::TestContext & testCtx)6682 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6683 {
6684 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6685 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6686 vector<CaseParameter> abuseCases;
6687 vector<string> testFunc;
6688 de::Random rnd(deStringHash(group->getName()));
6689 const int numElements = 128;
6690 vector<float> inputFloats(numElements, 0);
6691 vector<float> outputFloats(numElements, 0);
6692
6693 getOpNameAbuseCases(abuseCases);
6694
6695 fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6696
6697 for (size_t ndx = 0; ndx < numElements; ++ndx)
6698 outputFloats[ndx] = -inputFloats[ndx];
6699
6700 const string commonShaderHeader =
6701 "OpCapability Shader\n"
6702 "OpMemoryModel Logical GLSL450\n"
6703 "OpEntryPoint GLCompute %main \"main\" %id\n"
6704 "OpExecutionMode %main LocalSize 1 1 1\n";
6705
6706 const string commonShaderFooter =
6707 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6708
6709 + string(getComputeAsmInputOutputBufferTraits())
6710 + string(getComputeAsmCommonTypes())
6711 + string(getComputeAsmInputOutputBuffer()) +
6712
6713 "%u3str = OpTypeStruct %u32 %u32 %u32\n"
6714
6715 "%id = OpVariable %uvec3ptr Input\n"
6716 "%zero = OpConstant %i32 0\n"
6717
6718 "%main = OpFunction %void None %voidf\n"
6719 "%entry = OpLabel\n"
6720
6721 "%idval = OpLoad %uvec3 %id\n"
6722 "%x0 = OpCompositeExtract %u32 %idval 0\n"
6723
6724 "%idstr = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6725 "%x = OpCompositeExtract %u32 %idstr 0\n"
6726
6727 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6728 "%inval = OpLoad %f32 %inloc\n"
6729 "%neg = OpFNegate %f32 %inval\n"
6730 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6731 " OpStore %outloc %neg\n"
6732
6733 " OpReturn\n"
6734 " OpFunctionEnd\n";
6735
6736 const StringTemplate shaderTemplate(
6737 commonShaderHeader +
6738 "OpMemberName %u3str 0 \"${NAME}\"\n" +
6739 commonShaderFooter);
6740
6741 const std::string multipleNames =
6742 commonShaderHeader +
6743 "OpMemberName %u3str 0 \"to_be\"\n"
6744 "OpMemberName %u3str 1 \"or_not\"\n"
6745 "OpMemberName %u3str 0 \"to_be\"\n"
6746 "OpMemberName %u3str 2 \"makes_no\"\n"
6747 "OpMemberName %u3str 0 \"difference\"\n"
6748 "OpMemberName %u3str 0 \"to_me\"\n" +
6749 commonShaderFooter;
6750 {
6751 ComputeShaderSpec spec;
6752
6753 spec.assembly = multipleNames;
6754 spec.numWorkGroups = IVec3(numElements, 1, 1);
6755 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6756 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6757
6758 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6759 }
6760
6761 const std::string everythingNamedTheSame =
6762 commonShaderHeader +
6763 "OpMemberName %u3str 0 \"the_same\"\n"
6764 "OpMemberName %u3str 1 \"the_same\"\n"
6765 "OpMemberName %u3str 2 \"the_same\"\n" +
6766 commonShaderFooter;
6767
6768 {
6769 ComputeShaderSpec spec;
6770
6771 spec.assembly = everythingNamedTheSame;
6772 spec.numWorkGroups = IVec3(numElements, 1, 1);
6773 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6774 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6775
6776 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6777 }
6778
6779 // u3str_x_is_....
6780 for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6781 {
6782 map<string, string> specializations;
6783 ComputeShaderSpec spec;
6784
6785 specializations["NAME"] = abuseCases[ndx].param;
6786 spec.assembly = shaderTemplate.specialize(specializations);
6787 spec.numWorkGroups = IVec3(numElements, 1, 1);
6788 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6789 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6790
6791 abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6792 }
6793
6794 group->addChild(abuseGroup.release());
6795
6796 return group.release();
6797 }
6798
6799 // Assembly code used for testing function control is based on GLSL source code:
6800 //
6801 // #version 430
6802 //
6803 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6804 // float elements[];
6805 // } input_data;
6806 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6807 // float elements[];
6808 // } output_data;
6809 //
6810 // float const10() { return 10.f; }
6811 //
6812 // void main() {
6813 // uint x = gl_GlobalInvocationID.x;
6814 // output_data.elements[x] = input_data.elements[x] + const10();
6815 // }
createFunctionControlGroup(tcu::TestContext & testCtx)6816 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6817 {
6818 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6819 vector<CaseParameter> cases;
6820 de::Random rnd (deStringHash(group->getName()));
6821 const int numElements = 100;
6822 vector<float> inputFloats (numElements, 0);
6823 vector<float> outputFloats (numElements, 0);
6824 const StringTemplate shaderTemplate (
6825 string(getComputeAsmShaderPreamble()) +
6826
6827 "OpSource GLSL 430\n"
6828 "OpName %main \"main\"\n"
6829 "OpName %func_const10 \"const10(\"\n"
6830 "OpName %id \"gl_GlobalInvocationID\"\n"
6831
6832 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6833
6834 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6835
6836 "%f32f = OpTypeFunction %f32\n"
6837 "%id = OpVariable %uvec3ptr Input\n"
6838 "%zero = OpConstant %i32 0\n"
6839 "%constf10 = OpConstant %f32 10.0\n"
6840
6841 "%main = OpFunction %void None %voidf\n"
6842 "%entry = OpLabel\n"
6843 "%idval = OpLoad %uvec3 %id\n"
6844 "%x = OpCompositeExtract %u32 %idval 0\n"
6845 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6846 "%inval = OpLoad %f32 %inloc\n"
6847 "%ret_10 = OpFunctionCall %f32 %func_const10\n"
6848 "%fadd = OpFAdd %f32 %inval %ret_10\n"
6849 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6850 " OpStore %outloc %fadd\n"
6851 " OpReturn\n"
6852 " OpFunctionEnd\n"
6853
6854 "%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6855 "%label = OpLabel\n"
6856 " OpReturnValue %constf10\n"
6857 " OpFunctionEnd\n");
6858
6859 cases.push_back(CaseParameter("none", "None"));
6860 cases.push_back(CaseParameter("inline", "Inline"));
6861 cases.push_back(CaseParameter("dont_inline", "DontInline"));
6862 cases.push_back(CaseParameter("pure", "Pure"));
6863 cases.push_back(CaseParameter("const", "Const"));
6864 cases.push_back(CaseParameter("inline_pure", "Inline|Pure"));
6865 cases.push_back(CaseParameter("const_dont_inline", "Const|DontInline"));
6866 cases.push_back(CaseParameter("inline_dont_inline", "Inline|DontInline"));
6867 cases.push_back(CaseParameter("pure_inline_dont_inline", "Pure|Inline|DontInline"));
6868
6869 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6870
6871 // CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6872 floorAll(inputFloats);
6873
6874 for (size_t ndx = 0; ndx < numElements; ++ndx)
6875 outputFloats[ndx] = inputFloats[ndx] + 10.f;
6876
6877 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6878 {
6879 map<string, string> specializations;
6880 ComputeShaderSpec spec;
6881
6882 specializations["CONTROL"] = cases[caseNdx].param;
6883 spec.assembly = shaderTemplate.specialize(specializations);
6884 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6885 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6886 spec.numWorkGroups = IVec3(numElements, 1, 1);
6887
6888 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6889 }
6890
6891 return group.release();
6892 }
6893
createMemoryAccessGroup(tcu::TestContext & testCtx)6894 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6895 {
6896 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6897 vector<CaseParameter> cases;
6898 de::Random rnd (deStringHash(group->getName()));
6899 const int numElements = 100;
6900 vector<float> inputFloats (numElements, 0);
6901 vector<float> outputFloats (numElements, 0);
6902 const StringTemplate shaderTemplate (
6903 string(getComputeAsmShaderPreamble()) +
6904
6905 "OpSource GLSL 430\n"
6906 "OpName %main \"main\"\n"
6907 "OpName %id \"gl_GlobalInvocationID\"\n"
6908
6909 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6910
6911 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6912
6913 "%f32ptr_f = OpTypePointer Function %f32\n"
6914
6915 "%id = OpVariable %uvec3ptr Input\n"
6916 "%zero = OpConstant %i32 0\n"
6917 "%four = OpConstant %i32 4\n"
6918
6919 "%main = OpFunction %void None %voidf\n"
6920 "%label = OpLabel\n"
6921 "%copy = OpVariable %f32ptr_f Function\n"
6922 "%idval = OpLoad %uvec3 %id ${ACCESS}\n"
6923 "%x = OpCompositeExtract %u32 %idval 0\n"
6924 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
6925 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
6926 " OpCopyMemory %copy %inloc ${ACCESS}\n"
6927 "%val1 = OpLoad %f32 %copy\n"
6928 "%val2 = OpLoad %f32 %inloc\n"
6929 "%add = OpFAdd %f32 %val1 %val2\n"
6930 " OpStore %outloc %add ${ACCESS}\n"
6931 " OpReturn\n"
6932 " OpFunctionEnd\n");
6933
6934 cases.push_back(CaseParameter("null", ""));
6935 cases.push_back(CaseParameter("none", "None"));
6936 cases.push_back(CaseParameter("volatile", "Volatile"));
6937 cases.push_back(CaseParameter("aligned", "Aligned 4"));
6938 cases.push_back(CaseParameter("nontemporal", "Nontemporal"));
6939 cases.push_back(CaseParameter("aligned_nontemporal", "Aligned|Nontemporal 4"));
6940 cases.push_back(CaseParameter("aligned_volatile", "Volatile|Aligned 4"));
6941
6942 fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6943
6944 for (size_t ndx = 0; ndx < numElements; ++ndx)
6945 outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6946
6947 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6948 {
6949 map<string, string> specializations;
6950 ComputeShaderSpec spec;
6951
6952 specializations["ACCESS"] = cases[caseNdx].param;
6953 spec.assembly = shaderTemplate.specialize(specializations);
6954 spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6955 spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6956 spec.numWorkGroups = IVec3(numElements, 1, 1);
6957
6958 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6959 }
6960
6961 return group.release();
6962 }
6963
6964 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)6965 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6966 {
6967 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6968 vector<CaseParameter> cases;
6969 de::Random rnd (deStringHash(group->getName()));
6970 const int numElements = 100;
6971 vector<float> positiveFloats (numElements, 0);
6972 vector<float> negativeFloats (numElements, 0);
6973 const StringTemplate shaderTemplate (
6974 string(getComputeAsmShaderPreamble()) +
6975
6976 "OpSource GLSL 430\n"
6977 "OpName %main \"main\"\n"
6978 "OpName %id \"gl_GlobalInvocationID\"\n"
6979
6980 "OpDecorate %id BuiltIn GlobalInvocationId\n"
6981
6982 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6983 "%uvec2 = OpTypeVector %u32 2\n"
6984 "%fvec4 = OpTypeVector %f32 4\n"
6985 "%fmat33 = OpTypeMatrix %fvec3 3\n"
6986 "%image = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
6987 "%sampler = OpTypeSampler\n"
6988 "%simage = OpTypeSampledImage %image\n"
6989 "%const100 = OpConstant %u32 100\n"
6990 "%uarr100 = OpTypeArray %i32 %const100\n"
6991 "%struct = OpTypeStruct %f32 %i32 %u32\n"
6992 "%pointer = OpTypePointer Function %i32\n"
6993 + string(getComputeAsmInputOutputBuffer()) +
6994
6995 "%id = OpVariable %uvec3ptr Input\n"
6996 "%zero = OpConstant %i32 0\n"
6997
6998 "%main = OpFunction %void None %voidf\n"
6999 "%label = OpLabel\n"
7000
7001 "%undef = OpUndef ${TYPE}\n"
7002
7003 "%idval = OpLoad %uvec3 %id\n"
7004 "%x = OpCompositeExtract %u32 %idval 0\n"
7005
7006 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7007 "%inval = OpLoad %f32 %inloc\n"
7008 "%neg = OpFNegate %f32 %inval\n"
7009 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7010 " OpStore %outloc %neg\n"
7011 " OpReturn\n"
7012 " OpFunctionEnd\n");
7013
7014 cases.push_back(CaseParameter("bool", "%bool"));
7015 cases.push_back(CaseParameter("sint32", "%i32"));
7016 cases.push_back(CaseParameter("uint32", "%u32"));
7017 cases.push_back(CaseParameter("float32", "%f32"));
7018 cases.push_back(CaseParameter("vec4float32", "%fvec4"));
7019 cases.push_back(CaseParameter("vec2uint32", "%uvec2"));
7020 cases.push_back(CaseParameter("matrix", "%fmat33"));
7021 cases.push_back(CaseParameter("image", "%image"));
7022 cases.push_back(CaseParameter("sampler", "%sampler"));
7023 cases.push_back(CaseParameter("sampledimage", "%simage"));
7024 cases.push_back(CaseParameter("array", "%uarr100"));
7025 cases.push_back(CaseParameter("runtimearray", "%f32arr"));
7026 cases.push_back(CaseParameter("struct", "%struct"));
7027 cases.push_back(CaseParameter("pointer", "%pointer"));
7028
7029 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7030
7031 for (size_t ndx = 0; ndx < numElements; ++ndx)
7032 negativeFloats[ndx] = -positiveFloats[ndx];
7033
7034 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7035 {
7036 map<string, string> specializations;
7037 ComputeShaderSpec spec;
7038
7039 specializations["TYPE"] = cases[caseNdx].param;
7040 spec.assembly = shaderTemplate.specialize(specializations);
7041 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7042 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7043 spec.numWorkGroups = IVec3(numElements, 1, 1);
7044
7045 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7046 }
7047
7048 // OpUndef with constants.
7049 {
7050 static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7051
7052 static const struct
7053 {
7054 const std::string name;
7055 const std::string desc;
7056 } amberCases[] =
7057 {
7058 { "undefined_constant_composite", "OpUndef value in OpConstantComposite" },
7059 { "undefined_spec_constant_composite", "OpUndef value in OpSpecConstantComposite" },
7060 };
7061
7062 for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7063 {
7064 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7065 amberCases[i].name.c_str(),
7066 amberCases[i].desc.c_str(),
7067 data_dir,
7068 amberCases[i].name + ".amber");
7069 group->addChild(testCase);
7070 }
7071 }
7072
7073 return group.release();
7074 }
7075
7076 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7077 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7078 {
7079 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7080 vector<CaseParameter> cases;
7081 de::Random rnd (deStringHash(group->getName()));
7082 const int numElements = 100;
7083 vector<float> positiveFloats (numElements, 0);
7084 vector<float> negativeFloats (numElements, 0);
7085 const StringTemplate shaderTemplate (
7086 "OpCapability Shader\n"
7087 "OpCapability Float16\n"
7088 "OpMemoryModel Logical GLSL450\n"
7089 "OpEntryPoint GLCompute %main \"main\" %id\n"
7090 "OpExecutionMode %main LocalSize 1 1 1\n"
7091 "OpSource GLSL 430\n"
7092 "OpName %main \"main\"\n"
7093 "OpName %id \"gl_GlobalInvocationID\"\n"
7094
7095 "OpDecorate %id BuiltIn GlobalInvocationId\n"
7096
7097 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7098
7099 "%id = OpVariable %uvec3ptr Input\n"
7100 "%zero = OpConstant %i32 0\n"
7101 "%f16 = OpTypeFloat 16\n"
7102 "%c_f16_0 = OpConstant %f16 0.0\n"
7103 "%c_f16_0_5 = OpConstant %f16 0.5\n"
7104 "%c_f16_1 = OpConstant %f16 1.0\n"
7105 "%v2f16 = OpTypeVector %f16 2\n"
7106 "%v3f16 = OpTypeVector %f16 3\n"
7107 "%v4f16 = OpTypeVector %f16 4\n"
7108
7109 "${CONSTANT}\n"
7110
7111 "%main = OpFunction %void None %voidf\n"
7112 "%label = OpLabel\n"
7113 "%idval = OpLoad %uvec3 %id\n"
7114 "%x = OpCompositeExtract %u32 %idval 0\n"
7115 "%inloc = OpAccessChain %f32ptr %indata %zero %x\n"
7116 "%inval = OpLoad %f32 %inloc\n"
7117 "%neg = OpFNegate %f32 %inval\n"
7118 "%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
7119 " OpStore %outloc %neg\n"
7120 " OpReturn\n"
7121 " OpFunctionEnd\n");
7122
7123
7124 cases.push_back(CaseParameter("vector", "%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7125 cases.push_back(CaseParameter("matrix", "%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7126 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7127 "%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7128 cases.push_back(CaseParameter("struct", "%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7129 "%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7130 "%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7131 "%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7132 "%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7133 cases.push_back(CaseParameter("nested_struct", "%st1 = OpTypeStruct %i32 %f16\n"
7134 "%st2 = OpTypeStruct %i32 %i32\n"
7135 "%struct = OpTypeStruct %st1 %st2\n"
7136 "%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7137 "%st2val = OpConstantComposite %st2 %zero %zero\n"
7138 "%const = OpConstantComposite %struct %st1val %st2val"));
7139
7140 fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7141
7142 for (size_t ndx = 0; ndx < numElements; ++ndx)
7143 negativeFloats[ndx] = -positiveFloats[ndx];
7144
7145 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7146 {
7147 map<string, string> specializations;
7148 ComputeShaderSpec spec;
7149
7150 specializations["CONSTANT"] = cases[caseNdx].param;
7151 spec.assembly = shaderTemplate.specialize(specializations);
7152 spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7153 spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7154 spec.numWorkGroups = IVec3(numElements, 1, 1);
7155
7156 spec.extensions.push_back("VK_KHR_shader_float16_int8");
7157
7158 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7159
7160 group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7161 }
7162
7163 return group.release();
7164 }
7165
// Expands inData (N values) into one operand list of N*N values, so that the lists
// produced for argNo 0 and argNo 1 together enumerate every ordered pair of inputs:
//  argNo 0 - the whole input sequence repeated N times (a b c a b c ...)
//  argNo 1 - every input value repeated N times in a row (a a a b b b ...)
// Any other argNo yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		count	= inData.size();
	vector<deFloat16>	expanded;

	expanded.reserve(count * count);

	switch (argNo)
	{
		case 0:
			for (size_t repeatNdx = 0; repeatNdx < count; ++repeatNdx)
				expanded.insert(expanded.end(), inData.begin(), inData.end());
			break;

		case 1:
			for (size_t valueNdx = 0; valueNdx < count; ++valueNdx)
				expanded.insert(expanded.end(), count, inData[valueNdx]);
			break;

		default:
			break;
	}

	return expanded;
}
7191
// Two-component vector counterpart of squarize(). First every ordered pair (x, y) of
// input values is turned into a 2-component vector (N*N vectors), then those vectors are
// expanded into one operand list so that argNo 0 and argNo 1 together enumerate every
// ordered pair of vectors:
//  argNo 0 - the whole vector sequence repeated once per vector
//  argNo 1 - every vector repeated once per vector, in a row
// Any other argNo yields an empty vector.
const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		coordsPerVector	= 2;
	const size_t		numScalars		= inData.size();
	vector<deFloat16>	pairs;
	vector<deFloat16>	expanded;

	// The expanded list grows with the fourth power of the input length.
	DE_ASSERT(numScalars <= 64);

	// Stage 1: all ordered pairs of input scalars, stored as consecutive (x, y) coordinates.
	pairs.reserve(coordsPerVector * numScalars * numScalars);

	for (size_t xNdx = 0; xNdx < numScalars; ++xNdx)
	for (size_t yNdx = 0; yNdx < numScalars; ++yNdx)
	{
		pairs.push_back(inData[xNdx]);
		pairs.push_back(inData[yNdx]);
	}

	// Stage 2: all ordered pairs of vectors; argNo selects which member of the pair is emitted.
	const size_t		numVectors		= pairs.size() / coordsPerVector;

	expanded.reserve(coordsPerVector * numVectors * numVectors);

	if (argNo == 0 || argNo == 1)
	{
		for (size_t outerNdx = 0; outerNdx < numVectors; ++outerNdx)
		for (size_t innerNdx = 0; innerNdx < numVectors; ++innerNdx)
		{
			// First operand varies fastest, second operand varies slowest.
			const size_t	srcVector	= (argNo == 0) ? innerNdx : outerNdx;

			for (size_t coordNdx = 0; coordNdx < coordsPerVector; ++coordNdx)
				expanded.push_back(pairs[coordsPerVector * srcVector + coordNdx]);
		}
	}

	return expanded;
}
7243
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isNan7244 struct fp16isNan { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isNaN(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isInf7245 struct fp16isInf { bool operator()(const tcu::Float16 in1, const tcu::Float16) { return in1.isInf(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isEqual7246 struct fp16isEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() == in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isUnequal7247 struct fp16isUnequal { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() != in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isLess7248 struct fp16isLess { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() < in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isGreater7249 struct fp16isGreater { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() > in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isLessOrEqual7250 struct fp16isLessOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() <= in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isGreaterOrEqual7251 struct fp16isGreaterOrEqual { bool operator()(const tcu::Float16 in1, const tcu::Float16 in2) { return in1.asFloat() >= in2.asFloat(); } };
7252
7253 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7254 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
7255 {
7256 if (inputs.size() != 2 || outputAllocs.size() != 1)
7257 return false;
7258
7259 vector<deUint8> input1Bytes;
7260 vector<deUint8> input2Bytes;
7261
7262 inputs[0].getBytes(input1Bytes);
7263 inputs[1].getBytes(input2Bytes);
7264
7265 const deUint32 denormModesCount = 2;
7266 const deFloat16 float16one = tcu::Float16(1.0f).bits();
7267 const deFloat16 float16zero = tcu::Float16(0.0f).bits();
7268 const tcu::Float16 zero = tcu::Float16::zero(1);
7269 const deFloat16* const outputAsFP16 = static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
7270 const deFloat16* const input1AsFP16 = reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7271 const deFloat16* const input2AsFP16 = reinterpret_cast<deFloat16* const>(&input2Bytes.front());
7272 deUint32 successfulRuns = denormModesCount;
7273 std::string results[denormModesCount];
7274 TestedLogicalFunction testedLogicalFunction;
7275
7276 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7277 {
7278 const bool flushToZero = (denormMode == 1);
7279
7280 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7281 {
7282 const tcu::Float16 f1pre = tcu::Float16(input1AsFP16[idx]);
7283 const tcu::Float16 f2pre = tcu::Float16(input2AsFP16[idx]);
7284 const tcu::Float16 f1 = (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7285 const tcu::Float16 f2 = (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7286 deFloat16 expectedOutput = float16zero;
7287
7288 if (onlyTestFunc)
7289 {
7290 if (testedLogicalFunction(f1, f2))
7291 expectedOutput = float16one;
7292 }
7293 else
7294 {
7295 const bool f1nan = f1.isNaN();
7296 const bool f2nan = f2.isNaN();
7297
7298 // Skip NaN floats if not supported by implementation
7299 if (!nanSupported && (f1nan || f2nan))
7300 continue;
7301
7302 if (unationModeAnd)
7303 {
7304 const bool ordered = !f1nan && !f2nan;
7305
7306 if (ordered && testedLogicalFunction(f1, f2))
7307 expectedOutput = float16one;
7308 }
7309 else
7310 {
7311 const bool unordered = f1nan || f2nan;
7312
7313 if (unordered || testedLogicalFunction(f1, f2))
7314 expectedOutput = float16one;
7315 }
7316 }
7317
7318 if (outputAsFP16[idx] != expectedOutput)
7319 {
7320 std::ostringstream str;
7321
7322 str << "ERROR: Sub-case #" << idx
7323 << " flushToZero:" << flushToZero
7324 << std::hex
7325 << " failed, inputs: 0x" << f1.bits()
7326 << ";0x" << f2.bits()
7327 << " output: 0x" << outputAsFP16[idx]
7328 << " expected output: 0x" << expectedOutput;
7329
7330 results[denormMode] = str.str();
7331
7332 successfulRuns--;
7333
7334 break;
7335 }
7336 }
7337 }
7338
7339 if (successfulRuns == 0)
7340 for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7341 log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7342
7343 return successfulRuns > 0;
7344 }
7345
7346 } // anonymous
7347
createOpSourceTests(tcu::TestContext & testCtx)7348 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7349 {
7350 struct NameCodePair { string name, code; };
7351 RGBA defaultColors[4];
7352 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
7353 const std::string opsourceGLSLWithFile = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7354 map<string, string> fragments = passthruFragments();
7355 const NameCodePair tests[] =
7356 {
7357 {"unknown", "OpSource Unknown 321"},
7358 {"essl", "OpSource ESSL 310"},
7359 {"glsl", "OpSource GLSL 450"},
7360 {"opencl_cpp", "OpSource OpenCL_CPP 120"},
7361 {"opencl_c", "OpSource OpenCL_C 120"},
7362 {"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7363 {"file", opsourceGLSLWithFile},
7364 {"source", opsourceGLSLWithFile + "\"void main(){}\""},
7365 // Longest possible source string: SPIR-V limits instructions to 65535
7366 // words, of which the first 4 are opsourceGLSLWithFile; the rest will
7367 // contain 65530 UTF8 characters (one word each) plus one last word
7368 // containing 3 ASCII characters and \0.
7369 {"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7370 };
7371
7372 getDefaultColors(defaultColors);
7373 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7374 {
7375 fragments["debug"] = tests[testNdx].code;
7376 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7377 }
7378
7379 return opSourceTests.release();
7380 }
7381
createOpSourceContinuedTests(tcu::TestContext & testCtx)7382 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7383 {
7384 struct NameCodePair { string name, code; };
7385 RGBA defaultColors[4];
7386 de::MovePtr<tcu::TestCaseGroup> opSourceTests (new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7387 map<string, string> fragments = passthruFragments();
7388 const std::string opsource = "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7389 const NameCodePair tests[] =
7390 {
7391 {"empty", opsource + "OpSourceContinued \"\""},
7392 {"short", opsource + "OpSourceContinued \"abcde\""},
7393 {"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7394 // Longest possible source string: SPIR-V limits instructions to 65535
7395 // words, of which the first one is OpSourceContinued/length; the rest
7396 // will contain 65533 UTF8 characters (one word each) plus one last word
7397 // containing 3 ASCII characters and \0.
7398 {"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7399 };
7400
7401 getDefaultColors(defaultColors);
7402 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7403 {
7404 fragments["debug"] = tests[testNdx].code;
7405 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7406 }
7407
7408 return opSourceTests.release();
7409 }
createOpNoLineTests(tcu::TestContext & testCtx)7410 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7411 {
7412 RGBA defaultColors[4];
7413 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7414 map<string, string> fragments;
7415 getDefaultColors(defaultColors);
7416 fragments["debug"] =
7417 "%name = OpString \"name\"\n";
7418
7419 fragments["pre_main"] =
7420 "OpNoLine\n"
7421 "OpNoLine\n"
7422 "OpLine %name 1 1\n"
7423 "OpNoLine\n"
7424 "OpLine %name 1 1\n"
7425 "OpLine %name 1 1\n"
7426 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7427 "OpNoLine\n"
7428 "OpLine %name 1 1\n"
7429 "OpNoLine\n"
7430 "OpLine %name 1 1\n"
7431 "OpLine %name 1 1\n"
7432 "%second_param1 = OpFunctionParameter %v4f32\n"
7433 "OpNoLine\n"
7434 "OpNoLine\n"
7435 "%label_secondfunction = OpLabel\n"
7436 "OpNoLine\n"
7437 "OpReturnValue %second_param1\n"
7438 "OpFunctionEnd\n"
7439 "OpNoLine\n"
7440 "OpNoLine\n";
7441
7442 fragments["testfun"] =
7443 // A %test_code function that returns its argument unchanged.
7444 "OpNoLine\n"
7445 "OpNoLine\n"
7446 "OpLine %name 1 1\n"
7447 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7448 "OpNoLine\n"
7449 "%param1 = OpFunctionParameter %v4f32\n"
7450 "OpNoLine\n"
7451 "OpNoLine\n"
7452 "%label_testfun = OpLabel\n"
7453 "OpNoLine\n"
7454 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7455 "OpReturnValue %val1\n"
7456 "OpFunctionEnd\n"
7457 "OpLine %name 1 1\n"
7458 "OpNoLine\n";
7459
7460 createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7461
7462 return opLineTests.release();
7463 }
7464
createOpModuleProcessedTests(tcu::TestContext & testCtx)7465 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7466 {
7467 RGBA defaultColors[4];
7468 de::MovePtr<tcu::TestCaseGroup> opModuleProcessedTests (new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7469 map<string, string> fragments;
7470 std::vector<std::string> noExtensions;
7471 GraphicsResources resources;
7472
7473 getDefaultColors(defaultColors);
7474 resources.verifyBinary = veryfiBinaryShader;
7475 resources.spirvVersion = SPIRV_VERSION_1_3;
7476
7477 fragments["moduleprocessed"] =
7478 "OpModuleProcessed \"VULKAN CTS\"\n"
7479 "OpModuleProcessed \"Negative values\"\n"
7480 "OpModuleProcessed \"Date: 2017/09/21\"\n";
7481
7482 fragments["pre_main"] =
7483 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7484 "%second_param1 = OpFunctionParameter %v4f32\n"
7485 "%label_secondfunction = OpLabel\n"
7486 "OpReturnValue %second_param1\n"
7487 "OpFunctionEnd\n";
7488
7489 fragments["testfun"] =
7490 // A %test_code function that returns its argument unchanged.
7491 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7492 "%param1 = OpFunctionParameter %v4f32\n"
7493 "%label_testfun = OpLabel\n"
7494 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7495 "OpReturnValue %val1\n"
7496 "OpFunctionEnd\n";
7497
7498 createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7499
7500 return opModuleProcessedTests.release();
7501 }
7502
7503
createOpLineTests(tcu::TestContext & testCtx)7504 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7505 {
7506 RGBA defaultColors[4];
7507 de::MovePtr<tcu::TestCaseGroup> opLineTests (new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7508 map<string, string> fragments;
7509 std::vector<std::pair<std::string, std::string> > problemStrings;
7510
7511 problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7512 problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7513 problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7514 getDefaultColors(defaultColors);
7515
7516 fragments["debug"] =
7517 "%other_name = OpString \"other_name\"\n";
7518
7519 fragments["pre_main"] =
7520 "OpLine %file_name 32 0\n"
7521 "OpLine %file_name 32 32\n"
7522 "OpLine %file_name 32 40\n"
7523 "OpLine %other_name 32 40\n"
7524 "OpLine %other_name 0 100\n"
7525 "OpLine %other_name 0 4294967295\n"
7526 "OpLine %other_name 4294967295 0\n"
7527 "OpLine %other_name 32 40\n"
7528 "OpLine %file_name 0 0\n"
7529 "%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7530 "OpLine %file_name 1 0\n"
7531 "%second_param1 = OpFunctionParameter %v4f32\n"
7532 "OpLine %file_name 1 3\n"
7533 "OpLine %file_name 1 2\n"
7534 "%label_secondfunction = OpLabel\n"
7535 "OpLine %file_name 0 2\n"
7536 "OpReturnValue %second_param1\n"
7537 "OpFunctionEnd\n"
7538 "OpLine %file_name 0 2\n"
7539 "OpLine %file_name 0 2\n";
7540
7541 fragments["testfun"] =
7542 // A %test_code function that returns its argument unchanged.
7543 "OpLine %file_name 1 0\n"
7544 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7545 "OpLine %file_name 16 330\n"
7546 "%param1 = OpFunctionParameter %v4f32\n"
7547 "OpLine %file_name 14 442\n"
7548 "%label_testfun = OpLabel\n"
7549 "OpLine %file_name 11 1024\n"
7550 "%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7551 "OpLine %file_name 2 97\n"
7552 "OpReturnValue %val1\n"
7553 "OpFunctionEnd\n"
7554 "OpLine %file_name 5 32\n";
7555
7556 for (size_t i = 0; i < problemStrings.size(); ++i)
7557 {
7558 map<string, string> testFragments = fragments;
7559 testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7560 createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7561 }
7562
7563 return opLineTests.release();
7564 }
7565
createOpConstantNullTests(tcu::TestContext & testCtx)7566 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7567 {
7568 de::MovePtr<tcu::TestCaseGroup> opConstantNullTests (new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7569 RGBA colors[4];
7570
7571
7572 const char functionStart[] =
7573 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7574 "%param1 = OpFunctionParameter %v4f32\n"
7575 "%lbl = OpLabel\n";
7576
7577 const char functionEnd[] =
7578 "OpReturnValue %transformed_param\n"
7579 "OpFunctionEnd\n";
7580
7581 struct NameConstantsCode
7582 {
7583 string name;
7584 string constants;
7585 string code;
7586 };
7587
7588 NameConstantsCode tests[] =
7589 {
7590 {
7591 "vec4",
7592 "%cnull = OpConstantNull %v4f32\n",
7593 "%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7594 },
7595 {
7596 "float",
7597 "%cnull = OpConstantNull %f32\n",
7598 "%vp = OpVariable %fp_v4f32 Function\n"
7599 "%v = OpLoad %v4f32 %vp\n"
7600 "%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7601 "%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7602 "%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7603 "%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7604 "%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7605 },
7606 {
7607 "bool",
7608 "%cnull = OpConstantNull %bool\n",
7609 "%v = OpVariable %fp_v4f32 Function\n"
7610 " OpStore %v %param1\n"
7611 " OpSelectionMerge %false_label None\n"
7612 " OpBranchConditional %cnull %true_label %false_label\n"
7613 "%true_label = OpLabel\n"
7614 " OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7615 " OpBranch %false_label\n"
7616 "%false_label = OpLabel\n"
7617 "%transformed_param = OpLoad %v4f32 %v\n"
7618 },
7619 {
7620 "i32",
7621 "%cnull = OpConstantNull %i32\n",
7622 "%v = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7623 "%b = OpIEqual %bool %cnull %c_i32_0\n"
7624 " OpSelectionMerge %false_label None\n"
7625 " OpBranchConditional %b %true_label %false_label\n"
7626 "%true_label = OpLabel\n"
7627 " OpStore %v %param1\n"
7628 " OpBranch %false_label\n"
7629 "%false_label = OpLabel\n"
7630 "%transformed_param = OpLoad %v4f32 %v\n"
7631 },
7632 {
7633 "struct",
7634 "%stype = OpTypeStruct %f32 %v4f32\n"
7635 "%fp_stype = OpTypePointer Function %stype\n"
7636 "%cnull = OpConstantNull %stype\n",
7637 "%v = OpVariable %fp_stype Function %cnull\n"
7638 "%f = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7639 "%f_val = OpLoad %v4f32 %f\n"
7640 "%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7641 },
7642 {
7643 "array",
7644 "%a4_v4f32 = OpTypeArray %v4f32 %c_u32_4\n"
7645 "%fp_a4_v4f32 = OpTypePointer Function %a4_v4f32\n"
7646 "%cnull = OpConstantNull %a4_v4f32\n",
7647 "%v = OpVariable %fp_a4_v4f32 Function %cnull\n"
7648 "%f = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7649 "%f1 = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7650 "%f2 = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7651 "%f3 = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7652 "%f_val = OpLoad %v4f32 %f\n"
7653 "%f1_val = OpLoad %v4f32 %f1\n"
7654 "%f2_val = OpLoad %v4f32 %f2\n"
7655 "%f3_val = OpLoad %v4f32 %f3\n"
7656 "%t0 = OpFAdd %v4f32 %param1 %f_val\n"
7657 "%t1 = OpFAdd %v4f32 %t0 %f1_val\n"
7658 "%t2 = OpFAdd %v4f32 %t1 %f2_val\n"
7659 "%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7660 },
7661 {
7662 "matrix",
7663 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7664 "%cnull = OpConstantNull %mat4x4_f32\n",
7665 // Our null matrix * any vector should result in a zero vector.
7666 "%v = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7667 "%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7668 }
7669 };
7670
7671 getHalfColorsFullAlpha(colors);
7672
7673 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7674 {
7675 map<string, string> fragments;
7676 fragments["pre_main"] = tests[testNdx].constants;
7677 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7678 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7679 }
7680 return opConstantNullTests.release();
7681 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7682 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7683 {
7684 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7685 RGBA inputColors[4];
7686 RGBA outputColors[4];
7687
7688
7689 const char functionStart[] =
7690 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7691 "%param1 = OpFunctionParameter %v4f32\n"
7692 "%lbl = OpLabel\n";
7693
7694 const char functionEnd[] =
7695 "OpReturnValue %transformed_param\n"
7696 "OpFunctionEnd\n";
7697
7698 struct NameConstantsCode
7699 {
7700 string name;
7701 string constants;
7702 string code;
7703 };
7704
7705 NameConstantsCode tests[] =
7706 {
7707 {
7708 "vec4",
7709
7710 "%cval = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7711 "%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7712 },
7713 {
7714 "struct",
7715
7716 "%stype = OpTypeStruct %v4f32 %f32\n"
7717 "%fp_stype = OpTypePointer Function %stype\n"
7718 "%f32_n_1 = OpConstant %f32 -1.0\n"
7719 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7720 "%cvec = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7721 "%cval = OpConstantComposite %stype %cvec %f32_n_1\n",
7722
7723 "%v = OpVariable %fp_stype Function %cval\n"
7724 "%vec_ptr = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7725 "%f32_ptr = OpAccessChain %fp_f32 %v %c_u32_1\n"
7726 "%vec_val = OpLoad %v4f32 %vec_ptr\n"
7727 "%f32_val = OpLoad %f32 %f32_ptr\n"
7728 "%tmp1 = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7729 "%tmp2 = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7730 "%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7731 },
7732 {
7733 // [1|0|0|0.5] [x] = x + 0.5
7734 // [0|1|0|0.5] [y] = y + 0.5
7735 // [0|0|1|0.5] [z] = z + 0.5
7736 // [0|0|0|1 ] [1] = 1
7737 "matrix",
7738
7739 "%mat4x4_f32 = OpTypeMatrix %v4f32 4\n"
7740 "%v4f32_1_0_0_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7741 "%v4f32_0_1_0_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7742 "%v4f32_0_0_1_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7743 "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7744 "%cval = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7745
7746 "%transformed_param = OpMatrixTimesVector %v4f32 %cval %param1\n"
7747 },
7748 {
7749 "array",
7750
7751 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7752 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7753 "%f32_n_1 = OpConstant %f32 -1.0\n"
7754 "%f32_1_5 = OpConstant %f32 !0x3fc00000\n" // +1.5
7755 "%carr = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7756
7757 "%v = OpVariable %fp_a4f32 Function %carr\n"
7758 "%f = OpAccessChain %fp_f32 %v %c_u32_0\n"
7759 "%f1 = OpAccessChain %fp_f32 %v %c_u32_1\n"
7760 "%f2 = OpAccessChain %fp_f32 %v %c_u32_2\n"
7761 "%f3 = OpAccessChain %fp_f32 %v %c_u32_3\n"
7762 "%f_val = OpLoad %f32 %f\n"
7763 "%f1_val = OpLoad %f32 %f1\n"
7764 "%f2_val = OpLoad %f32 %f2\n"
7765 "%f3_val = OpLoad %f32 %f3\n"
7766 "%ftot1 = OpFAdd %f32 %f_val %f1_val\n"
7767 "%ftot2 = OpFAdd %f32 %ftot1 %f2_val\n"
7768 "%ftot3 = OpFAdd %f32 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
7769 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7770 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7771 },
7772 {
7773 //
7774 // [
7775 // {
7776 // 0.0,
7777 // [ 1.0, 1.0, 1.0, 1.0]
7778 // },
7779 // {
7780 // 1.0,
7781 // [ 0.0, 0.5, 0.0, 0.0]
7782 // }, // ^^^
7783 // {
7784 // 0.0,
7785 // [ 1.0, 1.0, 1.0, 1.0]
7786 // }
7787 // ]
7788 "array_of_struct_of_array",
7789
7790 "%c_v4f32_1_1_1_0 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7791 "%fp_a4f32 = OpTypePointer Function %a4f32\n"
7792 "%stype = OpTypeStruct %f32 %a4f32\n"
7793 "%a3stype = OpTypeArray %stype %c_u32_3\n"
7794 "%fp_a3stype = OpTypePointer Function %a3stype\n"
7795 "%ca4f32_0 = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7796 "%ca4f32_1 = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7797 "%cstype1 = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7798 "%cstype2 = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7799 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7800
7801 "%v = OpVariable %fp_a3stype Function %carr\n"
7802 "%f = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7803 "%f_l = OpLoad %f32 %f\n"
7804 "%add_vec = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7805 "%transformed_param = OpFAdd %v4f32 %param1 %add_vec\n"
7806 }
7807 };
7808
7809 getHalfColorsFullAlpha(inputColors);
7810 outputColors[0] = RGBA(255, 255, 255, 255);
7811 outputColors[1] = RGBA(255, 127, 127, 255);
7812 outputColors[2] = RGBA(127, 255, 127, 255);
7813 outputColors[3] = RGBA(127, 127, 255, 255);
7814
7815 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7816 {
7817 map<string, string> fragments;
7818 fragments["pre_main"] = tests[testNdx].constants;
7819 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7820 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7821 }
7822 return opConstantCompositeTests.release();
7823 }
7824
createSelectionBlockOrderTests(tcu::TestContext & testCtx)7825 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7826 {
7827 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7828 RGBA inputColors[4];
7829 RGBA outputColors[4];
7830 map<string, string> fragments;
7831
7832 // vec4 test_code(vec4 param) {
7833 // vec4 result = param;
7834 // for (int i = 0; i < 4; ++i) {
7835 // if (i == 0) result[i] = 0.;
7836 // else result[i] = 1. - result[i];
7837 // }
7838 // return result;
7839 // }
7840 const char function[] =
7841 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7842 "%param1 = OpFunctionParameter %v4f32\n"
7843 "%lbl = OpLabel\n"
7844 "%iptr = OpVariable %fp_i32 Function\n"
7845 "%result = OpVariable %fp_v4f32 Function\n"
7846 " OpStore %iptr %c_i32_0\n"
7847 " OpStore %result %param1\n"
7848 " OpBranch %loop\n"
7849
7850 // Loop entry block.
7851 "%loop = OpLabel\n"
7852 "%ival = OpLoad %i32 %iptr\n"
7853 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7854 " OpLoopMerge %exit %if_entry None\n"
7855 " OpBranchConditional %lt_4 %if_entry %exit\n"
7856
7857 // Merge block for loop.
7858 "%exit = OpLabel\n"
7859 "%ret = OpLoad %v4f32 %result\n"
7860 " OpReturnValue %ret\n"
7861
7862 // If-statement entry block.
7863 "%if_entry = OpLabel\n"
7864 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7865 "%eq_0 = OpIEqual %bool %ival %c_i32_0\n"
7866 " OpSelectionMerge %if_exit None\n"
7867 " OpBranchConditional %eq_0 %if_true %if_false\n"
7868
7869 // False branch for if-statement.
7870 "%if_false = OpLabel\n"
7871 "%val = OpLoad %f32 %loc\n"
7872 "%sub = OpFSub %f32 %c_f32_1 %val\n"
7873 " OpStore %loc %sub\n"
7874 " OpBranch %if_exit\n"
7875
7876 // Merge block for if-statement.
7877 "%if_exit = OpLabel\n"
7878 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7879 " OpStore %iptr %ival_next\n"
7880 " OpBranch %loop\n"
7881
7882 // True branch for if-statement.
7883 "%if_true = OpLabel\n"
7884 " OpStore %loc %c_f32_0\n"
7885 " OpBranch %if_exit\n"
7886
7887 " OpFunctionEnd\n";
7888
7889 fragments["testfun"] = function;
7890
7891 inputColors[0] = RGBA(127, 127, 127, 0);
7892 inputColors[1] = RGBA(127, 0, 0, 0);
7893 inputColors[2] = RGBA(0, 127, 0, 0);
7894 inputColors[3] = RGBA(0, 0, 127, 0);
7895
7896 outputColors[0] = RGBA(0, 128, 128, 255);
7897 outputColors[1] = RGBA(0, 255, 255, 255);
7898 outputColors[2] = RGBA(0, 128, 255, 255);
7899 outputColors[3] = RGBA(0, 255, 128, 255);
7900
7901 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7902
7903 return group.release();
7904 }
7905
createSwitchBlockOrderTests(tcu::TestContext & testCtx)7906 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7907 {
7908 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7909 RGBA inputColors[4];
7910 RGBA outputColors[4];
7911 map<string, string> fragments;
7912
7913 const char typesAndConstants[] =
7914 "%c_f32_p2 = OpConstant %f32 0.2\n"
7915 "%c_f32_p4 = OpConstant %f32 0.4\n"
7916 "%c_f32_p6 = OpConstant %f32 0.6\n"
7917 "%c_f32_p8 = OpConstant %f32 0.8\n";
7918
7919 // vec4 test_code(vec4 param) {
7920 // vec4 result = param;
7921 // for (int i = 0; i < 4; ++i) {
7922 // switch (i) {
7923 // case 0: result[i] += .2; break;
7924 // case 1: result[i] += .6; break;
7925 // case 2: result[i] += .4; break;
7926 // case 3: result[i] += .8; break;
7927 // default: break; // unreachable
7928 // }
7929 // }
7930 // return result;
7931 // }
7932 const char function[] =
7933 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7934 "%param1 = OpFunctionParameter %v4f32\n"
7935 "%lbl = OpLabel\n"
7936 "%iptr = OpVariable %fp_i32 Function\n"
7937 "%result = OpVariable %fp_v4f32 Function\n"
7938 " OpStore %iptr %c_i32_0\n"
7939 " OpStore %result %param1\n"
7940 " OpBranch %loop\n"
7941
7942 // Loop entry block.
7943 "%loop = OpLabel\n"
7944 "%ival = OpLoad %i32 %iptr\n"
7945 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
7946 " OpLoopMerge %exit %cont None\n"
7947 " OpBranchConditional %lt_4 %switch_entry %exit\n"
7948
7949 // Merge block for loop.
7950 "%exit = OpLabel\n"
7951 "%ret = OpLoad %v4f32 %result\n"
7952 " OpReturnValue %ret\n"
7953
7954 // Switch-statement entry block.
7955 "%switch_entry = OpLabel\n"
7956 "%loc = OpAccessChain %fp_f32 %result %ival\n"
7957 "%val = OpLoad %f32 %loc\n"
7958 " OpSelectionMerge %switch_exit None\n"
7959 " OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7960
7961 "%case2 = OpLabel\n"
7962 "%addp4 = OpFAdd %f32 %val %c_f32_p4\n"
7963 " OpStore %loc %addp4\n"
7964 " OpBranch %switch_exit\n"
7965
7966 "%switch_default = OpLabel\n"
7967 " OpUnreachable\n"
7968
7969 "%case3 = OpLabel\n"
7970 "%addp8 = OpFAdd %f32 %val %c_f32_p8\n"
7971 " OpStore %loc %addp8\n"
7972 " OpBranch %switch_exit\n"
7973
7974 "%case0 = OpLabel\n"
7975 "%addp2 = OpFAdd %f32 %val %c_f32_p2\n"
7976 " OpStore %loc %addp2\n"
7977 " OpBranch %switch_exit\n"
7978
7979 // Merge block for switch-statement.
7980 "%switch_exit = OpLabel\n"
7981 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7982 " OpStore %iptr %ival_next\n"
7983 " OpBranch %cont\n"
7984 "%cont = OpLabel\n"
7985 " OpBranch %loop\n"
7986
7987 "%case1 = OpLabel\n"
7988 "%addp6 = OpFAdd %f32 %val %c_f32_p6\n"
7989 " OpStore %loc %addp6\n"
7990 " OpBranch %switch_exit\n"
7991
7992 " OpFunctionEnd\n";
7993
7994 fragments["pre_main"] = typesAndConstants;
7995 fragments["testfun"] = function;
7996
7997 inputColors[0] = RGBA(127, 27, 127, 51);
7998 inputColors[1] = RGBA(127, 0, 0, 51);
7999 inputColors[2] = RGBA(0, 27, 0, 51);
8000 inputColors[3] = RGBA(0, 0, 127, 51);
8001
8002 outputColors[0] = RGBA(178, 180, 229, 255);
8003 outputColors[1] = RGBA(178, 153, 102, 255);
8004 outputColors[2] = RGBA(51, 180, 102, 255);
8005 outputColors[3] = RGBA(51, 153, 229, 255);
8006
8007 createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8008
8009 addOpSwitchAmberTests(*group, testCtx);
8010
8011 return group.release();
8012 }
8013
createDecorationGroupTests(tcu::TestContext & testCtx)8014 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8015 {
8016 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8017 RGBA inputColors[4];
8018 RGBA outputColors[4];
8019 map<string, string> fragments;
8020
8021 const char decorations[] =
8022 "OpDecorate %array_group ArrayStride 4\n"
8023 "OpDecorate %struct_member_group Offset 0\n"
8024 "%array_group = OpDecorationGroup\n"
8025 "%struct_member_group = OpDecorationGroup\n"
8026
8027 "OpDecorate %group1 RelaxedPrecision\n"
8028 "OpDecorate %group3 RelaxedPrecision\n"
8029 "OpDecorate %group3 Flat\n"
8030 "OpDecorate %group3 Restrict\n"
8031 "%group0 = OpDecorationGroup\n"
8032 "%group1 = OpDecorationGroup\n"
8033 "%group3 = OpDecorationGroup\n";
8034
8035 const char typesAndConstants[] =
8036 "%a3f32 = OpTypeArray %f32 %c_u32_3\n"
8037 "%struct1 = OpTypeStruct %a3f32\n"
8038 "%struct2 = OpTypeStruct %a3f32\n"
8039 "%fp_struct1 = OpTypePointer Function %struct1\n"
8040 "%fp_struct2 = OpTypePointer Function %struct2\n"
8041 "%c_f32_2 = OpConstant %f32 2.\n"
8042 "%c_f32_n2 = OpConstant %f32 -2.\n"
8043
8044 "%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8045 "%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8046 "%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8047 "%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8048
8049 const char function[] =
8050 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8051 "%param = OpFunctionParameter %v4f32\n"
8052 "%entry = OpLabel\n"
8053 "%result = OpVariable %fp_v4f32 Function\n"
8054 "%v_struct1 = OpVariable %fp_struct1 Function\n"
8055 "%v_struct2 = OpVariable %fp_struct2 Function\n"
8056 " OpStore %result %param\n"
8057 " OpStore %v_struct1 %c_struct1\n"
8058 " OpStore %v_struct2 %c_struct2\n"
8059 "%ptr1 = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8060 "%val1 = OpLoad %f32 %ptr1\n"
8061 "%ptr2 = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8062 "%val2 = OpLoad %f32 %ptr2\n"
8063 "%addvalues = OpFAdd %f32 %val1 %val2\n"
8064 "%ptr = OpAccessChain %fp_f32 %result %c_i32_1\n"
8065 "%val = OpLoad %f32 %ptr\n"
8066 "%addresult = OpFAdd %f32 %addvalues %val\n"
8067 " OpStore %ptr %addresult\n"
8068 "%ret = OpLoad %v4f32 %result\n"
8069 " OpReturnValue %ret\n"
8070 " OpFunctionEnd\n";
8071
8072 struct CaseNameDecoration
8073 {
8074 string name;
8075 string decoration;
8076 };
8077
8078 CaseNameDecoration tests[] =
8079 {
8080 {
8081 "same_decoration_group_on_multiple_types",
8082 "OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8083 },
8084 {
8085 "empty_decoration_group",
8086 "OpGroupDecorate %group0 %a3f32\n"
8087 "OpGroupDecorate %group0 %result\n"
8088 },
8089 {
8090 "one_element_decoration_group",
8091 "OpGroupDecorate %array_group %a3f32\n"
8092 },
8093 {
8094 "multiple_elements_decoration_group",
8095 "OpGroupDecorate %group3 %v_struct1\n"
8096 },
8097 {
8098 "multiple_decoration_groups_on_same_variable",
8099 "OpGroupDecorate %group0 %v_struct2\n"
8100 "OpGroupDecorate %group1 %v_struct2\n"
8101 "OpGroupDecorate %group3 %v_struct2\n"
8102 },
8103 {
8104 "same_decoration_group_multiple_times",
8105 "OpGroupDecorate %group1 %addvalues\n"
8106 "OpGroupDecorate %group1 %addvalues\n"
8107 "OpGroupDecorate %group1 %addvalues\n"
8108 },
8109
8110 };
8111
8112 getHalfColorsFullAlpha(inputColors);
8113 getHalfColorsFullAlpha(outputColors);
8114
8115 for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8116 {
8117 fragments["decoration"] = decorations + tests[idx].decoration;
8118 fragments["pre_main"] = typesAndConstants;
8119 fragments["testfun"] = function;
8120
8121 createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8122 }
8123
8124 return group.release();
8125 }
8126
8127 struct SpecConstantTwoValGraphicsCase
8128 {
8129 const std::string caseName;
8130 const std::string scDefinition0;
8131 const std::string scDefinition1;
8132 const std::string scResultType;
8133 const std::string scOperation;
8134 SpecConstantValue scActualValue0;
8135 SpecConstantValue scActualValue1;
8136 const std::string resultOperation;
8137 RGBA expectedColors[4];
8138 CaseFlags caseFlags;
8139
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8140 SpecConstantTwoValGraphicsCase (const std::string& name,
8141 const std::string& definition0,
8142 const std::string& definition1,
8143 const std::string& resultType,
8144 const std::string& operation,
8145 const SpecConstantValue& value0,
8146 const SpecConstantValue& value1,
8147 const std::string& resultOp,
8148 const RGBA (&output)[4],
8149 CaseFlags flags = FLAG_NONE)
8150 : caseName (name)
8151 , scDefinition0 (definition0)
8152 , scDefinition1 (definition1)
8153 , scResultType (resultType)
8154 , scOperation (operation)
8155 , scActualValue0 (value0)
8156 , scActualValue1 (value1)
8157 , resultOperation (resultOp)
8158 , caseFlags (flags)
8159 {
8160 expectedColors[0] = output[0];
8161 expectedColors[1] = output[1];
8162 expectedColors[2] = output[2];
8163 expectedColors[3] = output[3];
8164 }
8165 };
8166
createSpecConstantTests(tcu::TestContext & testCtx)8167 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8168 {
8169 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8170 vector<SpecConstantTwoValGraphicsCase> cases;
8171 RGBA inputColors[4];
8172 RGBA outputColors0[4];
8173 RGBA outputColors1[4];
8174 RGBA outputColors2[4];
8175
8176 const char decorations1[] =
8177 "OpDecorate %sc_0 SpecId 0\n"
8178 "OpDecorate %sc_1 SpecId 1\n";
8179
8180 const char typesAndConstants1[] =
8181 "${OPTYPE_DEFINITIONS:opt}"
8182 "%sc_0 = OpSpecConstant${SC_DEF0}\n"
8183 "%sc_1 = OpSpecConstant${SC_DEF1}\n"
8184 "%sc_op = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8185
8186 const char function1[] =
8187 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8188 "%param = OpFunctionParameter %v4f32\n"
8189 "%label = OpLabel\n"
8190 "%result = OpVariable %fp_v4f32 Function\n"
8191 "${TYPE_CONVERT:opt}"
8192 " OpStore %result %param\n"
8193 "%gen = ${GEN_RESULT}\n"
8194 "%index = OpIAdd %i32 %gen %c_i32_1\n"
8195 "%loc = OpAccessChain %fp_f32 %result %index\n"
8196 "%val = OpLoad %f32 %loc\n"
8197 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8198 " OpStore %loc %add\n"
8199 "%ret = OpLoad %v4f32 %result\n"
8200 " OpReturnValue %ret\n"
8201 " OpFunctionEnd\n";
8202
8203 inputColors[0] = RGBA(127, 127, 127, 255);
8204 inputColors[1] = RGBA(127, 0, 0, 255);
8205 inputColors[2] = RGBA(0, 127, 0, 255);
8206 inputColors[3] = RGBA(0, 0, 127, 255);
8207
8208 // Derived from inputColors[x] by adding 128 to inputColors[x][0].
8209 outputColors0[0] = RGBA(255, 127, 127, 255);
8210 outputColors0[1] = RGBA(255, 0, 0, 255);
8211 outputColors0[2] = RGBA(128, 127, 0, 255);
8212 outputColors0[3] = RGBA(128, 0, 127, 255);
8213
8214 // Derived from inputColors[x] by adding 128 to inputColors[x][1].
8215 outputColors1[0] = RGBA(127, 255, 127, 255);
8216 outputColors1[1] = RGBA(127, 128, 0, 255);
8217 outputColors1[2] = RGBA(0, 255, 0, 255);
8218 outputColors1[3] = RGBA(0, 128, 127, 255);
8219
8220 // Derived from inputColors[x] by adding 128 to inputColors[x][2].
8221 outputColors2[0] = RGBA(127, 127, 255, 255);
8222 outputColors2[1] = RGBA(127, 0, 128, 255);
8223 outputColors2[2] = RGBA(0, 127, 128, 255);
8224 outputColors2[3] = RGBA(0, 0, 255, 255);
8225
8226 const char addZeroToSc[] = "OpIAdd %i32 %c_i32_0 %sc_op";
8227 const char addZeroToSc32[] = "OpIAdd %i32 %c_i32_0 %sc_op32";
8228 const char selectTrueUsingSc[] = "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8229 const char selectFalseUsingSc[] = "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8230
8231 cases.push_back(SpecConstantTwoValGraphicsCase("iadd", " %i32 0", " %i32 0", "%i32", "IAdd %sc_0 %sc_1", 19, -20, addZeroToSc, outputColors0));
8232 cases.push_back(SpecConstantTwoValGraphicsCase("isub", " %i32 0", " %i32 0", "%i32", "ISub %sc_0 %sc_1", 19, 20, addZeroToSc, outputColors0));
8233 cases.push_back(SpecConstantTwoValGraphicsCase("imul", " %i32 0", " %i32 0", "%i32", "IMul %sc_0 %sc_1", -1, -1, addZeroToSc, outputColors2));
8234 cases.push_back(SpecConstantTwoValGraphicsCase("sdiv", " %i32 0", " %i32 0", "%i32", "SDiv %sc_0 %sc_1", -126, 126, addZeroToSc, outputColors0));
8235 cases.push_back(SpecConstantTwoValGraphicsCase("udiv", " %i32 0", " %i32 0", "%i32", "UDiv %sc_0 %sc_1", 126, 126, addZeroToSc, outputColors2));
8236 cases.push_back(SpecConstantTwoValGraphicsCase("srem", " %i32 0", " %i32 0", "%i32", "SRem %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8237 cases.push_back(SpecConstantTwoValGraphicsCase("smod", " %i32 0", " %i32 0", "%i32", "SMod %sc_0 %sc_1", 3, 2, addZeroToSc, outputColors2));
8238 cases.push_back(SpecConstantTwoValGraphicsCase("umod", " %i32 0", " %i32 0", "%i32", "UMod %sc_0 %sc_1", 1001, 500, addZeroToSc, outputColors2));
8239 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand", " %i32 0", " %i32 0", "%i32", "BitwiseAnd %sc_0 %sc_1", 0x33, 0x0d, addZeroToSc, outputColors2));
8240 cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor", " %i32 0", " %i32 0", "%i32", "BitwiseOr %sc_0 %sc_1", 0, 1, addZeroToSc, outputColors2));
8241 cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor", " %i32 0", " %i32 0", "%i32", "BitwiseXor %sc_0 %sc_1", 0x2e, 0x2f, addZeroToSc, outputColors2));
8242 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical", " %i32 0", " %i32 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, 1, addZeroToSc, outputColors2));
8243 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic", " %i32 0", " %i32 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, 2, addZeroToSc, outputColors0));
8244 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical", " %i32 0", " %i32 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, 0, addZeroToSc, outputColors2));
8245
8246 // Shifts for other integer sizes.
8247 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightLogical %sc_0 %sc_1", deInt64{2}, deInt64{1}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8248 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64", " %i64 0", " %i64 0", "%i64", "ShiftRightArithmetic %sc_0 %sc_1", deInt64{-4}, deInt64{2}, addZeroToSc32, outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8249 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64", " %i64 0", " %i64 0", "%i64", "ShiftLeftLogical %sc_0 %sc_1", deInt64{1}, deInt64{0}, addZeroToSc32, outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8250 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightLogical %sc_0 %sc_1", deInt16{2}, deInt16{1}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8251 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16", " %i16 0", " %i16 0", "%i16", "ShiftRightArithmetic %sc_0 %sc_1", deInt16{-4}, deInt16{2}, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8252 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16", " %i16 0", " %i16 0", "%i16", "ShiftLeftLogical %sc_0 %sc_1", deInt16{1}, deInt16{0}, addZeroToSc32, outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8253 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightLogical %sc_0 %sc_1", deInt8{2}, deInt8{1}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8254 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8", " %i8 0", " %i8 0", "%i8", "ShiftRightArithmetic %sc_0 %sc_1", deInt8{-4}, deInt8{2}, addZeroToSc32, outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8255 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8", " %i8 0", " %i8 0", "%i8", "ShiftLeftLogical %sc_0 %sc_1", deInt8{1}, deInt8{0}, addZeroToSc32, outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8256
8257 // Shifts for other integer sizes but only in the shift amount.
8258 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt64{1}, addZeroToSc, outputColors2, (FLAG_I64)));
8259 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt64{2}, addZeroToSc, outputColors0, (FLAG_I64)));
8260 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64", " %i32 0", " %i64 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt64{0}, addZeroToSc, outputColors2, (FLAG_I64)));
8261 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt16{1}, addZeroToSc, outputColors2, (FLAG_I16)));
8262 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt16{2}, addZeroToSc, outputColors0, (FLAG_I16)));
8263 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16", " %i32 0", " %i16 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt16{0}, addZeroToSc, outputColors2, (FLAG_I16)));
8264 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightLogical %sc_0 %sc_1", 2, deInt8{1}, addZeroToSc, outputColors2, (FLAG_I8)));
8265 cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftRightArithmetic %sc_0 %sc_1", -4, deInt8{2}, addZeroToSc, outputColors0, (FLAG_I8)));
8266 cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8", " %i32 0", " %i8 0", "%i32", "ShiftLeftLogical %sc_0 %sc_1", 1, deInt8{0}, addZeroToSc, outputColors2, (FLAG_I8)));
8267
8268 cases.push_back(SpecConstantTwoValGraphicsCase("slessthan", " %i32 0", " %i32 0", "%bool", "SLessThan %sc_0 %sc_1", -20, -10, selectTrueUsingSc, outputColors2));
8269 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan", " %i32 0", " %i32 0", "%bool", "ULessThan %sc_0 %sc_1", 10, 20, selectTrueUsingSc, outputColors2));
8270 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan", " %i32 0", " %i32 0", "%bool", "SGreaterThan %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8271 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan", " %i32 0", " %i32 0", "%bool", "UGreaterThan %sc_0 %sc_1", 10, 5, selectTrueUsingSc, outputColors2));
8272 cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal", " %i32 0", " %i32 0", "%bool", "SLessThanEqual %sc_0 %sc_1", -10, -10, selectTrueUsingSc, outputColors2));
8273 cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal", " %i32 0", " %i32 0", "%bool", "ULessThanEqual %sc_0 %sc_1", 50, 100, selectTrueUsingSc, outputColors2));
8274 cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal", " %i32 0", " %i32 0", "%bool", "SGreaterThanEqual %sc_0 %sc_1", -1000, 50, selectFalseUsingSc, outputColors2));
8275 cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal", " %i32 0", " %i32 0", "%bool", "UGreaterThanEqual %sc_0 %sc_1", 10, 10, selectTrueUsingSc, outputColors2));
8276 cases.push_back(SpecConstantTwoValGraphicsCase("iequal", " %i32 0", " %i32 0", "%bool", "IEqual %sc_0 %sc_1", 42, 24, selectFalseUsingSc, outputColors2));
8277 cases.push_back(SpecConstantTwoValGraphicsCase("inotequal", " %i32 0", " %i32 0", "%bool", "INotEqual %sc_0 %sc_1", 42, 24, selectTrueUsingSc, outputColors2));
8278 cases.push_back(SpecConstantTwoValGraphicsCase("logicaland", "True %bool", "True %bool", "%bool", "LogicalAnd %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8279 cases.push_back(SpecConstantTwoValGraphicsCase("logicalor", "False %bool", "False %bool", "%bool", "LogicalOr %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8280 cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal", "True %bool", "True %bool", "%bool", "LogicalEqual %sc_0 %sc_1", 0, 1, selectFalseUsingSc, outputColors2));
8281 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal", "False %bool", "False %bool", "%bool", "LogicalNotEqual %sc_0 %sc_1", 1, 0, selectTrueUsingSc, outputColors2));
8282 cases.push_back(SpecConstantTwoValGraphicsCase("snegate", " %i32 0", " %i32 0", "%i32", "SNegate %sc_0", -1, 0, addZeroToSc, outputColors2));
8283 cases.push_back(SpecConstantTwoValGraphicsCase("not", " %i32 0", " %i32 0", "%i32", "Not %sc_0", -2, 0, addZeroToSc, outputColors2));
8284 cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot", "False %bool", "False %bool", "%bool", "LogicalNot %sc_0", 1, 0, selectFalseUsingSc, outputColors2));
8285 cases.push_back(SpecConstantTwoValGraphicsCase("select", "False %bool", " %i32 0", "%i32", "Select %sc_0 %sc_1 %c_i32_0", 1, 1, addZeroToSc, outputColors2));
8286 cases.push_back(SpecConstantTwoValGraphicsCase("sconvert", " %i32 0", " %i32 0", "%i16", "SConvert %sc_0", -1, 0, addZeroToSc32, outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8287 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert", " %f32 0", " %f32 0", "%f64", "FConvert %sc_0", tcu::Float32(-1.0), tcu::Float32(0.0), addZeroToSc32, outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8288 cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16", " %f16 0", " %f16 0", "%f32", "FConvert %sc_0", tcu::Float16(-1.0), tcu::Float16(0.0), addZeroToSc32, outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8289 // \todo[2015-12-1 antiagainst] OpQuantizeToF16
8290
8291 for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8292 {
8293 map<string, string> specializations;
8294 map<string, string> fragments;
8295 SpecConstants specConstants;
8296 PushConstants noPushConstants;
8297 GraphicsResources noResources;
8298 GraphicsInterfaces noInterfaces;
8299 vector<string> extensions;
8300 VulkanFeatures requiredFeatures;
8301
8302 // Special SPIR-V code when using 16-bit integers.
8303 if (cases[caseNdx].caseFlags & FLAG_I16)
8304 {
8305 requiredFeatures.coreFeatures.shaderInt16 = VK_TRUE;
8306 fragments["capability"] += "OpCapability Int16\n"; // Adds 16-bit integer capability
8307 specializations["OPTYPE_DEFINITIONS"] += "%i16 = OpTypeInt 16 1\n"; // Adds 16-bit integer type
8308 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8309 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 16-bit integer to 32-bit integer
8310 }
8311
8312 // Special SPIR-V code when using 64-bit integers.
8313 if (cases[caseNdx].caseFlags & FLAG_I64)
8314 {
8315 requiredFeatures.coreFeatures.shaderInt64 = VK_TRUE;
8316 fragments["capability"] += "OpCapability Int64\n"; // Adds 64-bit integer capability
8317 specializations["OPTYPE_DEFINITIONS"] += "%i64 = OpTypeInt 64 1\n"; // Adds 64-bit integer type
8318 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8319 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 64-bit integer to 32-bit integer
8320 }
8321
8322 // Special SPIR-V code when using 64-bit floats.
8323 if (cases[caseNdx].caseFlags & FLAG_F64)
8324 {
8325 requiredFeatures.coreFeatures.shaderFloat64 = VK_TRUE;
8326 fragments["capability"] += "OpCapability Float64\n"; // Adds 64-bit float capability
8327 specializations["OPTYPE_DEFINITIONS"] += "%f64 = OpTypeFloat 64\n"; // Adds 64-bit float type
8328 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8329 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 64-bit float to 32-bit integer
8330 }
8331
8332 // Extension needed for float16 and int8.
8333 if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8334 extensions.push_back("VK_KHR_shader_float16_int8");
8335
8336 // Special SPIR-V code when using 16-bit floats.
8337 if (cases[caseNdx].caseFlags & FLAG_F16)
8338 {
8339 requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8340 fragments["capability"] += "OpCapability Float16\n"; // Adds 16-bit float capability
8341 specializations["OPTYPE_DEFINITIONS"] += "%f16 = OpTypeFloat 16\n"; // Adds 16-bit float type
8342 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8343 specializations["TYPE_CONVERT"] += "%sc_op32 = OpConvertFToS %i32 %sc_op\n"; // Converts 16-bit float to 32-bit integer
8344 }
8345
8346 // Special SPIR-V code when using 8-bit integers.
8347 if (cases[caseNdx].caseFlags & FLAG_I8)
8348 {
8349 requiredFeatures.extFloat16Int8.shaderInt8 = true;
8350 fragments["capability"] += "OpCapability Int8\n"; // Adds 8-bit integer capability
8351 specializations["OPTYPE_DEFINITIONS"] += "%i8 = OpTypeInt 8 1\n"; // Adds 8-bit integer type
8352 if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8353 specializations["TYPE_CONVERT"] += "%sc_op32 = OpSConvert %i32 %sc_op\n"; // Converts 8-bit integer to 32-bit integer
8354 }
8355
8356 specializations["SC_DEF0"] = cases[caseNdx].scDefinition0;
8357 specializations["SC_DEF1"] = cases[caseNdx].scDefinition1;
8358 specializations["SC_RESULT_TYPE"] = cases[caseNdx].scResultType;
8359 specializations["SC_OP"] = cases[caseNdx].scOperation;
8360 specializations["GEN_RESULT"] = cases[caseNdx].resultOperation;
8361
8362 fragments["decoration"] = tcu::StringTemplate(decorations1).specialize(specializations);
8363 fragments["pre_main"] = tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8364 fragments["testfun"] = tcu::StringTemplate(function1).specialize(specializations);
8365
8366 cases[caseNdx].scActualValue0.appendTo(specConstants);
8367 cases[caseNdx].scActualValue1.appendTo(specConstants);
8368
8369 createTestsForAllStages(
8370 cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8371 noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8372 }
8373
8374 const char decorations2[] =
8375 "OpDecorate %sc_0 SpecId 0\n"
8376 "OpDecorate %sc_1 SpecId 1\n"
8377 "OpDecorate %sc_2 SpecId 2\n";
8378
8379 const std::string typesAndConstants2 =
8380 "%vec3_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8381 "%vec3_undef = OpUndef %v3i32\n"
8382
8383 + getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8384
8385 "%sc_0 = OpSpecConstant %i32 0\n"
8386 "%sc_1 = OpSpecConstant %i32 0\n"
8387 "%sc_2 = OpSpecConstant %i32 0\n"
8388
8389 + getSpecConstantOpStructConstBlock() +
8390
8391 "%sc_vec3_0 = OpSpecConstantOp %v3i32 CompositeInsert %sc_0 %vec3_0 0\n" // (sc_0, 0, 0)
8392 "%sc_vec3_1 = OpSpecConstantOp %v3i32 CompositeInsert %sc_1 %vec3_0 1\n" // (0, sc_1, 0)
8393 "%sc_vec3_2 = OpSpecConstantOp %v3i32 CompositeInsert %sc_2 %vec3_0 2\n" // (0, 0, sc_2)
8394 "%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2\n" // (sc_0, ???, 0)
8395 "%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0\n" // (???, sc_1, 0)
8396 "%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5\n" // (sc_2, ???, sc_2)
8397 "%sc_vec3_01 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n" // (0, sc_0, sc_1)
8398 "%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2\n" // (sc_2, sc_0, sc_1)
8399 "%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0\n" // sc_2
8400 "%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1\n" // sc_0
8401 "%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2\n" // sc_1
8402 "%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1\n" // (sc_2 - sc_0)
8403 "%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2\n"; // (sc_2 - sc_0) * sc_1
8404
8405 const std::string function2 =
8406 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8407 "%param = OpFunctionParameter %v4f32\n"
8408 "%label = OpLabel\n"
8409 "%result = OpVariable %fp_v4f32 Function\n"
8410
8411 + getSpecConstantOpStructInstructions() +
8412
8413 " OpStore %result %param\n"
8414 "%loc = OpAccessChain %fp_f32 %result %sc_final\n"
8415 "%val = OpLoad %f32 %loc\n"
8416 "%add = OpFAdd %f32 %val %c_f32_0_5\n"
8417 " OpStore %loc %add\n"
8418 "%ret = OpLoad %v4f32 %result\n"
8419 " OpReturnValue %ret\n"
8420 " OpFunctionEnd\n";
8421
8422 map<string, string> fragments;
8423 SpecConstants specConstants;
8424
8425 fragments["decoration"] = decorations2;
8426 fragments["pre_main"] = typesAndConstants2;
8427 fragments["testfun"] = function2;
8428
8429 specConstants.append<deInt32>(56789);
8430 specConstants.append<deInt32>(-2);
8431 specConstants.append<deInt32>(56788);
8432
8433 createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8434
8435 return group.release();
8436 }
8437
createOpPhiTests(tcu::TestContext & testCtx)8438 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8439 {
8440 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8441 RGBA inputColors[4];
8442 RGBA outputColors1[4];
8443 RGBA outputColors2[4];
8444 RGBA outputColors3[4];
8445 RGBA outputColors4[4];
8446 map<string, string> fragments1;
8447 map<string, string> fragments2;
8448 map<string, string> fragments3;
8449 map<string, string> fragments4;
8450 std::vector<std::string> extensions4;
8451 GraphicsResources resources4;
8452 VulkanFeatures vulkanFeatures4;
8453
8454 const char typesAndConstants1[] =
8455 "%c_f32_p2 = OpConstant %f32 0.2\n"
8456 "%c_f32_p4 = OpConstant %f32 0.4\n"
8457 "%c_f32_p5 = OpConstant %f32 0.5\n"
8458 "%c_f32_p8 = OpConstant %f32 0.8\n";
8459
8460 // vec4 test_code(vec4 param) {
8461 // vec4 result = param;
8462 // for (int i = 0; i < 4; ++i) {
8463 // float operand;
8464 // switch (i) {
8465 // case 0: operand = .2; break;
8466 // case 1: operand = .5; break;
8467 // case 2: operand = .4; break;
8468 // case 3: operand = .0; break;
8469 // default: break; // unreachable
8470 // }
8471 // result[i] += operand;
8472 // }
8473 // return result;
8474 // }
8475 const char function1[] =
8476 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8477 "%param1 = OpFunctionParameter %v4f32\n"
8478 "%lbl = OpLabel\n"
8479 "%iptr = OpVariable %fp_i32 Function\n"
8480 "%result = OpVariable %fp_v4f32 Function\n"
8481 " OpStore %iptr %c_i32_0\n"
8482 " OpStore %result %param1\n"
8483 " OpBranch %loop\n"
8484
8485 "%loop = OpLabel\n"
8486 "%ival = OpLoad %i32 %iptr\n"
8487 "%lt_4 = OpSLessThan %bool %ival %c_i32_4\n"
8488 " OpLoopMerge %exit %cont None\n"
8489 " OpBranchConditional %lt_4 %entry %exit\n"
8490
8491 "%entry = OpLabel\n"
8492 "%loc = OpAccessChain %fp_f32 %result %ival\n"
8493 "%val = OpLoad %f32 %loc\n"
8494 " OpSelectionMerge %phi None\n"
8495 " OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8496
8497 "%case0 = OpLabel\n"
8498 " OpBranch %phi\n"
8499 "%case1 = OpLabel\n"
8500 " OpBranch %phi\n"
8501 "%case2 = OpLabel\n"
8502 " OpBranch %phi\n"
8503 "%case3 = OpLabel\n"
8504 " OpBranch %phi\n"
8505
8506 "%default = OpLabel\n"
8507 " OpUnreachable\n"
8508
8509 "%phi = OpLabel\n"
8510 "%operand = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8511 " OpBranch %cont\n"
8512 "%cont = OpLabel\n"
8513 "%add = OpFAdd %f32 %val %operand\n"
8514 " OpStore %loc %add\n"
8515 "%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8516 " OpStore %iptr %ival_next\n"
8517 " OpBranch %loop\n"
8518
8519 "%exit = OpLabel\n"
8520 "%ret = OpLoad %v4f32 %result\n"
8521 " OpReturnValue %ret\n"
8522
8523 " OpFunctionEnd\n";
8524
8525 fragments1["pre_main"] = typesAndConstants1;
8526 fragments1["testfun"] = function1;
8527
8528 getHalfColorsFullAlpha(inputColors);
8529
8530 outputColors1[0] = RGBA(178, 255, 229, 255);
8531 outputColors1[1] = RGBA(178, 127, 102, 255);
8532 outputColors1[2] = RGBA(51, 255, 102, 255);
8533 outputColors1[3] = RGBA(51, 127, 229, 255);
8534
8535 createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8536
8537 const char typesAndConstants2[] =
8538 "%c_f32_p2 = OpConstant %f32 0.2\n";
8539
8540 // Add .4 to the second element of the given parameter.
8541 const char function2[] =
8542 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8543 "%param = OpFunctionParameter %v4f32\n"
8544 "%entry = OpLabel\n"
8545 "%result = OpVariable %fp_v4f32 Function\n"
8546 " OpStore %result %param\n"
8547 "%loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8548 "%val = OpLoad %f32 %loc\n"
8549 " OpBranch %phi\n"
8550
8551 "%phi = OpLabel\n"
8552 "%step = OpPhi %i32 %c_i32_0 %entry %step_next %phi\n"
8553 "%accum = OpPhi %f32 %val %entry %accum_next %phi\n"
8554 "%step_next = OpIAdd %i32 %step %c_i32_1\n"
8555 "%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8556 "%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8557 " OpLoopMerge %exit %phi None\n"
8558 " OpBranchConditional %still_loop %phi %exit\n"
8559
8560 "%exit = OpLabel\n"
8561 " OpStore %loc %accum\n"
8562 "%ret = OpLoad %v4f32 %result\n"
8563 " OpReturnValue %ret\n"
8564
8565 " OpFunctionEnd\n";
8566
8567 fragments2["pre_main"] = typesAndConstants2;
8568 fragments2["testfun"] = function2;
8569
8570 outputColors2[0] = RGBA(127, 229, 127, 255);
8571 outputColors2[1] = RGBA(127, 102, 0, 255);
8572 outputColors2[2] = RGBA(0, 229, 0, 255);
8573 outputColors2[3] = RGBA(0, 102, 127, 255);
8574
8575 createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8576
8577 const char typesAndConstants3[] =
8578 "%true = OpConstantTrue %bool\n"
8579 "%false = OpConstantFalse %bool\n"
8580 "%c_f32_p2 = OpConstant %f32 0.2\n";
8581
8582 // Swap the second and the third element of the given parameter.
8583 const char function3[] =
8584 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8585 "%param = OpFunctionParameter %v4f32\n"
8586 "%entry = OpLabel\n"
8587 "%result = OpVariable %fp_v4f32 Function\n"
8588 " OpStore %result %param\n"
8589 "%a_loc = OpAccessChain %fp_f32 %result %c_i32_1\n"
8590 "%a_init = OpLoad %f32 %a_loc\n"
8591 "%b_loc = OpAccessChain %fp_f32 %result %c_i32_2\n"
8592 "%b_init = OpLoad %f32 %b_loc\n"
8593 " OpBranch %phi\n"
8594
8595 "%phi = OpLabel\n"
8596 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8597 "%a_next = OpPhi %f32 %a_init %entry %b_next %phi\n"
8598 "%b_next = OpPhi %f32 %b_init %entry %a_next %phi\n"
8599 " OpLoopMerge %exit %phi None\n"
8600 " OpBranchConditional %still_loop %phi %exit\n"
8601
8602 "%exit = OpLabel\n"
8603 " OpStore %a_loc %a_next\n"
8604 " OpStore %b_loc %b_next\n"
8605 "%ret = OpLoad %v4f32 %result\n"
8606 " OpReturnValue %ret\n"
8607
8608 " OpFunctionEnd\n";
8609
8610 fragments3["pre_main"] = typesAndConstants3;
8611 fragments3["testfun"] = function3;
8612
8613 outputColors3[0] = RGBA(127, 127, 127, 255);
8614 outputColors3[1] = RGBA(127, 0, 0, 255);
8615 outputColors3[2] = RGBA(0, 0, 127, 255);
8616 outputColors3[3] = RGBA(0, 127, 0, 255);
8617
8618 createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8619
8620 const char typesAndConstants4[] =
8621 "%f16 = OpTypeFloat 16\n"
8622 "%v4f16 = OpTypeVector %f16 4\n"
8623 "%fp_f16 = OpTypePointer Function %f16\n"
8624 "%fp_v4f16 = OpTypePointer Function %v4f16\n"
8625 "%true = OpConstantTrue %bool\n"
8626 "%false = OpConstantFalse %bool\n"
8627 "%c_f32_p2 = OpConstant %f32 0.2\n";
8628
8629 // Swap the second and the third element of the given parameter.
8630 const char function4[] =
8631 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8632 "%param = OpFunctionParameter %v4f32\n"
8633 "%entry = OpLabel\n"
8634 "%result = OpVariable %fp_v4f16 Function\n"
8635 "%param16 = OpFConvert %v4f16 %param\n"
8636 " OpStore %result %param16\n"
8637 "%a_loc = OpAccessChain %fp_f16 %result %c_i32_1\n"
8638 "%a_init = OpLoad %f16 %a_loc\n"
8639 "%b_loc = OpAccessChain %fp_f16 %result %c_i32_2\n"
8640 "%b_init = OpLoad %f16 %b_loc\n"
8641 " OpBranch %phi\n"
8642
8643 "%phi = OpLabel\n"
8644 "%still_loop = OpPhi %bool %true %entry %false %phi\n"
8645 "%a_next = OpPhi %f16 %a_init %entry %b_next %phi\n"
8646 "%b_next = OpPhi %f16 %b_init %entry %a_next %phi\n"
8647 " OpLoopMerge %exit %phi None\n"
8648 " OpBranchConditional %still_loop %phi %exit\n"
8649
8650 "%exit = OpLabel\n"
8651 " OpStore %a_loc %a_next\n"
8652 " OpStore %b_loc %b_next\n"
8653 "%ret16 = OpLoad %v4f16 %result\n"
8654 "%ret = OpFConvert %v4f32 %ret16\n"
8655 " OpReturnValue %ret\n"
8656
8657 " OpFunctionEnd\n";
8658
8659 fragments4["pre_main"] = typesAndConstants4;
8660 fragments4["testfun"] = function4;
8661 fragments4["capability"] = "OpCapability Float16\n";
8662
8663 extensions4.push_back("VK_KHR_shader_float16_int8");
8664
8665 vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8666
8667 outputColors4[0] = RGBA(127, 127, 127, 255);
8668 outputColors4[1] = RGBA(127, 0, 0, 255);
8669 outputColors4[2] = RGBA(0, 0, 127, 255);
8670 outputColors4[3] = RGBA(0, 127, 0, 255);
8671
8672 createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8673
8674 return group.release();
8675 }
8676
createNoContractionTests(tcu::TestContext & testCtx)8677 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8678 {
8679 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8680 RGBA inputColors[4];
8681 RGBA outputColors[4];
8682
8683 // With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
8684 // For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
8685 // only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
8686 // On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
8687 const char constantsAndTypes[] =
8688 "%c_vec4_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8689 "%c_vec4_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8690 "%c_f32_1pl2_23 = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8691 "%c_f32_1mi2_23 = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8692 "%c_f32_n1pn24 = OpConstant %f32 -0x1p-24\n";
8693
8694 const char function[] =
8695 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8696 "%param = OpFunctionParameter %v4f32\n"
8697 "%label = OpLabel\n"
8698 "%var1 = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8699 "%var2 = OpVariable %fp_f32 Function\n"
8700 "%red = OpCompositeExtract %f32 %param 0\n"
8701 "%plus_red = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8702 " OpStore %var2 %plus_red\n"
8703 "%val1 = OpLoad %f32 %var1\n"
8704 "%val2 = OpLoad %f32 %var2\n"
8705 "%mul = OpFMul %f32 %val1 %val2\n"
8706 "%add = OpFAdd %f32 %mul %c_f32_n1\n"
8707 "%is0 = OpFOrdEqual %bool %add %c_f32_0\n"
8708 "%isn1n24 = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8709 "%success = OpLogicalOr %bool %is0 %isn1n24\n"
8710 "%v4success = OpCompositeConstruct %v4bool %success %success %success %success\n"
8711 "%ret = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8712 " OpReturnValue %ret\n"
8713 " OpFunctionEnd\n";
8714
8715 struct CaseNameDecoration
8716 {
8717 string name;
8718 string decoration;
8719 };
8720
8721
8722 CaseNameDecoration tests[] = {
8723 {"multiplication", "OpDecorate %mul NoContraction"},
8724 {"addition", "OpDecorate %add NoContraction"},
8725 {"both", "OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8726 };
8727
8728 getHalfColorsFullAlpha(inputColors);
8729
8730 for (deUint8 idx = 0; idx < 4; ++idx)
8731 {
8732 inputColors[idx].setRed(0);
8733 outputColors[idx] = RGBA(0, 0, 0, 255);
8734 }
8735
8736 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8737 {
8738 map<string, string> fragments;
8739
8740 fragments["decoration"] = tests[testNdx].decoration;
8741 fragments["pre_main"] = constantsAndTypes;
8742 fragments["testfun"] = function;
8743
8744 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8745 }
8746
8747 return group.release();
8748 }
8749
createMemoryAccessTests(tcu::TestContext & testCtx)8750 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8751 {
8752 de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8753 RGBA colors[4];
8754
8755 const char constantsAndTypes[] =
8756 "%c_a2f32_1 = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8757 "%fp_a2f32 = OpTypePointer Function %a2f32\n"
8758 "%stype = OpTypeStruct %v4f32 %a2f32 %f32\n"
8759 "%fp_stype = OpTypePointer Function %stype\n";
8760
8761 const char function[] =
8762 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8763 "%param1 = OpFunctionParameter %v4f32\n"
8764 "%lbl = OpLabel\n"
8765 "%v1 = OpVariable %fp_v4f32 Function\n"
8766 "%v2 = OpVariable %fp_a2f32 Function\n"
8767 "%v3 = OpVariable %fp_f32 Function\n"
8768 "%v = OpVariable %fp_stype Function\n"
8769 "%vv = OpVariable %fp_stype Function\n"
8770 "%vvv = OpVariable %fp_f32 Function\n"
8771
8772 " OpStore %v1 %c_v4f32_1_1_1_1\n"
8773 " OpStore %v2 %c_a2f32_1\n"
8774 " OpStore %v3 %c_f32_1\n"
8775
8776 "%p_v4f32 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8777 "%p_a2f32 = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8778 "%p_f32 = OpAccessChain %fp_f32 %v %c_u32_2\n"
8779 "%v1_v = OpLoad %v4f32 %v1 ${access_type}\n"
8780 "%v2_v = OpLoad %a2f32 %v2 ${access_type}\n"
8781 "%v3_v = OpLoad %f32 %v3 ${access_type}\n"
8782
8783 " OpStore %p_v4f32 %v1_v ${access_type}\n"
8784 " OpStore %p_a2f32 %v2_v ${access_type}\n"
8785 " OpStore %p_f32 %v3_v ${access_type}\n"
8786
8787 " OpCopyMemory %vv %v ${access_type}\n"
8788 " OpCopyMemory %vvv %p_f32 ${access_type}\n"
8789
8790 "%p_f32_2 = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8791 "%v_f32_2 = OpLoad %f32 %p_f32_2\n"
8792 "%v_f32_3 = OpLoad %f32 %vvv\n"
8793
8794 "%ret1 = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8795 "%ret2 = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8796 " OpReturnValue %ret2\n"
8797 " OpFunctionEnd\n";
8798
8799 struct NameMemoryAccess
8800 {
8801 string name;
8802 string accessType;
8803 };
8804
8805
8806 NameMemoryAccess tests[] =
8807 {
8808 { "none", "" },
8809 { "volatile", "Volatile" },
8810 { "aligned", "Aligned 1" },
8811 { "volatile_aligned", "Volatile|Aligned 1" },
8812 { "nontemporal_aligned", "Nontemporal|Aligned 1" },
8813 { "volatile_nontemporal", "Volatile|Nontemporal" },
8814 { "volatile_nontermporal_aligned", "Volatile|Nontemporal|Aligned 1" },
8815 };
8816
8817 getHalfColorsFullAlpha(colors);
8818
8819 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8820 {
8821 map<string, string> fragments;
8822 map<string, string> memoryAccess;
8823 memoryAccess["access_type"] = tests[testNdx].accessType;
8824
8825 fragments["pre_main"] = constantsAndTypes;
8826 fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
8827 createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8828 }
8829 return memoryAccessTests.release();
8830 }
createOpUndefTests(tcu::TestContext & testCtx)8831 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8832 {
8833 de::MovePtr<tcu::TestCaseGroup> opUndefTests (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8834 RGBA defaultColors[4];
8835 map<string, string> fragments;
8836 getDefaultColors(defaultColors);
8837
8838 // First, simple cases that don't do anything with the OpUndef result.
8839 struct NameCodePair { string name, decl, type; };
8840 const NameCodePair tests[] =
8841 {
8842 {"bool", "", "%bool"},
8843 {"vec2uint32", "", "%v2u32"},
8844 {"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8845 {"sampler", "%type = OpTypeSampler", "%type"},
8846 {"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8847 {"pointer", "", "%fp_i32"},
8848 {"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8849 {"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8850 {"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8851 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8852 {
8853 fragments["undef_type"] = tests[testNdx].type;
8854 fragments["testfun"] = StringTemplate(
8855 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8856 "%param1 = OpFunctionParameter %v4f32\n"
8857 "%label_testfun = OpLabel\n"
8858 "%undef = OpUndef ${undef_type}\n"
8859 "OpReturnValue %param1\n"
8860 "OpFunctionEnd\n").specialize(fragments);
8861 fragments["pre_main"] = tests[testNdx].decl;
8862 createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8863 }
8864 fragments.clear();
8865
8866 fragments["testfun"] =
8867 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8868 "%param1 = OpFunctionParameter %v4f32\n"
8869 "%label_testfun = OpLabel\n"
8870 "%undef = OpUndef %f32\n"
8871 "%zero = OpFMul %f32 %undef %c_f32_0\n"
8872 "%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8873 "%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8874 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8875 "%b = OpFAdd %f32 %a %actually_zero\n"
8876 "%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8877 "OpReturnValue %ret\n"
8878 "OpFunctionEnd\n";
8879
8880 createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8881
8882 fragments["testfun"] =
8883 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8884 "%param1 = OpFunctionParameter %v4f32\n"
8885 "%label_testfun = OpLabel\n"
8886 "%undef = OpUndef %i32\n"
8887 "%zero = OpIMul %i32 %undef %c_i32_0\n"
8888 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8889 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8890 "OpReturnValue %ret\n"
8891 "OpFunctionEnd\n";
8892
8893 createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8894
8895 fragments["testfun"] =
8896 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8897 "%param1 = OpFunctionParameter %v4f32\n"
8898 "%label_testfun = OpLabel\n"
8899 "%undef = OpUndef %u32\n"
8900 "%zero = OpIMul %u32 %undef %c_i32_0\n"
8901 "%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8902 "%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8903 "OpReturnValue %ret\n"
8904 "OpFunctionEnd\n";
8905
8906 createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8907
8908 fragments["testfun"] =
8909 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8910 "%param1 = OpFunctionParameter %v4f32\n"
8911 "%label_testfun = OpLabel\n"
8912 "%undef = OpUndef %v4f32\n"
8913 "%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8914 "%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8915 "%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8916 "%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8917 "%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8918 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8919 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8920 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8921 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8922 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8923 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8924 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8925 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8926 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8927 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8928 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8929 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8930 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8931 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8932 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8933 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8934 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8935 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8936 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8937 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8938 "OpReturnValue %ret\n"
8939 "OpFunctionEnd\n";
8940
8941 createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8942
8943 fragments["pre_main"] =
8944 "%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8945 fragments["testfun"] =
8946 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8947 "%param1 = OpFunctionParameter %v4f32\n"
8948 "%label_testfun = OpLabel\n"
8949 "%undef = OpUndef %m2x2f32\n"
8950 "%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8951 "%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8952 "%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8953 "%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8954 "%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8955 "%is_nan_0 = OpIsNan %bool %zero_0\n"
8956 "%is_nan_1 = OpIsNan %bool %zero_1\n"
8957 "%is_nan_2 = OpIsNan %bool %zero_2\n"
8958 "%is_nan_3 = OpIsNan %bool %zero_3\n"
8959 "%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8960 "%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8961 "%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8962 "%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8963 "%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8964 "%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8965 "%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8966 "%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8967 "%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8968 "%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8969 "%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8970 "%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8971 "%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8972 "%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8973 "%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8974 "%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8975 "OpReturnValue %ret\n"
8976 "OpFunctionEnd\n";
8977
8978 createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8979
8980 return opUndefTests.release();
8981 }
8982
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)8983 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8984 {
8985 const RGBA inputColors[4] =
8986 {
8987 RGBA(0, 0, 0, 255),
8988 RGBA(0, 0, 255, 255),
8989 RGBA(0, 255, 0, 255),
8990 RGBA(0, 255, 255, 255)
8991 };
8992
8993 const RGBA expectedColors[4] =
8994 {
8995 RGBA(255, 0, 0, 255),
8996 RGBA(255, 0, 0, 255),
8997 RGBA(255, 0, 0, 255),
8998 RGBA(255, 0, 0, 255)
8999 };
9000
9001 const struct SingleFP16Possibility
9002 {
9003 const char* name;
9004 const char* constant; // Value to assign to %test_constant.
9005 float valueAsFloat;
9006 const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9007 } tests[] =
9008 {
9009 {
9010 "negative",
9011 "-0x1.3p1\n",
9012 -constructNormalizedFloat(1, 0x300000),
9013 "%cond = OpFOrdEqual %bool %c %test_constant\n"
9014 }, // -19
9015 {
9016 "positive",
9017 "0x1.0p7\n",
9018 constructNormalizedFloat(7, 0x000000),
9019 "%cond = OpFOrdEqual %bool %c %test_constant\n"
9020 }, // +128
9021 // SPIR-V requires that OpQuantizeToF16 flushes
9022 // any numbers that would end up denormalized in F16 to zero.
9023 {
9024 "denorm",
9025 "0x0.0006p-126\n",
9026 std::ldexp(1.5f, -140),
9027 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9028 }, // denorm
9029 {
9030 "negative_denorm",
9031 "-0x0.0006p-126\n",
9032 -std::ldexp(1.5f, -140),
9033 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9034 }, // -denorm
9035 {
9036 "too_small",
9037 "0x1.0p-16\n",
9038 std::ldexp(1.0f, -16),
9039 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9040 }, // too small positive
9041 {
9042 "negative_too_small",
9043 "-0x1.0p-32\n",
9044 -std::ldexp(1.0f, -32),
9045 "%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9046 }, // too small negative
9047 {
9048 "negative_inf",
9049 "-0x1.0p128\n",
9050 -std::ldexp(1.0f, 128),
9051
9052 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9053 "%inf = OpIsInf %bool %c\n"
9054 "%cond = OpLogicalAnd %bool %gz %inf\n"
9055 }, // -inf to -inf
9056 {
9057 "inf",
9058 "0x1.0p128\n",
9059 std::ldexp(1.0f, 128),
9060
9061 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9062 "%inf = OpIsInf %bool %c\n"
9063 "%cond = OpLogicalAnd %bool %gz %inf\n"
9064 }, // +inf to +inf
9065 {
9066 "round_to_negative_inf",
9067 "-0x1.0p32\n",
9068 -std::ldexp(1.0f, 32),
9069
9070 "%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9071 "%inf = OpIsInf %bool %c\n"
9072 "%cond = OpLogicalAnd %bool %gz %inf\n"
9073 }, // round to -inf
9074 {
9075 "round_to_inf",
9076 "0x1.0p16\n",
9077 std::ldexp(1.0f, 16),
9078
9079 "%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9080 "%inf = OpIsInf %bool %c\n"
9081 "%cond = OpLogicalAnd %bool %gz %inf\n"
9082 }, // round to +inf
9083 {
9084 "nan",
9085 "0x1.1p128\n",
9086 std::numeric_limits<float>::quiet_NaN(),
9087
9088 // Test for any NaN value, as NaNs are not preserved
9089 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9090 "%cond = OpIsNan %bool %direct_quant\n"
9091 }, // nan
9092 {
9093 "negative_nan",
9094 "-0x1.0001p128\n",
9095 std::numeric_limits<float>::quiet_NaN(),
9096
9097 // Test for any NaN value, as NaNs are not preserved
9098 "%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9099 "%cond = OpIsNan %bool %direct_quant\n"
9100 } // -nan
9101 };
9102 const char* constants =
9103 "%test_constant = OpConstant %f32 "; // The value will be test.constant.
9104
9105 StringTemplate function (
9106 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9107 "%param1 = OpFunctionParameter %v4f32\n"
9108 "%label_testfun = OpLabel\n"
9109 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9110 "%b = OpFAdd %f32 %test_constant %a\n"
9111 "%c = OpQuantizeToF16 %f32 %b\n"
9112 "${condition}\n"
9113 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9114 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9115 " OpReturnValue %retval\n"
9116 "OpFunctionEnd\n"
9117 );
9118
9119 const char* specDecorations = "OpDecorate %test_constant SpecId 0\n";
9120 const char* specConstants =
9121 "%test_constant = OpSpecConstant %f32 0.\n"
9122 "%c = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9123
9124 StringTemplate specConstantFunction(
9125 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9126 "%param1 = OpFunctionParameter %v4f32\n"
9127 "%label_testfun = OpLabel\n"
9128 "${condition}\n"
9129 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9130 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9131 " OpReturnValue %retval\n"
9132 "OpFunctionEnd\n"
9133 );
9134
9135 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9136 {
9137 map<string, string> codeSpecialization;
9138 map<string, string> fragments;
9139 codeSpecialization["condition"] = tests[idx].condition;
9140 fragments["testfun"] = function.specialize(codeSpecialization);
9141 fragments["pre_main"] = string(constants) + tests[idx].constant + "\n";
9142 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9143 }
9144
9145 for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9146 {
9147 map<string, string> codeSpecialization;
9148 map<string, string> fragments;
9149 SpecConstants passConstants;
9150
9151 codeSpecialization["condition"] = tests[idx].condition;
9152 fragments["testfun"] = specConstantFunction.specialize(codeSpecialization);
9153 fragments["decoration"] = specDecorations;
9154 fragments["pre_main"] = specConstants;
9155
9156 passConstants.append<float>(tests[idx].valueAsFloat);
9157
9158 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9159 }
9160 }
9161
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9162 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
9163 {
9164 RGBA inputColors[4] = {
9165 RGBA(0, 0, 0, 255),
9166 RGBA(0, 0, 255, 255),
9167 RGBA(0, 255, 0, 255),
9168 RGBA(0, 255, 255, 255)
9169 };
9170
9171 RGBA expectedColors[4] =
9172 {
9173 RGBA(255, 0, 0, 255),
9174 RGBA(255, 0, 0, 255),
9175 RGBA(255, 0, 0, 255),
9176 RGBA(255, 0, 0, 255)
9177 };
9178
9179 struct DualFP16Possibility
9180 {
9181 const char* name;
9182 const char* input;
9183 float inputAsFloat;
9184 const char* possibleOutput1;
9185 const char* possibleOutput2;
9186 } tests[] = {
9187 {
9188 "positive_round_up_or_round_down",
9189 "0x1.3003p8",
9190 constructNormalizedFloat(8, 0x300300),
9191 "0x1.304p8",
9192 "0x1.3p8"
9193 },
9194 {
9195 "negative_round_up_or_round_down",
9196 "-0x1.6008p-7",
9197 -constructNormalizedFloat(-7, 0x600800),
9198 "-0x1.6p-7",
9199 "-0x1.604p-7"
9200 },
9201 {
9202 "carry_bit",
9203 "0x1.01ep2",
9204 constructNormalizedFloat(2, 0x01e000),
9205 "0x1.01cp2",
9206 "0x1.02p2"
9207 },
9208 {
9209 "carry_to_exponent",
9210 "0x1.ffep1",
9211 constructNormalizedFloat(1, 0xffe000),
9212 "0x1.ffcp1",
9213 "0x1.0p2"
9214 },
9215 };
9216 StringTemplate constants (
9217 "%input_const = OpConstant %f32 ${input}\n"
9218 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9219 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9220 );
9221
9222 StringTemplate specConstants (
9223 "%input_const = OpSpecConstant %f32 0.\n"
9224 "%possible_solution1 = OpConstant %f32 ${output1}\n"
9225 "%possible_solution2 = OpConstant %f32 ${output2}\n"
9226 );
9227
9228 const char* specDecorations = "OpDecorate %input_const SpecId 0\n";
9229
9230 const char* function =
9231 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9232 "%param1 = OpFunctionParameter %v4f32\n"
9233 "%label_testfun = OpLabel\n"
9234 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9235 // For the purposes of this test we assume that 0.f will always get
9236 // faithfully passed through the pipeline stages.
9237 "%b = OpFAdd %f32 %input_const %a\n"
9238 "%c = OpQuantizeToF16 %f32 %b\n"
9239 "%eq_1 = OpFOrdEqual %bool %c %possible_solution1\n"
9240 "%eq_2 = OpFOrdEqual %bool %c %possible_solution2\n"
9241 "%cond = OpLogicalOr %bool %eq_1 %eq_2\n"
9242 "%v4cond = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9243 "%retval = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9244 " OpReturnValue %retval\n"
9245 "OpFunctionEnd\n";
9246
9247 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9248 map<string, string> fragments;
9249 map<string, string> constantSpecialization;
9250
9251 constantSpecialization["input"] = tests[idx].input;
9252 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9253 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9254 fragments["testfun"] = function;
9255 fragments["pre_main"] = constants.specialize(constantSpecialization);
9256 createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9257 }
9258
9259 for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9260 map<string, string> fragments;
9261 map<string, string> constantSpecialization;
9262 SpecConstants passConstants;
9263
9264 constantSpecialization["output1"] = tests[idx].possibleOutput1;
9265 constantSpecialization["output2"] = tests[idx].possibleOutput2;
9266 fragments["testfun"] = function;
9267 fragments["decoration"] = specDecorations;
9268 fragments["pre_main"] = specConstants.specialize(constantSpecialization);
9269
9270 passConstants.append<float>(tests[idx].inputAsFloat);
9271
9272 createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9273 }
9274 }
9275
createOpQuantizeTests(tcu::TestContext & testCtx)9276 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9277 {
9278 de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9279 createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9280 createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9281 return opQuantizeTests.release();
9282 }
9283
// Per-stage entry point selector for the multiple-entry-point module tests.
// getShaderPermutation() fills every field with either 0 or 1, and
// createModuleTests() uses the value as an index into its two-entry name
// table. The field order matters: getShaderPermutation() relies on it for
// aggregate initialization.
struct ShaderPermutation
{
	deUint8 vertexPermutation;		// Selector for the vertex stage.
	deUint8 geometryPermutation;	// Selector for the geometry stage.
	deUint8 tesscPermutation;		// Selector for the tessellation control stage.
	deUint8 tessePermutation;		// Selector for the tessellation evaluation stage.
	deUint8 fragmentPermutation;	// Selector for the fragment stage.
};
9292
getShaderPermutation(deUint8 inputValue)9293 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9294 {
9295 ShaderPermutation permutation =
9296 {
9297 static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9298 static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9299 static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9300 static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9301 static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
9302 };
9303 return permutation;
9304 }
9305
createModuleTests(tcu::TestContext & testCtx)9306 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9307 {
9308 RGBA defaultColors[4];
9309 RGBA invertedColors[4];
9310 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9311
9312 getDefaultColors(defaultColors);
9313 getInvertedDefaultColors(invertedColors);
9314
9315 // Combined module tests
9316 {
9317 // Shader stages: vertex and fragment
9318 {
9319 const ShaderElement combinedPipeline[] =
9320 {
9321 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9322 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9323 };
9324
9325 addFunctionCaseWithPrograms<InstanceContext>(
9326 moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
9327 createInstanceContext(combinedPipeline, map<string, string>()));
9328 }
9329
9330 // Shader stages: vertex, geometry and fragment
9331 {
9332 const ShaderElement combinedPipeline[] =
9333 {
9334 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9335 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9336 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9337 };
9338
9339 addFunctionCaseWithPrograms<InstanceContext>(
9340 moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9341 createInstanceContext(combinedPipeline, map<string, string>()));
9342 }
9343
9344 // Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9345 {
9346 const ShaderElement combinedPipeline[] =
9347 {
9348 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9349 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9350 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9351 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9352 };
9353
9354 addFunctionCaseWithPrograms<InstanceContext>(
9355 moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
9356 createInstanceContext(combinedPipeline, map<string, string>()));
9357 }
9358
9359 // Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9360 {
9361 const ShaderElement combinedPipeline[] =
9362 {
9363 ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9364 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9365 ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9366 ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9367 ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9368 };
9369
9370 addFunctionCaseWithPrograms<InstanceContext>(
9371 moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9372 createInstanceContext(combinedPipeline, map<string, string>()));
9373 }
9374 }
9375
9376 const char* numbers[] =
9377 {
9378 "1", "2"
9379 };
9380
9381 for (deInt8 idx = 0; idx < 32; ++idx)
9382 {
9383 ShaderPermutation permutation = getShaderPermutation(idx);
9384 string name = string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9385 const ShaderElement pipeline[] =
9386 {
9387 ShaderElement("vert", string("vert") + numbers[permutation.vertexPermutation], VK_SHADER_STAGE_VERTEX_BIT),
9388 ShaderElement("geom", string("geom") + numbers[permutation.geometryPermutation], VK_SHADER_STAGE_GEOMETRY_BIT),
9389 ShaderElement("tessc", string("tessc") + numbers[permutation.tesscPermutation], VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9390 ShaderElement("tesse", string("tesse") + numbers[permutation.tessePermutation], VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9391 ShaderElement("frag", string("frag") + numbers[permutation.fragmentPermutation], VK_SHADER_STAGE_FRAGMENT_BIT)
9392 };
9393
9394 // If there are an even number of swaps, then it should be no-op.
9395 // If there are an odd number, the color should be flipped.
9396 if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
9397 {
9398 addFunctionCaseWithPrograms<InstanceContext>(
9399 moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9400 createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9401 }
9402 else
9403 {
9404 addFunctionCaseWithPrograms<InstanceContext>(
9405 moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9406 createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9407 }
9408 }
9409 return moduleTests.release();
9410 }
9411
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9412 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9413 {
9414 switch (task)
9415 {
9416 case SHADER_TASK_NONE: return "";
9417 case SHADER_TASK_NORMAL: return prefix + "_normal";
9418 case SHADER_TASK_UNUSED_VAR: return prefix + "_unused_var";
9419 case SHADER_TASK_UNUSED_FUNC: return prefix + "_unused_func";
9420 default: DE_ASSERT(DE_FALSE);
9421 }
9422 // unreachable
9423 return "";
9424 }
9425
getShaderTaskIndexName(ShaderTaskIndex index)9426 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9427 {
9428 switch (index)
9429 {
9430 case SHADER_TASK_INDEX_VERTEX: return "vertex";
9431 case SHADER_TASK_INDEX_GEOMETRY: return "geom";
9432 case SHADER_TASK_INDEX_TESS_CONTROL: return "tessc";
9433 case SHADER_TASK_INDEX_TESS_EVAL: return "tesse";
9434 case SHADER_TASK_INDEX_FRAGMENT: return "frag";
9435 default: DE_ASSERT(DE_FALSE);
9436 }
9437 // unreachable
9438 return "";
9439 }
9440
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9441 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9442 {
9443 std::string testName = location.toString();
9444
9445 for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9446 {
9447 if (shaderTasks[i] != SHADER_TASK_NONE)
9448 {
9449 testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9450 }
9451 }
9452
9453 return testName;
9454 }
9455
createUnusedVariableTests(tcu::TestContext & testCtx)9456 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9457 {
9458 de::MovePtr<tcu::TestCaseGroup> moduleTests (new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9459
9460 ShaderTaskArray shaderCombinations[] =
9461 {
9462 // Vertex Geometry Tess. Control Tess. Evaluation Fragment
9463 { SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9464 { SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9465 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR },
9466 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC },
9467 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9468 { SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NONE, SHADER_TASK_NONE, SHADER_TASK_NORMAL },
9469 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9470 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL, SHADER_TASK_NORMAL },
9471 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_VAR, SHADER_TASK_NORMAL },
9472 { SHADER_TASK_NORMAL, SHADER_TASK_NONE, SHADER_TASK_NORMAL, SHADER_TASK_UNUSED_FUNC, SHADER_TASK_NORMAL }
9473 };
9474
9475 const VariableLocation testLocations[] =
9476 {
9477 // Set Binding
9478 { 0, 5 },
9479 { 5, 5 },
9480 };
9481
9482 for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9483 {
9484 for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9485 {
9486 const ShaderTaskArray& shaderTasks = shaderCombinations[combNdx];
9487 const VariableLocation& location = testLocations[locationNdx];
9488 std::string testName = getUnusedVarTestName(shaderTasks, location);
9489
9490 addFunctionCaseWithPrograms<UnusedVariableContext>(
9491 moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9492 createUnusedVariableContext(shaderTasks, location));
9493 }
9494 }
9495
9496 return moduleTests.release();
9497 }
9498
// Builds the "loop" group of looping control-flow tests. Each case installs a
// different %test_code body in fragments["testfun"] and is registered through
// createTestsForAllStages with defaultColors as both the input and the
// expected output, so every %test_code must leave its parameter unchanged to
// pass. The fragments map is reused between cases: "pre_main" set at the top
// stays in effect until it is replaced just before the switch_continue case.
// Ownership of the returned group goes to the caller.
tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
	RGBA defaultColors[4];
	getDefaultColors(defaultColors);
	map<string, string> fragments;
	// %c_f32_5 is the divisor used by the "break" and "return" cases below.
	fragments["pre_main"] =
		"%c_f32_5 = OpConstant %f32 5.\n";

	// A loop with a single block. The Continue Target is the loop block
	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
	// -- the "continue construct" forms the entire loop.
	fragments["testfun"] =
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
		"OpBranch %loop\n"

		";adds and subtracts 1.0 to %val in alternate iterations\n"
		"%loop = OpLabel\n"
		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
		"%val = OpFAdd %f32 %val1 %delta\n"
		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
		"%count__ = OpISub %i32 %count %c_i32_1\n"
		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
		"OpLoopMerge %exit %loop None\n"
		"OpBranchConditional %again %loop %exit\n"

		"%exit = OpLabel\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
		"OpReturnValue %result\n"

		"OpFunctionEnd\n";

	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());

	// Body comprised of multiple basic blocks.
	const StringTemplate multiBlock(
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
		"OpBranch %loop\n"

		";adds and subtracts 1.0 to %val in alternate iterations\n"
		"%loop = OpLabel\n"
		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
		"%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
		// There are several possibilities for the Continue Target below. Each
		// will be specialized into a separate test case.
		"OpLoopMerge %exit ${continue_target} None\n"
		"OpBranch %if\n"

		"%if = OpLabel\n"
		";delta_next = (delta > 0) ? -1 : 1;\n"
		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
		"OpSelectionMerge %gather DontFlatten\n"
		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"

		"%odd = OpLabel\n"
		"OpBranch %gather\n"

		"%even = OpLabel\n"
		"OpBranch %gather\n"

		"%gather = OpLabel\n"
		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
		"%val = OpFAdd %f32 %val1 %delta\n"
		"%count__ = OpISub %i32 %count %c_i32_1\n"
		"OpBranch %cont\n"

		"%cont = OpLabel\n"
		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
		"OpBranchConditional %again %loop %exit\n"

		"%exit = OpLabel\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
		"OpReturnValue %result\n"

		"OpFunctionEnd\n");

	// Substitution map for ${continue_target}; its single entry is rewritten
	// before each specialization of multiBlock.
	map<string, string> continue_target;

	// The Continue Target is the loop block itself.
	continue_target["continue_target"] = "%loop";
	fragments["testfun"] = multiBlock.specialize(continue_target);
	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());

	// The Continue Target is at the end of the loop.
	continue_target["continue_target"] = "%cont";
	fragments["testfun"] = multiBlock.specialize(continue_target);
	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());

	// A loop with continue statement.
	// The loop adds 4, 3 and 1 (8 in total) and %exit subtracts %c_f32_8 again.
	fragments["testfun"] =
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
		"OpBranch %loop\n"

		";adds 4, 3, and 1 to %val0 (skips 2)\n"
		"%loop = OpLabel\n"
		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
		"OpLoopMerge %exit %continue None\n"
		"OpBranch %if\n"

		"%if = OpLabel\n"
		";skip if %count==2\n"
		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
		"OpBranchConditional %eq2 %continue %body\n"

		"%body = OpLabel\n"
		"%fcount = OpConvertSToF %f32 %count\n"
		"%val2 = OpFAdd %f32 %val1 %fcount\n"
		"OpBranch %continue\n"

		"%continue = OpLabel\n"
		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
		"%count__ = OpISub %i32 %count %c_i32_1\n"
		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
		"OpBranchConditional %again %loop %exit\n"

		"%exit = OpLabel\n"
		"%same = OpFSub %f32 %val %c_f32_8\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
		"OpReturnValue %result\n"
		"OpFunctionEnd\n";
	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());

	// A loop with break.
	// The loop adds 4 and 3 (7 in total) and %exit subtracts %c_f32_7 again.
	// %two is derived from the input (dot product divided by 5, converted to
	// an integer, plus 2) rather than written as a literal.
	fragments["testfun"] =
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		";param1 components are between 0 and 1, so dot product is 4 or less\n"
		"%dot = OpDot %f32 %param1 %param1\n"
		"%div = OpFDiv %f32 %dot %c_f32_5\n"
		"%zero = OpConvertFToU %u32 %div\n"
		"%two = OpIAdd %i32 %zero %c_i32_2\n"
		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
		"OpBranch %loop\n"

		";adds 4 and 3 to %val0 (exits early)\n"
		"%loop = OpLabel\n"
		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
		"OpLoopMerge %exit %continue None\n"
		"OpBranch %if\n"

		"%if = OpLabel\n"
		";end loop if %count==%two\n"
		"%above2 = OpSGreaterThan %bool %count %two\n"
		"OpBranchConditional %above2 %body %exit\n"

		"%body = OpLabel\n"
		"%fcount = OpConvertSToF %f32 %count\n"
		"%val2 = OpFAdd %f32 %val1 %fcount\n"
		"OpBranch %continue\n"

		"%continue = OpLabel\n"
		"%count__ = OpISub %i32 %count %c_i32_1\n"
		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
		"OpBranchConditional %again %loop %exit\n"

		"%exit = OpLabel\n"
		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
		"%same = OpFSub %f32 %val_post %c_f32_7\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
		"OpReturnValue %result\n"
		"OpFunctionEnd\n";
	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());

	// A loop with return.
	// The early exit returns %param1 untouched; the %exit block would return
	// a modified vector instead.
	fragments["testfun"] =
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		";param1 components are between 0 and 1, so dot product is 4 or less\n"
		"%dot = OpDot %f32 %param1 %param1\n"
		"%div = OpFDiv %f32 %dot %c_f32_5\n"
		"%zero = OpConvertFToU %u32 %div\n"
		"%two = OpIAdd %i32 %zero %c_i32_2\n"
		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
		"OpBranch %loop\n"

		";returns early without modifying %param1\n"
		"%loop = OpLabel\n"
		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
		"OpLoopMerge %exit %continue None\n"
		"OpBranch %if\n"

		"%if = OpLabel\n"
		";return if %count==%two\n"
		"%above2 = OpSGreaterThan %bool %count %two\n"
		"OpSelectionMerge %body DontFlatten\n"
		"OpBranchConditional %above2 %body %early_exit\n"

		"%early_exit = OpLabel\n"
		"OpReturnValue %param1\n"

		"%body = OpLabel\n"
		"%fcount = OpConvertSToF %f32 %count\n"
		"%val2 = OpFAdd %f32 %val1 %fcount\n"
		"OpBranch %continue\n"

		"%continue = OpLabel\n"
		"%count__ = OpISub %i32 %count %c_i32_1\n"
		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
		"OpBranchConditional %again %loop %exit\n"

		"%exit = OpLabel\n"
		";should never get here, so return an incorrect result\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
		"OpReturnValue %result\n"
		"OpFunctionEnd\n";
	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());

	// Continue inside a switch block to break to enclosing loop's merge block.
	// Matches roughly the following GLSL code:
	// for (; keep_going; keep_going = false)
	// {
	//     switch (int(param1.x))
	//     {
	//         case 0: continue;
	//         case 1: continue;
	//         default: continue;
	//     }
	//     dead code: modify return value to invalid result.
	// }
	// This case replaces "pre_main": it needs a Function-storage bool pointer
	// type and the true/false constants instead of %c_f32_5.
	fragments["pre_main"] =
		"%fp_bool = OpTypePointer Function %bool\n"
		"%true = OpConstantTrue %bool\n"
		"%false = OpConstantFalse %bool\n";

	fragments["testfun"] =
		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"

		"%entry = OpLabel\n"
		"%keep_going = OpVariable %fp_bool Function\n"
		"%val_ptr = OpVariable %fp_f32 Function\n"
		"%param1_x = OpCompositeExtract %f32 %param1 0\n"
		"OpStore %keep_going %true\n"
		"OpBranch %forloop_begin\n"

		"%forloop_begin = OpLabel\n"
		"OpLoopMerge %forloop_merge %forloop_continue None\n"
		"OpBranch %forloop\n"

		"%forloop = OpLabel\n"
		"%for_condition = OpLoad %bool %keep_going\n"
		"OpBranchConditional %for_condition %forloop_body %forloop_merge\n"

		"%forloop_body = OpLabel\n"
		"OpStore %val_ptr %param1_x\n"
		"%param1_x_int = OpConvertFToS %i32 %param1_x\n"

		"OpSelectionMerge %switch_merge None\n"
		"OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
		"%case_0 = OpLabel\n"
		"OpBranch %forloop_continue\n"
		"%case_1 = OpLabel\n"
		"OpBranch %forloop_continue\n"
		"%default = OpLabel\n"
		"OpBranch %forloop_continue\n"
		"%switch_merge = OpLabel\n"
		";should never get here, so change the return value to invalid result\n"
		"OpStore %val_ptr %c_f32_1\n"
		"OpBranch %forloop_continue\n"

		"%forloop_continue = OpLabel\n"
		"OpStore %keep_going %false\n"
		"OpBranch %forloop_begin\n"
		"%forloop_merge = OpLabel\n"

		"%val = OpLoad %f32 %val_ptr\n"
		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
		"OpReturnValue %result\n"
		"OpFunctionEnd\n";
	createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());

	return testGroup.release();
}
9794
9795 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)9796 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9797 {
9798 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9799 map<string, string> fragments;
9800
9801 // A barrier inside a function body.
9802 fragments["pre_main"] =
9803 "%Workgroup = OpConstant %i32 2\n"
9804 "%Invocation = OpConstant %i32 4\n"
9805 "%MemorySemanticsNone = OpConstant %i32 0\n";
9806 fragments["testfun"] =
9807 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9808 "%param1 = OpFunctionParameter %v4f32\n"
9809 "%label_testfun = OpLabel\n"
9810 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9811 "OpReturnValue %param1\n"
9812 "OpFunctionEnd\n";
9813 addTessCtrlTest(testGroup.get(), "in_function", fragments);
9814
9815 // Common setup code for the following tests.
9816 fragments["pre_main"] =
9817 "%Workgroup = OpConstant %i32 2\n"
9818 "%Invocation = OpConstant %i32 4\n"
9819 "%MemorySemanticsNone = OpConstant %i32 0\n"
9820 "%c_f32_5 = OpConstant %f32 5.\n";
9821 const string setupPercentZero = // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9822 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9823 "%param1 = OpFunctionParameter %v4f32\n"
9824 "%entry = OpLabel\n"
9825 ";param1 components are between 0 and 1, so dot product is 4 or less\n"
9826 "%dot = OpDot %f32 %param1 %param1\n"
9827 "%div = OpFDiv %f32 %dot %c_f32_5\n"
9828 "%zero = OpConvertFToU %u32 %div\n";
9829
9830 // Barriers inside OpSwitch branches.
9831 fragments["testfun"] =
9832 setupPercentZero +
9833 "OpSelectionMerge %switch_exit None\n"
9834 "OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9835
9836 "%case1 = OpLabel\n"
9837 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9838 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9839 "%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9840 "OpBranch %switch_exit\n"
9841
9842 "%switch_default = OpLabel\n"
9843 "%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9844 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9845 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9846 "OpBranch %switch_exit\n"
9847
9848 "%case0 = OpLabel\n"
9849 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9850 "OpBranch %switch_exit\n"
9851
9852 "%switch_exit = OpLabel\n"
9853 "%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9854 "OpReturnValue %ret\n"
9855 "OpFunctionEnd\n";
9856 addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9857
9858 // Barriers inside if-then-else.
9859 fragments["testfun"] =
9860 setupPercentZero +
9861 "%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9862 "OpSelectionMerge %exit DontFlatten\n"
9863 "OpBranchConditional %eq0 %then %else\n"
9864
9865 "%else = OpLabel\n"
9866 ";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9867 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9868 "%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9869 "OpBranch %exit\n"
9870
9871 "%then = OpLabel\n"
9872 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9873 "OpBranch %exit\n"
9874 "%exit = OpLabel\n"
9875 "%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9876 "OpReturnValue %ret\n"
9877 "OpFunctionEnd\n";
9878 addTessCtrlTest(testGroup.get(), "in_if", fragments);
9879
9880 // A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9881 // http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9882 fragments["testfun"] =
9883 setupPercentZero +
9884 "%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9885 "%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9886 "OpSelectionMerge %exit DontFlatten\n"
9887 "OpBranchConditional %thread0 %then %else\n"
9888
9889 "%else = OpLabel\n"
9890 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9891 "OpBranch %exit\n"
9892
9893 "%then = OpLabel\n"
9894 "%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9895 "OpBranch %exit\n"
9896
9897 "%exit = OpLabel\n"
9898 "%val = OpPhi %f32 %val0 %else %val1 %then\n"
9899 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9900 "%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9901 "OpReturnValue %ret\n"
9902 "OpFunctionEnd\n";
9903 addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9904
9905 // A barrier inside a loop.
9906 fragments["pre_main"] =
9907 "%Workgroup = OpConstant %i32 2\n"
9908 "%Invocation = OpConstant %i32 4\n"
9909 "%MemorySemanticsNone = OpConstant %i32 0\n"
9910 "%c_f32_10 = OpConstant %f32 10.\n";
9911 fragments["testfun"] =
9912 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9913 "%param1 = OpFunctionParameter %v4f32\n"
9914 "%entry = OpLabel\n"
9915 "%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9916 "OpBranch %loop\n"
9917
9918 ";adds 4, 3, 2, and 1 to %val0\n"
9919 "%loop = OpLabel\n"
9920 "%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9921 "%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9922 "OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9923 "%fcount = OpConvertSToF %f32 %count\n"
9924 "%val = OpFAdd %f32 %val1 %fcount\n"
9925 "%count__ = OpISub %i32 %count %c_i32_1\n"
9926 "%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9927 "OpLoopMerge %exit %loop None\n"
9928 "OpBranchConditional %again %loop %exit\n"
9929
9930 "%exit = OpLabel\n"
9931 "%same = OpFSub %f32 %val %c_f32_10\n"
9932 "%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9933 "OpReturnValue %ret\n"
9934 "OpFunctionEnd\n";
9935 addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9936
9937 return testGroup.release();
9938 }
9939
9940 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)9941 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9942 {
9943 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9944 map<string, string> fragments;
9945 RGBA inputColors[4];
9946 RGBA outputColors[4];
9947
9948 fragments["pre_main"] =
9949 "%c_f32_3 = OpConstant %f32 3.0\n"
9950 "%c_f32_n3 = OpConstant %f32 -3.0\n"
9951 "%c_f32_4 = OpConstant %f32 4.0\n"
9952 "%c_f32_p75 = OpConstant %f32 0.75\n"
9953 "%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9954 "%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9955 "%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9956
9957 // The test does the following.
9958 // vec4 result = (param1 * 8.0) - 4.0;
9959 // return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9960 fragments["testfun"] =
9961 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9962 "%param1 = OpFunctionParameter %v4f32\n"
9963 "%label_testfun = OpLabel\n"
9964 "%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9965 "%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9966 "%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9967 "%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9968 "%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9969 "%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9970 "OpReturnValue %xy_0_1\n"
9971 "OpFunctionEnd\n";
9972
9973
9974 inputColors[0] = RGBA(16, 16, 0, 255);
9975 inputColors[1] = RGBA(232, 232, 0, 255);
9976 inputColors[2] = RGBA(232, 16, 0, 255);
9977 inputColors[3] = RGBA(16, 232, 0, 255);
9978
9979 outputColors[0] = RGBA(64, 64, 0, 255);
9980 outputColors[1] = RGBA(255, 255, 0, 255);
9981 outputColors[2] = RGBA(255, 64, 0, 255);
9982 outputColors[3] = RGBA(64, 255, 0, 255);
9983
9984 createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
9985 return testGroup.release();
9986 }
9987
9988 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)9989 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9990 {
9991 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
9992 map<string, string> fragments;
9993
9994 fragments["pre_main"] =
9995 "%c_f32_255 = OpConstant %f32 255.0\n"
9996 "%c_i32_128 = OpConstant %i32 128\n"
9997 "%c_i32_255 = OpConstant %i32 255\n"
9998 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9999 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10000 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10001
10002 // The test does the following.
10003 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10004 // ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10005 // return float(result + 128) / 255.0;
10006 fragments["testfun"] =
10007 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10008 "%param1 = OpFunctionParameter %v4f32\n"
10009 "%label_testfun = OpLabel\n"
10010 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10011 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10012 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10013 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10014 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10015 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10016 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10017 "%x_out = OpSRem %i32 %x_in %y_in\n"
10018 "%y_out = OpSRem %i32 %y_in %z_in\n"
10019 "%z_out = OpSRem %i32 %z_in %x_in\n"
10020 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10021 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10022 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10023 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10024 "OpReturnValue %float_out\n"
10025 "OpFunctionEnd\n";
10026
10027 const struct CaseParams
10028 {
10029 const char* name;
10030 const char* failMessageTemplate; // customized status message
10031 qpTestResult failResult; // override status on failure
10032 int operands[4][3]; // four (x, y, z) vectors of operands
10033 int results[4][3]; // four (x, y, z) vectors of results
10034 } cases[] =
10035 {
10036 {
10037 "positive",
10038 "${reason}",
10039 QP_TEST_RESULT_FAIL,
10040 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10041 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10042 },
10043 {
10044 "all",
10045 "Inconsistent results, but within specification: ${reason}",
10046 negFailResult, // negative operands, not required by the spec
10047 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10048 { { 5, 12, -2 }, { 0, -5, 2 }, { 3, 8, -6 }, { 25, -60, 0 } }, // results
10049 },
10050 };
10051 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10052
10053 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10054 {
10055 const CaseParams& params = cases[caseNdx];
10056 RGBA inputColors[4];
10057 RGBA outputColors[4];
10058
10059 for (int i = 0; i < 4; ++i)
10060 {
10061 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10062 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10063 }
10064
10065 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10066 }
10067
10068 return testGroup.release();
10069 }
10070
10071 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10072 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10073 {
10074 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10075 map<string, string> fragments;
10076
10077 fragments["pre_main"] =
10078 "%c_f32_255 = OpConstant %f32 255.0\n"
10079 "%c_i32_128 = OpConstant %i32 128\n"
10080 "%c_i32_255 = OpConstant %i32 255\n"
10081 "%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10082 "%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10083 "%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10084
10085 // The test does the following.
10086 // ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10087 // ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10088 // return float(result + 128) / 255.0;
10089 fragments["testfun"] =
10090 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10091 "%param1 = OpFunctionParameter %v4f32\n"
10092 "%label_testfun = OpLabel\n"
10093 "%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10094 "%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10095 "%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10096 "%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10097 "%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10098 "%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10099 "%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10100 "%x_out = OpSMod %i32 %x_in %y_in\n"
10101 "%y_out = OpSMod %i32 %y_in %z_in\n"
10102 "%z_out = OpSMod %i32 %z_in %x_in\n"
10103 "%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10104 "%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10105 "%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10106 "%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10107 "OpReturnValue %float_out\n"
10108 "OpFunctionEnd\n";
10109
10110 const struct CaseParams
10111 {
10112 const char* name;
10113 const char* failMessageTemplate; // customized status message
10114 qpTestResult failResult; // override status on failure
10115 int operands[4][3]; // four (x, y, z) vectors of operands
10116 int results[4][3]; // four (x, y, z) vectors of results
10117 } cases[] =
10118 {
10119 {
10120 "positive",
10121 "${reason}",
10122 QP_TEST_RESULT_FAIL,
10123 { { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } }, // operands
10124 { { 5, 12, 2 }, { 0, 5, 2 }, { 3, 8, 6 }, { 25, 60, 0 } }, // results
10125 },
10126 {
10127 "all",
10128 "Inconsistent results, but within specification: ${reason}",
10129 negFailResult, // negative operands, not required by the spec
10130 { { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } }, // operands
10131 { { 5, -5, 3 }, { 0, 2, -3 }, { 3, -73, 69 }, { -35, 40, 0 } }, // results
10132 },
10133 };
10134 // If either operand is negative the result is undefined. Some implementations may still return correct values.
10135
10136 for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10137 {
10138 const CaseParams& params = cases[caseNdx];
10139 RGBA inputColors[4];
10140 RGBA outputColors[4];
10141
10142 for (int i = 0; i < 4; ++i)
10143 {
10144 inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10145 outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10146 }
10147
10148 createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10149 }
10150 return testGroup.release();
10151 }
10152
// Data types that can appear as the source or destination of a conversion test.
enum ConversionDataType
{
	// Signed integers.
	DATA_TYPE_SIGNED_8 = 0,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,

	// Unsigned integers.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,

	// Floating point.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,

	// Two-component signed integer vectors.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
10169
getBitWidthStr(ConversionDataType type)10170 const string getBitWidthStr (ConversionDataType type)
10171 {
10172 switch (type)
10173 {
10174 case DATA_TYPE_SIGNED_8:
10175 case DATA_TYPE_UNSIGNED_8:
10176 return "8";
10177
10178 case DATA_TYPE_SIGNED_16:
10179 case DATA_TYPE_UNSIGNED_16:
10180 case DATA_TYPE_FLOAT_16:
10181 return "16";
10182
10183 case DATA_TYPE_SIGNED_32:
10184 case DATA_TYPE_UNSIGNED_32:
10185 case DATA_TYPE_FLOAT_32:
10186 case DATA_TYPE_VEC2_SIGNED_16:
10187 return "32";
10188
10189 case DATA_TYPE_SIGNED_64:
10190 case DATA_TYPE_UNSIGNED_64:
10191 case DATA_TYPE_FLOAT_64:
10192 case DATA_TYPE_VEC2_SIGNED_32:
10193 return "64";
10194
10195 default:
10196 DE_ASSERT(false);
10197 }
10198 return "";
10199 }
10200
getByteWidthStr(ConversionDataType type)10201 const string getByteWidthStr (ConversionDataType type)
10202 {
10203 switch (type)
10204 {
10205 case DATA_TYPE_SIGNED_8:
10206 case DATA_TYPE_UNSIGNED_8:
10207 return "1";
10208
10209 case DATA_TYPE_SIGNED_16:
10210 case DATA_TYPE_UNSIGNED_16:
10211 case DATA_TYPE_FLOAT_16:
10212 return "2";
10213
10214 case DATA_TYPE_SIGNED_32:
10215 case DATA_TYPE_UNSIGNED_32:
10216 case DATA_TYPE_FLOAT_32:
10217 case DATA_TYPE_VEC2_SIGNED_16:
10218 return "4";
10219
10220 case DATA_TYPE_SIGNED_64:
10221 case DATA_TYPE_UNSIGNED_64:
10222 case DATA_TYPE_FLOAT_64:
10223 case DATA_TYPE_VEC2_SIGNED_32:
10224 return "8";
10225
10226 default:
10227 DE_ASSERT(false);
10228 }
10229 return "";
10230 }
10231
isSigned(ConversionDataType type)10232 bool isSigned (ConversionDataType type)
10233 {
10234 switch (type)
10235 {
10236 case DATA_TYPE_SIGNED_8:
10237 case DATA_TYPE_SIGNED_16:
10238 case DATA_TYPE_SIGNED_32:
10239 case DATA_TYPE_SIGNED_64:
10240 case DATA_TYPE_FLOAT_16:
10241 case DATA_TYPE_FLOAT_32:
10242 case DATA_TYPE_FLOAT_64:
10243 case DATA_TYPE_VEC2_SIGNED_16:
10244 case DATA_TYPE_VEC2_SIGNED_32:
10245 return true;
10246
10247 case DATA_TYPE_UNSIGNED_8:
10248 case DATA_TYPE_UNSIGNED_16:
10249 case DATA_TYPE_UNSIGNED_32:
10250 case DATA_TYPE_UNSIGNED_64:
10251 return false;
10252
10253 default:
10254 DE_ASSERT(false);
10255 }
10256 return false;
10257 }
10258
isInt(ConversionDataType type)10259 bool isInt (ConversionDataType type)
10260 {
10261 switch (type)
10262 {
10263 case DATA_TYPE_SIGNED_8:
10264 case DATA_TYPE_SIGNED_16:
10265 case DATA_TYPE_SIGNED_32:
10266 case DATA_TYPE_SIGNED_64:
10267 case DATA_TYPE_UNSIGNED_8:
10268 case DATA_TYPE_UNSIGNED_16:
10269 case DATA_TYPE_UNSIGNED_32:
10270 case DATA_TYPE_UNSIGNED_64:
10271 return true;
10272
10273 case DATA_TYPE_FLOAT_16:
10274 case DATA_TYPE_FLOAT_32:
10275 case DATA_TYPE_FLOAT_64:
10276 case DATA_TYPE_VEC2_SIGNED_16:
10277 case DATA_TYPE_VEC2_SIGNED_32:
10278 return false;
10279
10280 default:
10281 DE_ASSERT(false);
10282 }
10283 return false;
10284 }
10285
isFloat(ConversionDataType type)10286 bool isFloat (ConversionDataType type)
10287 {
10288 switch (type)
10289 {
10290 case DATA_TYPE_SIGNED_8:
10291 case DATA_TYPE_SIGNED_16:
10292 case DATA_TYPE_SIGNED_32:
10293 case DATA_TYPE_SIGNED_64:
10294 case DATA_TYPE_UNSIGNED_8:
10295 case DATA_TYPE_UNSIGNED_16:
10296 case DATA_TYPE_UNSIGNED_32:
10297 case DATA_TYPE_UNSIGNED_64:
10298 case DATA_TYPE_VEC2_SIGNED_16:
10299 case DATA_TYPE_VEC2_SIGNED_32:
10300 return false;
10301
10302 case DATA_TYPE_FLOAT_16:
10303 case DATA_TYPE_FLOAT_32:
10304 case DATA_TYPE_FLOAT_64:
10305 return true;
10306
10307 default:
10308 DE_ASSERT(false);
10309 }
10310 return false;
10311 }
10312
getTypeName(ConversionDataType type)10313 const string getTypeName (ConversionDataType type)
10314 {
10315 string prefix = isSigned(type) ? "" : "u";
10316
10317 if (isInt(type)) return prefix + "int" + getBitWidthStr(type);
10318 else if (isFloat(type)) return prefix + "float" + getBitWidthStr(type);
10319 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10320 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "i32vec2";
10321 else DE_ASSERT(false);
10322
10323 return "";
10324 }
10325
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10326 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10327 {
10328 const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10329
10330 return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10331 }
10332
getAsmTypeName(ConversionDataType type,deUint32 elements=1)10333 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10334 {
10335 string prefix;
10336
10337 if (isInt(type)) prefix = isSigned(type) ? "i" : "u";
10338 else if (isFloat(type)) prefix = "f";
10339 else if (type == DATA_TYPE_VEC2_SIGNED_16) return "i16vec2";
10340 else if (type == DATA_TYPE_VEC2_SIGNED_32) return "v2i32";
10341 else DE_ASSERT(false);
10342 if ((isInt(type) || isFloat(type)) && elements == 2)
10343 {
10344 prefix = "v2" + prefix;
10345 }
10346
10347 return prefix + getBitWidthStr(type);
10348 }
10349
10350 template<typename T>
getSpecializedBuffer(deInt64 number,deUint32 elements=1)10351 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10352 {
10353 return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10354 }
10355
getBuffer(ConversionDataType type,deInt64 number,deUint32 elements=1)10356 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10357 {
10358 switch (type)
10359 {
10360 case DATA_TYPE_SIGNED_8: return getSpecializedBuffer<deInt8>(number, elements);
10361 case DATA_TYPE_SIGNED_16: return getSpecializedBuffer<deInt16>(number, elements);
10362 case DATA_TYPE_SIGNED_32: return getSpecializedBuffer<deInt32>(number, elements);
10363 case DATA_TYPE_SIGNED_64: return getSpecializedBuffer<deInt64>(number, elements);
10364 case DATA_TYPE_UNSIGNED_8: return getSpecializedBuffer<deUint8>(number, elements);
10365 case DATA_TYPE_UNSIGNED_16: return getSpecializedBuffer<deUint16>(number, elements);
10366 case DATA_TYPE_UNSIGNED_32: return getSpecializedBuffer<deUint32>(number, elements);
10367 case DATA_TYPE_UNSIGNED_64: return getSpecializedBuffer<deUint64>(number, elements);
10368 case DATA_TYPE_FLOAT_16: return getSpecializedBuffer<deUint16>(number, elements);
10369 case DATA_TYPE_FLOAT_32: return getSpecializedBuffer<deUint32>(number, elements);
10370 case DATA_TYPE_FLOAT_64: return getSpecializedBuffer<deUint64>(number, elements);
10371 case DATA_TYPE_VEC2_SIGNED_16: return getSpecializedBuffer<deUint32>(number, elements);
10372 case DATA_TYPE_VEC2_SIGNED_32: return getSpecializedBuffer<deUint64>(number, elements);
10373
10374 default: TCU_THROW(InternalError, "Unimplemented type passed");
10375 }
10376 }
10377
usesInt8(ConversionDataType from,ConversionDataType to)10378 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10379 {
10380 return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10381 from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10382 }
10383
usesInt16(ConversionDataType from,ConversionDataType to)10384 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10385 {
10386 return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10387 from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10388 from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10389 }
10390
usesInt32(ConversionDataType from,ConversionDataType to)10391 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10392 {
10393 return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10394 from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10395 from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10396 }
10397
usesInt64(ConversionDataType from,ConversionDataType to)10398 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10399 {
10400 return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10401 from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10402 }
10403
usesFloat16(ConversionDataType from,ConversionDataType to)10404 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10405 {
10406 return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10407 }
10408
usesFloat32(ConversionDataType from,ConversionDataType to)10409 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10410 {
10411 return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10412 }
10413
usesFloat64(ConversionDataType from,ConversionDataType to)10414 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10415 {
10416 return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10417 }
10418
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10419 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10420 {
10421 if (usesInt16(from, to) && !usesInt32(from, to))
10422 vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10423
10424 if (usesInt64(from, to))
10425 vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10426
10427 if (usesFloat64(from, to))
10428 vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10429
10430 if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10431 {
10432 extensions.push_back("VK_KHR_16bit_storage");
10433 vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10434 }
10435
10436 if (usesFloat16(from, to) || usesInt8(from, to))
10437 {
10438 extensions.push_back("VK_KHR_shader_float16_int8");
10439
10440 if (usesFloat16(from, to))
10441 {
10442 vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10443 }
10444
10445 if (usesInt8(from, to))
10446 {
10447 vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10448
10449 extensions.push_back("VK_KHR_8bit_storage");
10450 vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10451 }
10452 }
10453 }
10454
10455 struct ConvertCase
10456 {
ConvertCasevkt::SpirVAssembly::ConvertCase10457 ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10458 : m_fromType (from)
10459 , m_toType (to)
10460 , m_elements (1)
10461 , m_useStorageExt (useStorageExt)
10462 , m_name (getTestName(from, to, suffix))
10463 {
10464 string caps;
10465 string decl;
10466 string exts;
10467
10468 m_asmTypes["inStorageType"] = getAsmTypeName(from);
10469 m_asmTypes["outStorageType"] = getAsmTypeName(to);
10470 m_asmTypes["inCast"] = "OpCopyObject";
10471 m_asmTypes["outCast"] = "OpCopyObject";
10472 // If the storage extensions are being avoided, tests instead uses
10473 // vectors so that they are easily convertible to 32-bit integers.
10474 // |m_elements| indicates the size of the vector. It modifies how many
10475 // items added to the buffers and converted in the tests.
10476 //
10477 // Currently only supports 1 (default) or 2 elements.
10478 if (!m_useStorageExt)
10479 {
10480 bool in_change = false;
10481 bool out_change = false;
10482 if (usesFloat16(from, from) || usesInt16(from, from))
10483 {
10484 m_asmTypes["inStorageType"] = "u32";
10485 m_asmTypes["inCast"] = "OpBitcast";
10486 m_elements = 2;
10487 in_change = true;
10488 }
10489 if (usesFloat16(to, to) || usesInt16(to, to))
10490 {
10491 m_asmTypes["outStorageType"] = "u32";
10492 m_asmTypes["outCast"] = "OpBitcast";
10493 m_elements = 2;
10494 out_change = true;
10495 }
10496 if (in_change && !out_change)
10497 {
10498 m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10499 }
10500 if (!in_change && out_change)
10501 {
10502 m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10503 }
10504 }
10505
10506 // Safety check for implementation.
10507 if (m_elements < 1 || m_elements > 2)
10508 TCU_THROW(InternalError, "Unsupported number of elements");
10509
10510 m_asmTypes["inputType"] = getAsmTypeName(from, m_elements);
10511 m_asmTypes["outputType"] = getAsmTypeName(to, m_elements);
10512
10513 m_inputBuffer = getBuffer(from, number, m_elements);
10514 if (separateOutput)
10515 m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10516 else
10517 m_outputBuffer = getBuffer(to, number, m_elements);
10518
10519 if (usesInt8(from, to))
10520 {
10521 bool requiresInt8Capability = true;
10522 if (instruction == "OpUConvert" || instruction == "OpSConvert")
10523 {
10524 // Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10525 if (usesInt32(from, to))
10526 requiresInt8Capability = false;
10527 }
10528
10529 caps += "OpCapability StorageBuffer8BitAccess\n";
10530 if (requiresInt8Capability)
10531 caps += "OpCapability Int8\n";
10532
10533 decl += "%i8 = OpTypeInt 8 1\n"
10534 "%u8 = OpTypeInt 8 0\n";
10535
10536 if (m_elements == 2)
10537 {
10538 decl += "%v2i8 = OpTypeVector %i8 2\n"
10539 "%v2u8 = OpTypeVector %u8 2\n";
10540 }
10541 exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10542 }
10543
10544 if (usesInt16(from, to))
10545 {
10546 bool requiresInt16Capability = true;
10547
10548 if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10549 {
10550 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10551 if (usesInt32(from, to) || usesFloat32(from, to))
10552 requiresInt16Capability = false;
10553 }
10554
10555 decl += "%i16 = OpTypeInt 16 1\n"
10556 "%u16 = OpTypeInt 16 0\n";
10557 if (m_elements == 2)
10558 {
10559 decl += "%v2i16 = OpTypeVector %i16 2\n"
10560 "%v2u16 = OpTypeVector %u16 2\n";
10561 }
10562 else
10563 {
10564 decl += "%i16vec2 = OpTypeVector %i16 2\n";
10565 }
10566
10567 // Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10568 if (requiresInt16Capability || !m_useStorageExt)
10569 caps += "OpCapability Int16\n";
10570 }
10571
10572 if (usesFloat16(from, to))
10573 {
10574 decl += "%f16 = OpTypeFloat 16\n";
10575 if (m_elements == 2)
10576 {
10577 decl += "%v2f16 = OpTypeVector %f16 2\n";
10578 }
10579
10580 // Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10581 if (!usesFloat32(from, to) || !m_useStorageExt)
10582 caps += "OpCapability Float16\n";
10583 }
10584
10585 if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10586 {
10587 caps += "OpCapability StorageUniformBufferBlock16\n";
10588 exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10589 }
10590
10591 if (usesInt64(from, to))
10592 {
10593 caps += "OpCapability Int64\n";
10594 decl += "%i64 = OpTypeInt 64 1\n"
10595 "%u64 = OpTypeInt 64 0\n";
10596 if (m_elements == 2)
10597 {
10598 decl += "%v2i64 = OpTypeVector %i64 2\n"
10599 "%v2u64 = OpTypeVector %u64 2\n";
10600 }
10601 }
10602
10603 if (usesFloat64(from, to))
10604 {
10605 caps += "OpCapability Float64\n";
10606 decl += "%f64 = OpTypeFloat 64\n";
10607 if (m_elements == 2)
10608 {
10609 decl += "%v2f64 = OpTypeVector %f64 2\n";
10610 }
10611 }
10612
10613 m_asmTypes["datatype_capabilities"] = caps;
10614 m_asmTypes["datatype_additional_decl"] = decl;
10615 m_asmTypes["datatype_extensions"] = exts;
10616 }
10617
10618 ConversionDataType m_fromType;
10619 ConversionDataType m_toType;
10620 deUint32 m_elements;
10621 bool m_useStorageExt;
10622 string m_name;
10623 map<string, string> m_asmTypes;
10624 BufferSp m_inputBuffer;
10625 BufferSp m_outputBuffer;
10626 };
10627
// Returns the SPIR-V assembly of a compute shader that loads one value from the
// input buffer, applies |instruction| to it and stores the result in the output
// buffer. Type names, casts, capabilities and extensions come from |convertCase|.
// When |addVectors| is true, %v2u32 and %v2f32 are declared as well.
const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
{
	// Start from the case's substitutions and add the shader-specific ones.
	map<string, string> substitutions (convertCase.m_asmTypes);

	substitutions["instruction"]	= instruction;
	substitutions["inDecorator"]	= getByteWidthStr(convertCase.m_fromType);
	substitutions["outDecorator"]	= getByteWidthStr(convertCase.m_toType);

	const std::string preamble =
		"OpCapability Shader\n"
		"${datatype_capabilities}"
		"${datatype_extensions:opt}"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint GLCompute %main \"main\"\n"
		"OpExecutionMode %main LocalSize 1 1 1\n"
		"OpSource GLSL 430\n"
		"OpName %main \"main\"\n"
		// Decorators
		"OpDecorate %indata DescriptorSet 0\n"
		"OpDecorate %indata Binding 0\n"
		"OpDecorate %outdata DescriptorSet 0\n"
		"OpDecorate %outdata Binding 1\n"
		"OpDecorate %in_buf BufferBlock\n"
		"OpDecorate %out_buf BufferBlock\n"
		"OpMemberDecorate %in_buf 0 Offset 0\n"
		"OpMemberDecorate %out_buf 0 Offset 0\n"
		// Base types
		"%void = OpTypeVoid\n"
		"%voidf = OpTypeFunction %void\n"
		"%u32 = OpTypeInt 32 0\n"
		"%i32 = OpTypeInt 32 1\n"
		"%f32 = OpTypeFloat 32\n"
		"%v2i32 = OpTypeVector %i32 2\n"
		"${datatype_additional_decl}";

	// Extra 32-bit vector types, declared only on request.
	const std::string extraVectorTypes =
		"%v2u32 = OpTypeVector %u32 2\n"
		"%v2f32 = OpTypeVector %f32 2\n";

	const std::string body =
		"%uvec3 = OpTypeVector %u32 3\n"
		// Derived types
		"%in_ptr = OpTypePointer Uniform %${inStorageType}\n"
		"%out_ptr = OpTypePointer Uniform %${outStorageType}\n"
		"%in_buf = OpTypeStruct %${inStorageType}\n"
		"%out_buf = OpTypeStruct %${outStorageType}\n"
		"%in_bufptr = OpTypePointer Uniform %in_buf\n"
		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
		"%indata = OpVariable %in_bufptr Uniform\n"
		"%outdata = OpVariable %out_bufptr Uniform\n"
		// Constants
		"%zero = OpConstant %i32 0\n"
		// Main function
		"%main = OpFunction %void None %voidf\n"
		"%label = OpLabel\n"
		"%inloc = OpAccessChain %in_ptr %indata %zero\n"
		"%outloc = OpAccessChain %out_ptr %outdata %zero\n"
		"%inval = OpLoad %${inStorageType} %inloc\n"
		"%in_cast = ${inCast} %${inputType} %inval\n"
		"%conv = ${instruction} %${outputType} %in_cast\n"
		"%out_cast = ${outCast} %${outStorageType} %conv\n"
		" OpStore %outloc %out_cast\n"
		" OpReturn\n"
		" OpFunctionEnd\n"
		;

	std::string shaderTemplate (preamble);

	if (addVectors)
		shaderTemplate += extraVectorTypes;

	shaderTemplate += body;

	return StringTemplate(shaderTemplate).specialize(substitutions);
}
10697
// Appends the conversion test cases for one SPIR-V conversion opcode to testCases.
//
// testCases   - output vector; cases are appended with push_back, existing entries are kept.
// instruction - opcode name; the handled values are "OpUConvert", "OpSConvert", "OpFConvert",
//               "OpConvertFToU", "OpConvertUToF", "OpConvertFToS" and "OpConvertSToF".
//               Any other string ends up in DE_FATAL("Unknown instruction").
//
// NOTE(review): the ConvertCase constructor is not visible here. From the call sites its
// arguments look like (instruction, source type, destination type, input value[, flag,
// explicit expected output[, name string[, storage-extension flag]]]) - confirm against
// the constructor before relying on this.
createConvertCases(vector<ConvertCase> & testCases,const string & instruction)10698 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10699 {
10700 if (instruction == "OpUConvert")
10701 {
10702 // Convert unsigned int to unsigned int
10703 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_16, 42));
10704 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_32, 73));
10705 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_UNSIGNED_64, 121));
10706
10707 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_8, 33));
10708 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_32, 60653));
10709 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_UNSIGNED_64, 17991));
10710
10711 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_64, 904256275));
10712 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_16, 6275));
10713 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_UNSIGNED_8, 17));
10714
10715 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_32, 701256243));
10716 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_16, 4741));
10717 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_UNSIGNED_8, 65));
10718
10719 // Zero extension for int->uint
10720 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10721 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 209));
10722 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 251));
10723 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10724 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 62195));
10725 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10726
10727 // Truncate for int->uint
10728 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10729 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10730 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10731 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10732 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10733 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
10734 }
10735 else if (instruction == "OpSConvert")
10736 {
10737 // Sign extension int->int
10738 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_16, -30));
10739 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_32, 55));
10740 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_SIGNED_64, -3));
10741 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_32, 14669));
10742 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_64, -3341));
10743 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
10744
10745 // Truncate for int->int
10746 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_SIGNED_8, 81));
10747 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_8, -93));
10748 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_8, 3182748172687672ll, true, 56));
10749 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_SIGNED_16, 12382));
10750 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_32, -972812359));
10751 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_SIGNED_16, -1067742499291926803ll, true, -4371));
10752
10753 // Sign extension for int->uint
10754 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_16, 56));
10755 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_32, -47, true, 4294967249u));
10756 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_UNSIGNED_64, -5, true, 18446744073709551611ull));
10757 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_32, 14669));
10758 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_64, -3341, true, 18446744073709548275ull));
10759 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_64, 973610259));
10760
10761 // Truncate for int->uint
10762 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_UNSIGNED_8, -25711, true, 145));
10763 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_8, 103));
10764 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_8, -1067742499291926803ll, true, 237));
10765 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_UNSIGNED_16, 12382));
10766 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_32, -972812359, true, 3322154937u));
10767 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_UNSIGNED_16, -1067742499291926803ll, true, 61165));
10768
10769 // Sign extension for uint->int
10770 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_16, 71));
10771 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_32, 201, true, -55));
10772 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_SIGNED_64, 188, true, -68));
10773 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_32, 14669));
10774 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_64, 62195, true, -3341));
10775 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_64, 973610259));
10776
10777 // Truncate for uint->int
10778 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_SIGNED_8, 67));
10779 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_8, 133, true, -123));
10780 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_8, 836927654193256494ull, true, 46));
10781 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_SIGNED_16, 12382));
10782 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_32, 18446744072736739257ull, true, -972812359));
10783 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_SIGNED_16, 17379001574417624813ull, true, -4371));
10784
10785 // Convert i16vec2 to i32vec2 and vice versa
10786 // Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10787 // The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10788 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_16, DATA_TYPE_VEC2_SIGNED_32, (33413u << 16) | 27593, true, (4294935173ull << 32) | 27593));
10789 testCases.push_back(ConvertCase(instruction, DATA_TYPE_VEC2_SIGNED_32, DATA_TYPE_VEC2_SIGNED_16, (4294935173ull << 32) | 27593, true, (33413u << 16) | 27593));
10790 }
10791 else if (instruction == "OpFConvert")
10792 {
10793 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10794 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_64, 0x449a4000, true, 0x4093480000000000));
10795 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_32, 0x4093480000000000, true, 0x449a4000));
10796
10797 // Conversions to/from 32-bit floats are supported by both 16-bit
10798 // storage and Float16. The tests are duplicated to exercise both
10799 // cases.
10800 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2));
10801 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000));
10802 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_FLOAT_16, 0x449a4000, true, 0x64D2, "no_storage", false));
10803 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_32, 0x64D2, true, 0x449a4000, "no_storage", false));
10804
10805 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000));
10806 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2));
10807 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_FLOAT_64, 0x64D2, true, 0x4093480000000000, "no_storage", false));
10808 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_FLOAT_16, 0x4093480000000000, true, 0x64D2, "no_storage", false));
10809
10810 }
10811 else if (instruction == "OpConvertFToU")
10812 {
10813 // Normal numbers from uint8 range
10814 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5020, true, 33, "33", false));
10815 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x42280000, true, 42, "42"));
10816 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x4067800000000000ull, true, 188, "188"));
10817
10818 // Maximum uint8 value
10819 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x5BF8, true, 255, "max", false));
10820 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x437F0000, true, 255, "max"));
10821 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x406FE00000000000ull, true, 255, "max"));
10822
10823 // +0
10824 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x0000, true, 0, "p0", false));
10825 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x00000000, true, 0, "p0"));
10826 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
10827
10828 // -0
10829 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_8, 0x8000, true, 0, "m0", false));
10830 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_8, 0x80000000, true, 0, "m0"));
10831 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
10832
10833 // All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10834 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x64D2, true, 1234, "1234", false));
10835 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x64D2, true, 1234, "1234", false));
10836 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x64D2, true, 1234, "1234", false));
10837
10838 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10839 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x7BFF, true, 65504, "max", false));
10840 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x7BFF, true, 65504, "max", false));
10841 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x7BFF, true, 65504, "max", false));
10842
10843 // +0
10844 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x0000, true, 0, "p0", false));
10845 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x0000, true, 0, "p0", false));
10846 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x0000, true, 0, "p0", false));
10847
10848 // -0
10849 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_16, 0x8000, true, 0, "m0", false));
10850 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_32, 0x8000, true, 0, "m0", false));
10851 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_UNSIGNED_64, 0x8000, true, 0, "m0", false));
10852
// 0x449a4000 and 0x4093480000000000 are 1234.0 as 32-bit and 64-bit IEEE 754 floats
10853 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_16, 0x449a4000, true, 1234));
10854 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_32, 0x449a4000, true, 1234));
10855 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_UNSIGNED_64, 0x449a4000, true, 1234));
10856 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_16, 0x4093480000000000, true, 1234));
10857 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_32, 0x4093480000000000, true, 1234));
10858 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_UNSIGNED_64, 0x4093480000000000, true, 1234));
10859 }
10860 else if (instruction == "OpConvertUToF")
10861 {
10862 // Normal numbers from uint8 range
10863 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 116, true, 0x5740, "116", false));
10864 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 232, true, 0x43680000, "232"));
10865 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 164, true, 0x4064800000000000ull, "164"));
10866
10867 // Maximum uint8 value
10868 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_16, 255, true, 0x5BF8, "max", false));
10869 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_32, 255, true, 0x437F0000, "max"));
10870 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_8, DATA_TYPE_FLOAT_64, 255, true, 0x406FE00000000000ull, "max"));
10871
10872 // 0x64D2 below represents 1234.0 as a 16-bit IEEE 754 float
10873 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10874 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10875 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 1234, true, 0x64D2, "1234", false));
10876
10877 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10878 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10879 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10880 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10881
10882 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "4294967296", false));
10883 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "4294967296", false));
10884
10885 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 0xffffff0000000000, true, 0x5f7fffff, "max", false));
10886
// 0x449a4000 and 0x4093480000000000 are 1234.0 as 32-bit and 64-bit IEEE 754 floats
10887 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10888 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_16, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10889 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10890 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_32, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10891 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_32, 1234, true, 0x449a4000));
10892 testCases.push_back(ConvertCase(instruction, DATA_TYPE_UNSIGNED_64, DATA_TYPE_FLOAT_64, 1234, true, 0x4093480000000000));
10893 }
10894 else if (instruction == "OpConvertFToS")
10895 {
10896 // Normal numbers from int8 range
10897 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xC980, true, -11, "m11", false));
10898 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC2140000, true, -37, "m37"));
10899 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC050800000000000ull, true, -66, "m66"));
10900
10901 // Minimum int8 value
10902 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0xD800, true, -128, "min", false));
10903 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0xC3000000, true, -128, "min"));
10904 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0xC060000000000000ull, true, -128, "min"));
10905
10906 // Maximum int8 value
10907 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x57F0, true, 127, "max", false));
10908 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x42FE0000, true, 127, "max"));
10909 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x405FC00000000000ull, true, 127, "max"));
10910
10911 // +0
10912 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x0000, true, 0, "p0", false));
10913 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x00000000, true, 0, "p0"));
10914 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x0000000000000000ull, true, 0, "p0"));
10915
10916 // -0
10917 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_8, 0x8000, true, 0, "m0", false));
10918 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_8, 0x80000000, true, 0, "m0"));
10919 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_8, 0x8000000000000000ull, true, 0, "m0"));
10920
10921 // 0xE4D2 below represents -1234.0 as a 16-bit IEEE 754 float
10922 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xE4D2, true, -1234, "m1234", false));
10923 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xE4D2, true, -1234, "m1234", false));
10924 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xE4D2, true, -1234, "m1234", false));
10925
10926 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10927 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10928 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0xF800, true, -32768, "min", false));
10929 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0xFBFF, true, -65504, "min", false));
10930 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0xFBFF, true, -65504, "min", false));
10931
10932 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10933 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10934 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x77FF, true, 32752, "max", false));
10935 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x7BFF, true, 65504, "max", false));
10936 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x7BFF, true, 65504, "max", false));
10937
10938 // +0
10939 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x0000, true, 0, "p0", false));
10940 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x0000, true, 0, "p0", false));
10941 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x0000, true, 0, "p0", false));
10942
10943 // -0
10944 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_16, 0x8000, true, 0, "m0", false));
10945 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_32, 0x8000, true, 0, "m0", false));
10946 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_16, DATA_TYPE_SIGNED_64, 0x8000, true, 0, "m0", false));
10947
// 0xc49a4000 and 0xc093480000000000 are -1234.0 as 32-bit and 64-bit IEEE 754 floats;
// 0x453b9000 and 0xc53b9000 are +3001.0 and -3001.0 as 32-bit floats
10948 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc49a4000, true, -1234));
10949 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_32, 0xc49a4000, true, -1234));
10950 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_64, 0xc49a4000, true, -1234));
10951 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_16, 0xc093480000000000, true, -1234));
10952 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_32, 0xc093480000000000, true, -1234));
10953 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_64, DATA_TYPE_SIGNED_64, 0xc093480000000000, true, -1234));
10954 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0x453b9000, true, 3001, "p3001"));
10955 testCases.push_back(ConvertCase(instruction, DATA_TYPE_FLOAT_32, DATA_TYPE_SIGNED_16, 0xc53b9000, true, -3001, "m3001"));
10956 }
10957 else if (instruction == "OpConvertSToF")
10958 {
10959 // Normal numbers from int8 range
// NOTE(review): the string "m21" on the next line is paired with input -12 (0xCA00 is -12.0),
// while the line after it uses -21 with the same string. Looks like a copy/paste slip, but the
// string may feed the test case name, so confirm before renaming.
10960 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -12, true, 0xCA00, "m21", false));
10961 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -21, true, 0xC1A80000, "m21"));
10962 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -99, true, 0xC058C00000000000ull, "m99"));
10963
10964 // Minimum int8 value
10965 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, -128, true, 0xD800, "min", false));
10966 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, -128, true, 0xC3000000, "min"));
10967 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, -128, true, 0xC060000000000000ull, "min"));
10968
10969 // Maximum int8 value
10970 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_16, 127, true, 0x57F0, "max", false));
10971 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_32, 127, true, 0x42FE0000, "max"));
10972 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_8, DATA_TYPE_FLOAT_64, 127, true, 0x405FC00000000000ull, "max"));
10973
10974 // 0xE4D2 below represents -1234.0 as a 16-bit IEEE 754 float
10975 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10976 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10977 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -1234, true, 0xE4D2, "m1234", false));
10978
10979 // 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
10980 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
10981 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 32768, true, 0x7800, "p32768", false));
10982
10983 // 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10984 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
10985 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "m32768", false));
10986
10987 // 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10988 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, -32768, true, 0xF800, "min", false));
10989 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
10990 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, -65504, true, 0xFBFF, "min", false));
10991
10992 // 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10993 // 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10994 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_16, 32752, true, 0x77FF, "max", false));
10995 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10996 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_16, 65504, true, 0x7BFF, "max", false));
10997
10998 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 4294967296ll, true, 0x4f800000, "p4294967296", false));
10999 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, 4294967296ll, true, 0x41f0000000000000, "p4294967296", false));
11000 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -4294967296ll, true, 0xcf800000, "m4294967296", false));
11001 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -4294967296ll, true, 0xc1f0000000000000, "m4294967296", false));
11002
11003 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, 0x7fffff8000000000, true, 0x5effffff, "max", false));
11004 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -0x7fffff8000000000, true, 0xdeffffff, "min", false));
11005
// 0xc49a4000 and 0xc093480000000000 are -1234.0 as 32-bit and 64-bit IEEE 754 floats
11006 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11007 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_16, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11008 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11009 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_32, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11010 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_32, -1234, true, 0xc49a4000));
11011 testCases.push_back(ConvertCase(instruction, DATA_TYPE_SIGNED_64, DATA_TYPE_FLOAT_64, -1234, true, 0xc093480000000000));
11012 }
11013 else
// No case list exists for any other opcode name.
11014 DE_FATAL("Unknown instruction");
11015 }
11016
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11017 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
11018 {
11019 map<string, string> params = convertCase.m_asmTypes;
11020 map<string, string> fragments;
11021
11022 params["instruction"] = instruction;
11023 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11024
11025 const StringTemplate decoration (
11026 " OpDecorate %SSBOi DescriptorSet 0\n"
11027 " OpDecorate %SSBOo DescriptorSet 0\n"
11028 " OpDecorate %SSBOi Binding 0\n"
11029 " OpDecorate %SSBOo Binding 1\n"
11030 " OpDecorate %s_SSBOi Block\n"
11031 " OpDecorate %s_SSBOo Block\n"
11032 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11033 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11034
11035 const StringTemplate pre_main (
11036 "${datatype_additional_decl:opt}"
11037 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11038 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11039 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11040 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11041 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11042 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11043 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11044 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11045
11046 const StringTemplate testfun (
11047 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11048 "%param = OpFunctionParameter %v4f32\n"
11049 "%label = OpLabel\n"
11050 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11051 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11052 "%valIn = OpLoad %${inStorageType} %iLoc\n"
11053 "%valInCast = ${inCast} %${inputType} %valIn\n"
11054 "%conv = ${instruction} %${outputType} %valInCast\n"
11055 "%valOutCast = ${outCast} %${outStorageType} %conv\n"
11056 " OpStore %oLoc %valOutCast\n"
11057 " OpReturnValue %param\n"
11058 " OpFunctionEnd\n");
11059
11060 params["datatype_extensions"] =
11061 params["datatype_extensions"] +
11062 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11063
11064 fragments["capability"] = params["datatype_capabilities"];
11065 fragments["extension"] = params["datatype_extensions"];
11066 fragments["decoration"] = decoration.specialize(params);
11067 fragments["pre_main"] = pre_main.specialize(params);
11068 fragments["testfun"] = testfun.specialize(params);
11069
11070 return fragments;
11071 }
11072
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11073 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
11074 {
11075 map<string, string> params = convertCase.m_asmTypes;
11076 map<string, string> fragments;
11077
11078 params["instruction"] = instruction;
11079 params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11080
11081 const StringTemplate decoration(
11082 " OpDecorate %SSBOi DescriptorSet 0\n"
11083 " OpDecorate %SSBOo DescriptorSet 0\n"
11084 " OpDecorate %SSBOi Binding 0\n"
11085 " OpDecorate %SSBOo Binding 1\n"
11086 " OpDecorate %s_SSBOi Block\n"
11087 " OpDecorate %s_SSBOo Block\n"
11088 "OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11089 "OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11090
11091 const StringTemplate pre_main(
11092 "${datatype_additional_decl:opt}"
11093 " %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11094 " %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11095 " %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11096 " %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11097 " %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11098 " %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11099 " %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11100 " %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11101
11102 const StringTemplate testfun(
11103 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11104 "%param = OpFunctionParameter %v4f32\n"
11105 "%label = OpLabel\n"
11106 "%iLoc = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11107 "%oLoc = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11108 "%inval = OpLoad %${inStorageType} %iLoc\n"
11109 "%in_cast = ${inCast} %${inputType} %inval\n"
11110 "%conv = ${instruction} %${outputType} %in_cast\n"
11111 "%out_cast = ${outCast} %${outStorageType} %conv\n"
11112 " OpStore %oLoc %out_cast\n"
11113 " OpReturnValue %param\n"
11114 " OpFunctionEnd\n");
11115
11116 params["datatype_extensions"] =
11117 params["datatype_extensions"] +
11118 "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11119
11120 fragments["capability"] = params["datatype_capabilities"];
11121 fragments["extension"] = params["datatype_extensions"];
11122 fragments["decoration"] = decoration.specialize(params);
11123 fragments["pre_main"] = pre_main.specialize(params);
11124 fragments["testfun"] = testfun.specialize(params);
11125 return fragments;
11126 }
11127
11128 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11129 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11130 {
11131 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11132 vector<ConvertCase> testCases;
11133 createConvertCases(testCases, instruction);
11134
11135 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11136 {
11137 ComputeShaderSpec spec;
11138 spec.assembly = getConvertCaseShaderStr(instruction, *test, true);
11139 spec.numWorkGroups = IVec3(1, 1, 1);
11140 spec.inputs.push_back (test->m_inputBuffer);
11141 spec.outputs.push_back (test->m_outputBuffer);
11142
11143 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11144
11145 group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
11146 }
11147 return group.release();
11148 }
11149
11150 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11151 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11152 {
11153 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11154 vector<ConvertCase> testCases;
11155 createConvertCases(testCases, instruction);
11156
11157 for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11158 {
11159 map<string, string> fragments = (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11160 VulkanFeatures vulkanFeatures;
11161 GraphicsResources resources;
11162 vector<string> extensions;
11163 SpecConstants noSpecConstants;
11164 PushConstants noPushConstants;
11165 GraphicsInterfaces noInterfaces;
11166 tcu::RGBA defaultColors[4];
11167
11168 getDefaultColors (defaultColors);
11169 resources.inputs.push_back (Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11170 resources.outputs.push_back (Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11171 extensions.push_back ("VK_KHR_storage_buffer_storage_class");
11172
11173 getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
11174
11175 vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics = true;
11176 vulkanFeatures.coreFeatures.fragmentStoresAndAtomics = true;
11177
11178 createTestsForAllStages(
11179 test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11180 noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11181 }
11182 return group.release();
11183 }
11184
11185 // Constant-Creation Instructions: OpConstant, OpConstantComposite
createOpConstantFloat16Tests(tcu::TestContext & testCtx)11186 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11187 {
11188 de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests (new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11189 RGBA inputColors[4];
11190 RGBA outputColors[4];
11191 vector<string> extensions;
11192 GraphicsResources resources;
11193 VulkanFeatures features;
11194
11195 const char functionStart[] =
11196 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11197 "%param1 = OpFunctionParameter %v4f32\n"
11198 "%lbl = OpLabel\n";
11199
11200 const char functionEnd[] =
11201 "%transformed_param_32 = OpFConvert %v4f32 %transformed_param\n"
11202 " OpReturnValue %transformed_param_32\n"
11203 " OpFunctionEnd\n";
11204
11205 struct NameConstantsCode
11206 {
11207 string name;
11208 string constants;
11209 string code;
11210 };
11211
11212 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11213 "%f16 = OpTypeFloat 16\n" \
11214 "%c_f16_0 = OpConstant %f16 0.0\n" \
11215 "%c_f16_0_5 = OpConstant %f16 0.5\n" \
11216 "%c_f16_1 = OpConstant %f16 1.0\n" \
11217 "%v4f16 = OpTypeVector %f16 4\n" \
11218 "%fp_f16 = OpTypePointer Function %f16\n" \
11219 "%fp_v4f16 = OpTypePointer Function %v4f16\n" \
11220 "%c_v4f16_1_1_1_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11221 "%a4f16 = OpTypeArray %f16 %c_u32_4\n" \
11222
11223 NameConstantsCode tests[] =
11224 {
11225 {
11226 "vec4",
11227
11228 FLOAT_16_COMMON_TYPES_AND_CONSTS
11229 "%cval = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11230 "%param1_16 = OpFConvert %v4f16 %param1\n"
11231 "%transformed_param = OpFAdd %v4f16 %param1_16 %cval\n"
11232 },
11233 {
11234 "struct",
11235
11236 FLOAT_16_COMMON_TYPES_AND_CONSTS
11237 "%stype = OpTypeStruct %v4f16 %f16\n"
11238 "%fp_stype = OpTypePointer Function %stype\n"
11239 "%f16_n_1 = OpConstant %f16 -1.0\n"
11240 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11241 "%cvec = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11242 "%cval = OpConstantComposite %stype %cvec %f16_n_1\n",
11243
11244 "%v = OpVariable %fp_stype Function %cval\n"
11245 "%vec_ptr = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11246 "%f16_ptr = OpAccessChain %fp_f16 %v %c_u32_1\n"
11247 "%vec_val = OpLoad %v4f16 %vec_ptr\n"
11248 "%f16_val = OpLoad %f16 %f16_ptr\n"
11249 "%tmp1 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11250 "%param1_16 = OpFConvert %v4f16 %param1\n"
11251 "%tmp2 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11252 "%transformed_param = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11253 },
11254 {
11255 // [1|0|0|0.5] [x] = x + 0.5
11256 // [0|1|0|0.5] [y] = y + 0.5
11257 // [0|0|1|0.5] [z] = z + 0.5
11258 // [0|0|0|1 ] [1] = 1
11259 "matrix",
11260
11261 FLOAT_16_COMMON_TYPES_AND_CONSTS
11262 "%mat4x4_f16 = OpTypeMatrix %v4f16 4\n"
11263 "%v4f16_1_0_0_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11264 "%v4f16_0_1_0_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11265 "%v4f16_0_0_1_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11266 "%v4f16_0_5_0_5_0_5_1 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11267 "%cval = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11268
11269 "%param1_16 = OpFConvert %v4f16 %param1\n"
11270 "%transformed_param = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
11271 },
11272 {
11273 "array",
11274
11275 FLOAT_16_COMMON_TYPES_AND_CONSTS
11276 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11277 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11278 "%f16_n_1 = OpConstant %f16 -1.0\n"
11279 "%f16_1_5 = OpConstant %f16 !0x3e00\n" // +1.5
11280 "%carr = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11281
11282 "%v = OpVariable %fp_a4f16 Function %carr\n"
11283 "%f = OpAccessChain %fp_f16 %v %c_u32_0\n"
11284 "%f1 = OpAccessChain %fp_f16 %v %c_u32_1\n"
11285 "%f2 = OpAccessChain %fp_f16 %v %c_u32_2\n"
11286 "%f3 = OpAccessChain %fp_f16 %v %c_u32_3\n"
11287 "%f_val = OpLoad %f16 %f\n"
11288 "%f1_val = OpLoad %f16 %f1\n"
11289 "%f2_val = OpLoad %f16 %f2\n"
11290 "%f3_val = OpLoad %f16 %f3\n"
11291 "%ftot1 = OpFAdd %f16 %f_val %f1_val\n"
11292 "%ftot2 = OpFAdd %f16 %ftot1 %f2_val\n"
11293 "%ftot3 = OpFAdd %f16 %ftot2 %f3_val\n" // 0 - 1 + 1.5 + 0
11294 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11295 "%param1_16 = OpFConvert %v4f16 %param1\n"
11296 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
11297 },
11298 {
11299 //
11300 // [
11301 // {
11302 // 0.0,
11303 // [ 1.0, 1.0, 1.0, 1.0]
11304 // },
11305 // {
11306 // 1.0,
11307 // [ 0.0, 0.5, 0.0, 0.0]
11308 // }, // ^^^
11309 // {
11310 // 0.0,
11311 // [ 1.0, 1.0, 1.0, 1.0]
11312 // }
11313 // ]
11314 "array_of_struct_of_array",
11315
11316 FLOAT_16_COMMON_TYPES_AND_CONSTS
11317 "%c_v4f16_1_1_1_0 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11318 "%fp_a4f16 = OpTypePointer Function %a4f16\n"
11319 "%stype = OpTypeStruct %f16 %a4f16\n"
11320 "%a3stype = OpTypeArray %stype %c_u32_3\n"
11321 "%fp_a3stype = OpTypePointer Function %a3stype\n"
11322 "%ca4f16_0 = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11323 "%ca4f16_1 = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11324 "%cstype1 = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11325 "%cstype2 = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11326 "%carr = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11327
11328 "%v = OpVariable %fp_a3stype Function %carr\n"
11329 "%f = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11330 "%f_l = OpLoad %f16 %f\n"
11331 "%add_vec = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11332 "%param1_16 = OpFConvert %v4f16 %param1\n"
11333 "%transformed_param = OpFAdd %v4f16 %param1_16 %add_vec\n"
11334 }
11335 };
11336
11337 getHalfColorsFullAlpha(inputColors);
11338 outputColors[0] = RGBA(255, 255, 255, 255);
11339 outputColors[1] = RGBA(255, 127, 127, 255);
11340 outputColors[2] = RGBA(127, 255, 127, 255);
11341 outputColors[3] = RGBA(127, 127, 255, 255);
11342
11343 extensions.push_back("VK_KHR_shader_float16_int8");
11344 features.extFloat16Int8.shaderFloat16 = true;
11345
11346 for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11347 {
11348 map<string, string> fragments;
11349
11350 fragments["capability"] = "OpCapability Float16\n";
11351 fragments["pre_main"] = tests[testNdx].constants;
11352 fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
11353
11354 createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11355 }
11356 return opConstantCompositeTests.release();
11357 }
11358
11359 template<typename T>
11360 void finalizeTestsCreation (T& specResource,
11361 const map<string, string>& fragments,
11362 tcu::TestContext& testCtx,
11363 tcu::TestCaseGroup& testGroup,
11364 const std::string& testName,
11365 const VulkanFeatures& vulkanFeatures,
11366 const vector<string>& extensions,
11367 const IVec3& numWorkGroups,
11368 const bool splitRenderArea = false);
11369
11370 template<>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11371 void finalizeTestsCreation (GraphicsResources& specResource,
11372 const map<string, string>& fragments,
11373 tcu::TestContext& ,
11374 tcu::TestCaseGroup& testGroup,
11375 const std::string& testName,
11376 const VulkanFeatures& vulkanFeatures,
11377 const vector<string>& extensions,
11378 const IVec3& ,
11379 const bool splitRenderArea)
11380 {
11381 RGBA defaultColors[4];
11382 getDefaultColors(defaultColors);
11383
11384 createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11385 }
11386
11387 template<>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11388 void finalizeTestsCreation (ComputeShaderSpec& specResource,
11389 const map<string, string>& fragments,
11390 tcu::TestContext& testCtx,
11391 tcu::TestCaseGroup& testGroup,
11392 const std::string& testName,
11393 const VulkanFeatures& vulkanFeatures,
11394 const vector<string>& extensions,
11395 const IVec3& numWorkGroups,
11396 bool)
11397 {
11398 specResource.numWorkGroups = numWorkGroups;
11399 specResource.requestedVulkanFeatures = vulkanFeatures;
11400 specResource.extensions = extensions;
11401
11402 specResource.assembly = makeComputeShaderAssembly(fragments);
11403
11404 testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
11405 }
11406
11407 template<class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11408 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11409 {
11410 const string nan = nanSupported ? "_nan" : "";
11411 const string groupName = "logical" + nan;
11412 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11413
11414 de::Random rnd (deStringHash(testGroup->getName()));
11415 const string spvCapabilities = string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11416 const string spvExtensions = (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11417 const string spvExecutionMode = nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11418 const deUint32 numDataPointsScalar = 16;
11419 const deUint32 numDataPointsVector = 14;
11420 const vector<deFloat16> float16DataScalar = getFloat16s(rnd, numDataPointsScalar);
11421 const vector<deFloat16> float16DataVector = getFloat16s(rnd, numDataPointsVector);
11422 const vector<deFloat16> float16Data1 = squarize(float16DataScalar, 0); // Total Size: square(sizeof(float16DataScalar))
11423 const vector<deFloat16> float16Data2 = squarize(float16DataScalar, 1);
11424 const vector<deFloat16> float16DataVec1 = squarizeVector(float16DataVector, 0); // Total Size: 2 * (square(square(sizeof(float16DataVector))))
11425 const vector<deFloat16> float16DataVec2 = squarizeVector(float16DataVector, 1);
11426 const vector<deFloat16> float16OutDummy (float16Data1.size(), 0);
11427 const vector<deFloat16> float16OutVecDummy (float16DataVec1.size(), 0);
11428
11429 struct TestOp
11430 {
11431 const char* opCode;
11432 VerifyIOFunc verifyFuncNan;
11433 VerifyIOFunc verifyFuncNonNan;
11434 const deUint32 argCount;
11435 };
11436
11437 const TestOp testOps[] =
11438 {
11439 { "OpIsNan" , compareFP16Logical<fp16isNan, true, false, true>, compareFP16Logical<fp16isNan, true, false, false>, 1 },
11440 { "OpIsInf" , compareFP16Logical<fp16isInf, true, false, true>, compareFP16Logical<fp16isInf, true, false, false>, 1 },
11441 { "OpFOrdEqual" , compareFP16Logical<fp16isEqual, false, true, true>, compareFP16Logical<fp16isEqual, false, true, false>, 2 },
11442 { "OpFUnordEqual" , compareFP16Logical<fp16isEqual, false, false, true>, compareFP16Logical<fp16isEqual, false, false, false>, 2 },
11443 { "OpFOrdNotEqual" , compareFP16Logical<fp16isUnequal, false, true, true>, compareFP16Logical<fp16isUnequal, false, true, false>, 2 },
11444 { "OpFUnordNotEqual" , compareFP16Logical<fp16isUnequal, false, false, true>, compareFP16Logical<fp16isUnequal, false, false, false>, 2 },
11445 { "OpFOrdLessThan" , compareFP16Logical<fp16isLess, false, true, true>, compareFP16Logical<fp16isLess, false, true, false>, 2 },
11446 { "OpFUnordLessThan" , compareFP16Logical<fp16isLess, false, false, true>, compareFP16Logical<fp16isLess, false, false, false>, 2 },
11447 { "OpFOrdGreaterThan" , compareFP16Logical<fp16isGreater, false, true, true>, compareFP16Logical<fp16isGreater, false, true, false>, 2 },
11448 { "OpFUnordGreaterThan" , compareFP16Logical<fp16isGreater, false, false, true>, compareFP16Logical<fp16isGreater, false, false, false>, 2 },
11449 { "OpFOrdLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, true, true>, compareFP16Logical<fp16isLessOrEqual, false, true, false>, 2 },
11450 { "OpFUnordLessThanEqual" , compareFP16Logical<fp16isLessOrEqual, false, false, true>, compareFP16Logical<fp16isLessOrEqual, false, false, false>, 2 },
11451 { "OpFOrdGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, true, true>, compareFP16Logical<fp16isGreaterOrEqual, false, true, false>, 2 },
11452 { "OpFUnordGreaterThanEqual" , compareFP16Logical<fp16isGreaterOrEqual, false, false, true>, compareFP16Logical<fp16isGreaterOrEqual, false, false, false>, 2 },
11453 };
11454
11455 { // scalar cases
11456 const StringTemplate preMain
11457 (
11458 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11459 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11460 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11461 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11462 " %f16 = OpTypeFloat 16\n"
11463 " %v2f16 = OpTypeVector %f16 2\n"
11464 " %c_f16_0 = OpConstant %f16 0.0\n"
11465 " %c_f16_1 = OpConstant %f16 1.0\n"
11466 " %up_u32 = OpTypePointer Uniform %u32\n"
11467 " %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11468 " %SSBO16 = OpTypeStruct %ra_u32\n"
11469 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11470 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11471 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11472 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11473 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11474 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11475 );
11476
11477 const StringTemplate decoration
11478 (
11479 "OpDecorate %ra_u32 ArrayStride 4\n"
11480 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11481 "OpDecorate %SSBO16 BufferBlock\n"
11482 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11483 "OpDecorate %ssbo_src0 Binding 0\n"
11484 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11485 "OpDecorate %ssbo_src1 Binding 1\n"
11486 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11487 "OpDecorate %ssbo_dst Binding 2\n"
11488 );
11489
11490 const StringTemplate testFun
11491 (
11492 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11493 " %param = OpFunctionParameter %v4f32\n"
11494
11495 " %entry = OpLabel\n"
11496 " %i = OpVariable %fp_i32 Function\n"
11497 " OpStore %i %c_i32_0\n"
11498 " OpBranch %loop\n"
11499
11500 " %loop = OpLabel\n"
11501 " %i_cmp = OpLoad %i32 %i\n"
11502 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11503 " OpLoopMerge %merge %next None\n"
11504 " OpBranchConditional %lt %write %merge\n"
11505
11506 " %write = OpLabel\n"
11507 " %ndx = OpLoad %i32 %i\n"
11508
11509 " %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11510
11511 "${op_arg1_calc}"
11512
11513 " %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11514 " %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11515 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11516 " OpBranch %next\n"
11517
11518 " %next = OpLabel\n"
11519 " %i_cur = OpLoad %i32 %i\n"
11520 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11521 " OpStore %i %i_new\n"
11522 " OpBranch %loop\n"
11523
11524 " %merge = OpLabel\n"
11525 " OpReturnValue %param\n"
11526
11527 " OpFunctionEnd\n"
11528 );
11529
11530 const StringTemplate arg1Calc
11531 (
11532 " %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11533 );
11534
11535 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11536 {
11537 const size_t iterations = float16Data1.size();
11538 const TestOp& testOp = testOps[testOpsIdx];
11539 const string testName = de::toLower(string(testOp.opCode)) + "_scalar";
11540 SpecResource specResource;
11541 map<string, string> specs;
11542 VulkanFeatures features;
11543 map<string, string> fragments;
11544 vector<string> extensions;
11545
11546 specs["num_data_points"] = de::toString(iterations);
11547 specs["op_code"] = testOp.opCode;
11548 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11549 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11550
11551 fragments["extension"] = spvExtensions;
11552 fragments["capability"] = spvCapabilities;
11553 fragments["execution_mode"] = spvExecutionMode;
11554 fragments["decoration"] = decoration.specialize(specs);
11555 fragments["pre_main"] = preMain.specialize(specs);
11556 fragments["testfun"] = testFun.specialize(specs);
11557 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11558 if (testOp.argCount > 1)
11559 {
11560 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11561 }
11562 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11563
11564 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11565 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11566 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11567 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11568
11569 extensions.push_back("VK_KHR_shader_float16_int8");
11570
11571 if (nanSupported)
11572 {
11573 extensions.push_back("VK_KHR_shader_float_controls");
11574
11575 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11576 }
11577
11578 features.extFloat16Int8.shaderFloat16 = true;
11579
11580 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11581 }
11582 }
11583 { // vector cases
11584 const StringTemplate preMain
11585 (
11586 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11587 " %v2bool = OpTypeVector %bool 2\n"
11588 " %f16 = OpTypeFloat 16\n"
11589 " %c_f16_0 = OpConstant %f16 0.0\n"
11590 " %c_f16_1 = OpConstant %f16 1.0\n"
11591 " %v2f16 = OpTypeVector %f16 2\n"
11592 " %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11593 " %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11594 " %up_u32 = OpTypePointer Uniform %u32\n"
11595 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11596 " %SSBO16 = OpTypeStruct %ra_u32\n"
11597 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11598 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11599 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11600 " %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11601 " %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11602 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11603 );
11604
11605 const StringTemplate decoration
11606 (
11607 "OpDecorate %ra_u32 ArrayStride 4\n"
11608 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11609 "OpDecorate %SSBO16 BufferBlock\n"
11610 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
11611 "OpDecorate %ssbo_src0 Binding 0\n"
11612 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
11613 "OpDecorate %ssbo_src1 Binding 1\n"
11614 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11615 "OpDecorate %ssbo_dst Binding 2\n"
11616 );
11617
11618 const StringTemplate testFun
11619 (
11620 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11621 " %param = OpFunctionParameter %v4f32\n"
11622
11623 " %entry = OpLabel\n"
11624 " %i = OpVariable %fp_i32 Function\n"
11625 " OpStore %i %c_i32_0\n"
11626 " OpBranch %loop\n"
11627
11628 " %loop = OpLabel\n"
11629 " %i_cmp = OpLoad %i32 %i\n"
11630 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11631 " OpLoopMerge %merge %next None\n"
11632 " OpBranchConditional %lt %write %merge\n"
11633
11634 " %write = OpLabel\n"
11635 " %ndx = OpLoad %i32 %i\n"
11636
11637 " %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11638
11639 "${op_arg1_calc}"
11640
11641 " %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11642 " %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11643 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11644 " OpBranch %next\n"
11645
11646 " %next = OpLabel\n"
11647 " %i_cur = OpLoad %i32 %i\n"
11648 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11649 " OpStore %i %i_new\n"
11650 " OpBranch %loop\n"
11651
11652 " %merge = OpLabel\n"
11653 " OpReturnValue %param\n"
11654
11655 " OpFunctionEnd\n"
11656 );
11657
11658 const StringTemplate arg1Calc
11659 (
11660 " %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11661 );
11662
11663 for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11664 {
11665 const deUint32 itemsPerVec = 2;
11666 const size_t iterations = float16DataVec1.size() / itemsPerVec;
11667 const TestOp& testOp = testOps[testOpsIdx];
11668 const string testName = de::toLower(string(testOp.opCode)) + "_vector";
11669 SpecResource specResource;
11670 map<string, string> specs;
11671 vector<string> extensions;
11672 VulkanFeatures features;
11673 map<string, string> fragments;
11674
11675 specs["num_data_points"] = de::toString(iterations);
11676 specs["op_code"] = testOp.opCode;
11677 specs["op_arg1"] = (testOp.argCount == 1) ? "" : "%val_src1";
11678 specs["op_arg1_calc"] = (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11679
11680 fragments["extension"] = spvExtensions;
11681 fragments["capability"] = spvCapabilities;
11682 fragments["execution_mode"] = spvExecutionMode;
11683 fragments["decoration"] = decoration.specialize(specs);
11684 fragments["pre_main"] = preMain.specialize(specs);
11685 fragments["testfun"] = testFun.specialize(specs);
11686 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11687 if (testOp.argCount > 1)
11688 {
11689 fragments["testfun"] += StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11690 }
11691 fragments["testfun"] += StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11692
11693 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11694 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11695 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11696 specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11697
11698 extensions.push_back("VK_KHR_shader_float16_int8");
11699
11700 if (nanSupported)
11701 {
11702 extensions.push_back("VK_KHR_shader_float_controls");
11703
11704 features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11705 }
11706
11707 features.extFloat16Int8.shaderFloat16 = true;
11708
11709 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11710 }
11711 }
11712
11713 return testGroup.release();
11714 }
11715
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11716 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11717 {
11718 if (inputs.size() != 1 || outputAllocs.size() != 1)
11719 return false;
11720
11721 vector<deUint8> input1Bytes;
11722
11723 inputs[0].getBytes(input1Bytes);
11724
11725 const deUint16* const input1AsFP16 = (const deUint16*)&input1Bytes[0];
11726 const deUint16* const outputAsFP16 = (const deUint16*)outputAllocs[0]->getHostPtr();
11727 std::string error;
11728
11729 for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11730 {
11731 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11732 {
11733 log << TestLog::Message << error << TestLog::EndMessage;
11734
11735 return false;
11736 }
11737 }
11738
11739 return true;
11740 }
11741
11742 template<class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)11743 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11744 {
11745 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11746
11747 de::Random rnd (deStringHash(testGroup->getName()));
11748 const StringTemplate capabilities ("OpCapability Float16\n");
11749 const deUint32 numDataPoints = 256;
11750 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
11751 const vector<deFloat16> float16OutputDummy (float16InputData.size(), 0);
11752 map<string, string> fragments;
11753
11754 struct TestType
11755 {
11756 const deUint32 typeComponents;
11757 const char* typeName;
11758 const char* typeDecls;
11759 const char* typeStorage;
11760 const string loadFunc;
11761 const string storeFunc;
11762 };
11763
11764 const TestType testTypes[] =
11765 {
11766 {
11767 1,
11768 "f16",
11769 " %v2f16 = OpTypeVector %f16 2\n"
11770 "%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11771 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11772 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11773 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11774 "u32_hndp",
11775 loadScalarF16FromUint,
11776 storeScalarF16AsUint
11777 },
11778 {
11779 2,
11780 "v2f16",
11781 " %v2f16 = OpTypeVector %f16 2\n"
11782 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11783 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11784 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11785 "u32_ndp",
11786 loadV2F16FromUint,
11787 storeV2F16AsUint
11788 },
11789 {
11790 4,
11791 "v4f16",
11792 " %v2f16 = OpTypeVector %f16 2\n"
11793 " %v4f16 = OpTypeVector %f16 4\n"
11794 " %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11795 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11796 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11797 "ra_u32_2",
11798 loadV4F16FromUints,
11799 storeV4F16AsUints
11800 },
11801 };
11802
11803 const StringTemplate preMain
11804 (
11805 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11806 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11807 " %v2bool = OpTypeVector %bool 2\n"
11808 " %f16 = OpTypeFloat 16\n"
11809 " %c_f16_0 = OpConstant %f16 0.0\n"
11810
11811 "${type_decls}"
11812
11813 " %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11814 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11815 "%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11816 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11817 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11818 " %up_u32 = OpTypePointer Uniform %u32\n"
11819 " %SSBO16 = OpTypeStruct %ra_${ts}\n"
11820 " %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11821 " %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11822 " %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11823 );
11824
11825 const StringTemplate decoration
11826 (
11827 "OpDecorate %ra_u32_2 ArrayStride 4\n"
11828 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
11829 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
11830 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11831 "OpMemberDecorate %SSBO16 0 Offset 0\n"
11832 "OpDecorate %SSBO16 BufferBlock\n"
11833 "OpDecorate %ssbo_src DescriptorSet 0\n"
11834 "OpDecorate %ssbo_src Binding 0\n"
11835 "OpDecorate %ssbo_dst DescriptorSet 0\n"
11836 "OpDecorate %ssbo_dst Binding 1\n"
11837 );
11838
11839 const StringTemplate testFun
11840 (
11841 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11842 " %param = OpFunctionParameter %v4f32\n"
11843 " %entry = OpLabel\n"
11844
11845 " %i = OpVariable %fp_i32 Function\n"
11846 " OpStore %i %c_i32_0\n"
11847 " OpBranch %loop\n"
11848
11849 " %loop = OpLabel\n"
11850 " %i_cmp = OpLoad %i32 %i\n"
11851 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11852 " OpLoopMerge %merge %next None\n"
11853 " OpBranchConditional %lt %write %merge\n"
11854
11855 " %write = OpLabel\n"
11856 " %ndx = OpLoad %i32 %i\n"
11857
11858 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11859 " %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11860 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11861 " OpBranch %next\n"
11862
11863 " %next = OpLabel\n"
11864 " %i_cur = OpLoad %i32 %i\n"
11865 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11866 " OpStore %i %i_new\n"
11867 " OpBranch %loop\n"
11868
11869 " %merge = OpLabel\n"
11870 " OpReturnValue %param\n"
11871
11872 " OpFunctionEnd\n"
11873
11874 " %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11875 " %param0 = OpFunctionParameter %${tt}\n"
11876 " %entry_pf = OpLabel\n"
11877 " %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11878 " OpReturnValue %res0\n"
11879 " OpFunctionEnd\n"
11880 );
11881
11882 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11883 {
11884 const TestType& testType = testTypes[testTypeIdx];
11885 const string testName = testType.typeName;
11886 const deUint32 itemsPerType = testType.typeComponents;
11887 const size_t iterations = float16InputData.size() / itemsPerType;
11888 const size_t typeStride = itemsPerType * sizeof(deFloat16);
11889 SpecResource specResource;
11890 map<string, string> specs;
11891 VulkanFeatures features;
11892 vector<string> extensions;
11893
11894 specs["num_data_points"] = de::toString(iterations);
11895 specs["tt"] = testType.typeName;
11896 specs["ts"] = testType.typeStorage;
11897 specs["tt_stride"] = de::toString(typeStride);
11898 specs["type_decls"] = testType.typeDecls;
11899
11900 fragments["capability"] = capabilities.specialize(specs);
11901 fragments["decoration"] = decoration.specialize(specs);
11902 fragments["pre_main"] = preMain.specialize(specs);
11903 fragments["testfun"] = testFun.specialize(specs);
11904 fragments["testfun"] += StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11905 fragments["testfun"] += StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11906
11907 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11908 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11909 specResource.verifyIO = compareFP16FunctionSetFunc;
11910
11911 extensions.push_back("VK_KHR_shader_float16_int8");
11912
11913 features.extFloat16Int8.shaderFloat16 = true;
11914
11915 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11916 }
11917
11918 return testGroup.release();
11919 }
11920
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11921 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11922 {
11923 if (inputs.size() != 2 || outputAllocs.size() != 1)
11924 return false;
11925
11926 vector<deUint8> input1Bytes;
11927 vector<deUint8> input2Bytes;
11928
11929 inputs[0].getBytes(input1Bytes);
11930 inputs[1].getBytes(input2Bytes);
11931
11932 DE_ASSERT(input1Bytes.size() > 0);
11933 DE_ASSERT(input2Bytes.size() > 0);
11934 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11935
11936 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
11937 const size_t components = input1Bytes.size() / (sizeof(deFloat16) * iterations);
11938 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
11939 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
11940 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
11941 std::string error;
11942
11943 DE_ASSERT(components == 2 || components == 4);
11944 DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11945
11946 for (size_t idx = 0; idx < iterations; ++idx)
11947 {
11948 const deUint32 componentNdx = inputIndices[idx];
11949
11950 DE_ASSERT(componentNdx < components);
11951
11952 const deFloat16 expected = input1AsFP16[components * idx + componentNdx];
11953
11954 if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11955 {
11956 log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11957
11958 return false;
11959 }
11960 }
11961
11962 return true;
11963 }
11964
11965 template<class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)11966 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11967 {
11968 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11969
11970 de::Random rnd (deStringHash(testGroup->getName()));
11971 const deUint32 numDataPoints = 256;
11972 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
11973 const vector<deFloat16> float16OutputDummy (float16InputData.size(), 0);
11974
11975 struct TestType
11976 {
11977 const deUint32 typeComponents;
11978 const size_t typeStride;
11979 const char* typeName;
11980 const char* typeDecls;
11981 const char* typeStorage;
11982 const string loadFunction;
11983 const string storeFunction;
11984 };
11985
11986 const TestType testTypes[] =
11987 {
11988 {
11989 2,
11990 2 * sizeof(deFloat16),
11991 "v2f16",
11992 " %v2f16 = OpTypeVector %f16 2\n"
11993 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11994 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11995 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11996 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11997 "u32",
11998 loadV2F16FromUint,
11999 storeScalarF16AsUint
12000 },
12001 {
12002 3,
12003 4 * sizeof(deFloat16),
12004 "v3f16",
12005 " %v2f16 = OpTypeVector %f16 2\n"
12006 " %v3f16 = OpTypeVector %f16 3\n"
12007 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12008 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12009 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12010 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12011 "ra_u32_2",
12012 loadV3F16FromUints,
12013 storeScalarF16AsUint
12014 },
12015 {
12016 4,
12017 4 * sizeof(deFloat16),
12018 "v4f16",
12019 " %v2f16 = OpTypeVector %f16 2\n"
12020 " %v4f16 = OpTypeVector %f16 4\n"
12021 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12022 "%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12023 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12024 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12025 "ra_u32_2",
12026 loadV4F16FromUints,
12027 storeScalarF16AsUint
12028 },
12029 };
12030
12031 const StringTemplate preMain
12032 (
12033 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12034 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12035 " %f16 = OpTypeFloat 16\n"
12036
12037 "${type_decl}"
12038
12039 " %up_u32 = OpTypePointer Uniform %u32\n"
12040 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12041 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12042 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12043
12044 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12045 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12046 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12047 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12048 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12049
12050 " %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12051 " %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12052 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12053
12054 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12055 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12056 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12057 );
12058
12059 const StringTemplate decoration
12060 (
12061 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12062 "OpDecorate %ra_u32_hndp ArrayStride 4\n"
12063 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12064 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12065 "OpDecorate %SSBO_SRC BufferBlock\n"
12066 "OpDecorate %ssbo_src DescriptorSet 0\n"
12067 "OpDecorate %ssbo_src Binding 0\n"
12068
12069 "OpDecorate %ra_u32 ArrayStride 4\n"
12070 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12071 "OpDecorate %SSBO_IDX BufferBlock\n"
12072 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12073 "OpDecorate %ssbo_idx Binding 1\n"
12074
12075 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12076 "OpDecorate %SSBO_DST BufferBlock\n"
12077 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12078 "OpDecorate %ssbo_dst Binding 2\n"
12079 );
12080
12081 const StringTemplate testFun
12082 (
12083 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12084 " %param = OpFunctionParameter %v4f32\n"
12085 " %entry = OpLabel\n"
12086
12087 " %i = OpVariable %fp_i32 Function\n"
12088 " OpStore %i %c_i32_0\n"
12089
12090 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12091 " OpSelectionMerge %end_if None\n"
12092 " OpBranchConditional %will_run %run_test %end_if\n"
12093
12094 " %run_test = OpLabel\n"
12095 " OpBranch %loop\n"
12096
12097 " %loop = OpLabel\n"
12098 " %i_cmp = OpLoad %i32 %i\n"
12099 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12100 " OpLoopMerge %merge %next None\n"
12101 " OpBranchConditional %lt %write %merge\n"
12102
12103 " %write = OpLabel\n"
12104 " %ndx = OpLoad %i32 %i\n"
12105
12106 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12107
12108 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12109 " %val_idx = OpLoad %u32 %src_idx\n"
12110
12111 " %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12112 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12113
12114 " OpBranch %next\n"
12115
12116 " %next = OpLabel\n"
12117 " %i_cur = OpLoad %i32 %i\n"
12118 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12119 " OpStore %i %i_new\n"
12120 " OpBranch %loop\n"
12121
12122 " %merge = OpLabel\n"
12123 " OpBranch %end_if\n"
12124 " %end_if = OpLabel\n"
12125 " OpReturnValue %param\n"
12126
12127 " OpFunctionEnd\n"
12128 );
12129
12130 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12131 {
12132 const TestType& testType = testTypes[testTypeIdx];
12133 const string testName = testType.typeName;
12134 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12135 const size_t iterations = float16InputData.size() / itemsPerType;
12136 SpecResource specResource;
12137 map<string, string> specs;
12138 VulkanFeatures features;
12139 vector<deUint32> inputDataNdx;
12140 map<string, string> fragments;
12141 vector<string> extensions;
12142
12143 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12144 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12145
12146 specs["num_data_points"] = de::toString(iterations);
12147 specs["tt"] = testType.typeName;
12148 specs["ts"] = testType.typeStorage;
12149 specs["tt_stride"] = de::toString(testType.typeStride);
12150 specs["type_decl"] = testType.typeDecls;
12151
12152 fragments["capability"] = "OpCapability Float16\n";
12153 fragments["decoration"] = decoration.specialize(specs);
12154 fragments["pre_main"] = preMain.specialize(specs);
12155 fragments["testfun"] = testFun.specialize(specs);
12156 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12157 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12158
12159 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12160 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12161 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12162 specResource.verifyIO = compareFP16VectorExtractFunc;
12163
12164 extensions.push_back("VK_KHR_shader_float16_int8");
12165
12166 features.extFloat16Int8.shaderFloat16 = true;
12167
12168 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12169 }
12170
12171 return testGroup.release();
12172 }
12173
12174 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12175 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12176 {
12177 if (inputs.size() != 2 || outputAllocs.size() != 1)
12178 return false;
12179
12180 vector<deUint8> input1Bytes;
12181 vector<deUint8> input2Bytes;
12182
12183 inputs[0].getBytes(input1Bytes);
12184 inputs[1].getBytes(input2Bytes);
12185
12186 DE_ASSERT(input1Bytes.size() > 0);
12187 DE_ASSERT(input2Bytes.size() > 0);
12188 DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12189
12190 const size_t iterations = input2Bytes.size() / sizeof(deUint32);
12191 const size_t componentsStride = input1Bytes.size() / (sizeof(deFloat16) * iterations);
12192 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12193 const deUint32* const inputIndices = (const deUint32*)&input2Bytes[0];
12194 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12195 const deFloat16 magic = tcu::Float16(float(REPLACEMENT)).bits();
12196 std::string error;
12197
12198 DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12199 DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12200
12201 for (size_t idx = 0; idx < iterations; ++idx)
12202 {
12203 const deFloat16* inputVec = &input1AsFP16[componentsStride * idx];
12204 const deFloat16* outputVec = &outputAsFP16[componentsStride * idx];
12205 const deUint32 replacedCompNdx = inputIndices[idx];
12206
12207 DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12208
12209 for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12210 {
12211 const deFloat16 expected = (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12212
12213 if (!compare16BitFloat(expected, outputVec[compNdx], error))
12214 {
12215 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12216
12217 return false;
12218 }
12219 }
12220 }
12221
12222 return true;
12223 }
12224
12225 template<class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12226 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12227 {
12228 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12229
12230 de::Random rnd (deStringHash(testGroup->getName()));
12231 const deUint32 replacement = 42;
12232 const deUint32 numDataPoints = 256;
12233 const vector<deFloat16> float16InputData = getFloat16s(rnd, numDataPoints);
12234 const vector<deFloat16> float16OutputDummy (float16InputData.size(), 0);
12235
12236 struct TestType
12237 {
12238 const deUint32 typeComponents;
12239 const size_t typeStride;
12240 const char* typeName;
12241 const char* typeDecls;
12242 VerifyIOFunc verifyIOFunc;
12243 const char* typeStorage;
12244 const string loadFunction;
12245 const string storeFunction;
12246 };
12247
12248 const TestType testTypes[] =
12249 {
12250 {
12251 2,
12252 2 * sizeof(deFloat16),
12253 "v2f16",
12254 " %v2f16 = OpTypeVector %f16 2\n"
12255 "%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12256 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12257 compareFP16VectorInsertFunc<2, replacement>,
12258 "u32",
12259 loadV2F16FromUint,
12260 storeV2F16AsUint
12261 },
12262 {
12263 3,
12264 4 * sizeof(deFloat16),
12265 "v3f16",
12266 " %v2f16 = OpTypeVector %f16 2\n"
12267 " %v3f16 = OpTypeVector %f16 3\n"
12268 "%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12269 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12270 compareFP16VectorInsertFunc<3, replacement>,
12271 "ra_u32_2",
12272 loadV3F16FromUints,
12273 storeV3F16AsUints
12274 },
12275 {
12276 4,
12277 4 * sizeof(deFloat16),
12278 "v4f16",
12279 " %v2f16 = OpTypeVector %f16 2\n"
12280 " %v4f16 = OpTypeVector %f16 4\n"
12281 "%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12282 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12283 compareFP16VectorInsertFunc<4, replacement>,
12284 "ra_u32_2",
12285 loadV4F16FromUints,
12286 storeV4F16AsUints
12287 },
12288 };
12289
12290 const StringTemplate preMain
12291 (
12292 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12293 " %f16 = OpTypeFloat 16\n"
12294 " %c_f16_ins = OpConstant %f16 ${replacement}\n"
12295
12296 "${type_decl}"
12297
12298 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12299 " %up_u32 = OpTypePointer Uniform %u32\n"
12300 " %SSBO_IDX = OpTypeStruct %ra_u32\n"
12301 "%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12302
12303 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12304 "%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12305 " %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12306 "%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12307
12308 " %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12309 "%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12310
12311 " %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12312 " %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12313 " %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12314 );
12315
12316 const StringTemplate decoration
12317 (
12318 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12319 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12320 "OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12321 "OpDecorate %SSBO_SRC BufferBlock\n"
12322 "OpDecorate %ssbo_src DescriptorSet 0\n"
12323 "OpDecorate %ssbo_src Binding 0\n"
12324
12325 "OpDecorate %ra_u32 ArrayStride 4\n"
12326 "OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12327 "OpDecorate %SSBO_IDX BufferBlock\n"
12328 "OpDecorate %ssbo_idx DescriptorSet 0\n"
12329 "OpDecorate %ssbo_idx Binding 1\n"
12330
12331 "OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12332 "OpDecorate %SSBO_DST BufferBlock\n"
12333 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12334 "OpDecorate %ssbo_dst Binding 2\n"
12335 );
12336
12337 const StringTemplate testFun
12338 (
12339 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12340 " %param = OpFunctionParameter %v4f32\n"
12341 " %entry = OpLabel\n"
12342
12343 " %i = OpVariable %fp_i32 Function\n"
12344 " OpStore %i %c_i32_0\n"
12345
12346 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12347 " OpSelectionMerge %end_if None\n"
12348 " OpBranchConditional %will_run %run_test %end_if\n"
12349
12350 " %run_test = OpLabel\n"
12351 " OpBranch %loop\n"
12352
12353 " %loop = OpLabel\n"
12354 " %i_cmp = OpLoad %i32 %i\n"
12355 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12356 " OpLoopMerge %merge %next None\n"
12357 " OpBranchConditional %lt %write %merge\n"
12358
12359 " %write = OpLabel\n"
12360 " %ndx = OpLoad %i32 %i\n"
12361
12362 " %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12363
12364 " %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12365 " %val_idx = OpLoad %u32 %src_idx\n"
12366
12367 " %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12368 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12369
12370 " OpBranch %next\n"
12371
12372 " %next = OpLabel\n"
12373 " %i_cur = OpLoad %i32 %i\n"
12374 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12375 " OpStore %i %i_new\n"
12376 " OpBranch %loop\n"
12377
12378 " %merge = OpLabel\n"
12379 " OpBranch %end_if\n"
12380 " %end_if = OpLabel\n"
12381 " OpReturnValue %param\n"
12382
12383 " OpFunctionEnd\n"
12384 );
12385
12386 for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12387 {
12388 const TestType& testType = testTypes[testTypeIdx];
12389 const string testName = testType.typeName;
12390 const size_t itemsPerType = testType.typeStride / sizeof(deFloat16);
12391 const size_t iterations = float16InputData.size() / itemsPerType;
12392 SpecResource specResource;
12393 map<string, string> specs;
12394 VulkanFeatures features;
12395 vector<deUint32> inputDataNdx;
12396 map<string, string> fragments;
12397 vector<string> extensions;
12398
12399 for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12400 inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12401
12402 specs["num_data_points"] = de::toString(iterations);
12403 specs["tt"] = testType.typeName;
12404 specs["ts"] = testType.typeStorage;
12405 specs["tt_stride"] = de::toString(testType.typeStride);
12406 specs["type_decl"] = testType.typeDecls;
12407 specs["replacement"] = de::toString(replacement);
12408
12409 fragments["capability"] = "OpCapability Float16\n";
12410 fragments["decoration"] = decoration.specialize(specs);
12411 fragments["pre_main"] = preMain.specialize(specs);
12412 fragments["testfun"] = testFun.specialize(specs);
12413 fragments["testfun"] += StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12414 fragments["testfun"] += StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12415
12416 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12417 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12418 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12419 specResource.verifyIO = testType.verifyIOFunc;
12420
12421 extensions.push_back("VK_KHR_shader_float16_int8");
12422
12423 features.extFloat16Int8.shaderFloat16 = true;
12424
12425 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12426 }
12427
12428 return testGroup.release();
12429 }
12430
// Returns the value expected in result component componentNdx of a shuffle performed in the
// given iteration.
//
// The two source vectors are treated as one concatenated vector of vec1Len + vec2Len
// elements. The source element index for result components 0 and 1 is derived from the
// iteration number (quotient and remainder by vec1Len + vec2Len + 1); components 2 and 3
// always use source elements 0 and 1. When the derived index equals vec1Len + vec2Len it
// names no source element: validate is set to false and 0 is returned, so the caller is
// expected to skip the comparison. Otherwise validate is set to true.
//
// Throws InternalError when componentNdx is greater than 3.
inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
{
	const size_t	sourceLen		= vec1Len + vec2Len;
	const size_t	selectorRange	= sourceLen + 1;	// One extra selector value that names no source element.
	const size_t	selectorPair	= iteration % (selectorRange * selectorRange);
	size_t			sourceNdx		= 0;

	if (componentNdx == 0)
		sourceNdx = selectorPair / selectorRange;
	else if (componentNdx == 1)
		sourceNdx = selectorPair % selectorRange;
	else if (componentNdx == 2)
		sourceNdx = 0;
	else if (componentNdx == 3)
		sourceNdx = 1;
	else
	{
		TCU_THROW(InternalError, "Impossible");
	}

	validate = (sourceNdx < sourceLen);

	if (!validate)
		return 0;

	// Indices below vec1Len address the first vector, the rest address the second one.
	if (sourceNdx < vec1Len)
		return input1Vec[sourceNdx];

	return input2Vec[sourceNdx - vec1Len];
}
12457
12458 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12459 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12460 {
12461 DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12462 DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12463 DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12464
12465 if (inputs.size() != 2 || outputAllocs.size() != 1)
12466 return false;
12467
12468 vector<deUint8> input1Bytes;
12469 vector<deUint8> input2Bytes;
12470
12471 inputs[0].getBytes(input1Bytes);
12472 inputs[1].getBytes(input2Bytes);
12473
12474 DE_ASSERT(input1Bytes.size() > 0);
12475 DE_ASSERT(input2Bytes.size() > 0);
12476 DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12477
12478 const size_t componentsStrideDst = (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12479 const size_t componentsStrideSrc0 = (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12480 const size_t componentsStrideSrc1 = (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12481 const size_t iterations = input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12482 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12483 const deFloat16* const input2AsFP16 = (const deFloat16*)&input2Bytes[0];
12484 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12485 std::string error;
12486
12487 DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12488 DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12489
12490 for (size_t idx = 0; idx < iterations; ++idx)
12491 {
12492 const deFloat16* input1Vec = &input1AsFP16[componentsStrideSrc0 * idx];
12493 const deFloat16* input2Vec = &input2AsFP16[componentsStrideSrc1 * idx];
12494 const deFloat16* outputVec = &outputAsFP16[componentsStrideDst * idx];
12495
12496 for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12497 {
12498 bool validate = true;
12499 deFloat16 expected = getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
12500
12501 if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12502 {
12503 log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12504
12505 return false;
12506 }
12507 }
12508 }
12509
12510 return true;
12511 }
12512
getFloat16VectorShuffleVerifyIOFunc(deUint32 dstComponentsCount,deUint32 src0ComponentsCount,deUint32 src1ComponentsCount)12513 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
12514 {
12515 DE_ASSERT(dstComponentsCount <= 4);
12516 DE_ASSERT(src0ComponentsCount <= 4);
12517 DE_ASSERT(src1ComponentsCount <= 4);
12518 deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12519
12520 switch (funcCode)
12521 {
12522 case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12523 case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12524 case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12525 case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12526 case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12527 case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12528 case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12529 case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12530 case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12531 case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12532 case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12533 case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12534 case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12535 case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12536 case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12537 case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12538 case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12539 case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12540 case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12541 case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12542 case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12543 case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12544 case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12545 case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12546 case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12547 case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12548 case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12549 default: TCU_THROW(InternalError, "Invalid number of components specified.");
12550 }
12551 }
12552
12553 template<class SpecResource>
createFloat16VectorShuffleSet(tcu::TestContext & testCtx)12554 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12555 {
12556 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12557 const int testSpecificSeed = deStringHash(testGroup->getName());
12558 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12559 de::Random rnd (seed);
12560 const deUint32 numDataPoints = 128;
12561 map<string, string> fragments;
12562
12563 struct TestType
12564 {
12565 const deUint32 typeComponents;
12566 const char* typeName;
12567 const string loadFunction;
12568 const string storeFunction;
12569 };
12570
12571 const TestType testTypes[] =
12572 {
12573 {
12574 2,
12575 "v2f16",
12576 loadV2F16FromUint,
12577 storeV2F16AsUint
12578 },
12579 {
12580 3,
12581 "v3f16",
12582 loadV3F16FromUints,
12583 storeV3F16AsUints
12584 },
12585 {
12586 4,
12587 "v4f16",
12588 loadV4F16FromUints,
12589 storeV4F16AsUints
12590 },
12591 };
12592
12593 const StringTemplate preMain
12594 (
12595 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12596 " %c_i32_cc = OpConstant %i32 ${case_count}\n"
12597 " %f16 = OpTypeFloat 16\n"
12598 " %v2f16 = OpTypeVector %f16 2\n"
12599 " %v3f16 = OpTypeVector %f16 3\n"
12600 " %v4f16 = OpTypeVector %f16 4\n"
12601
12602 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12603 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12604 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12605 "%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12606 "%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12607 "%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12608
12609 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12610 " %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12611 " %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12612 " %up_u32 = OpTypePointer Uniform %u32\n"
12613 " %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12614 " %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12615 " %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12616
12617 "%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12618 "%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12619 "%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12620
12621 " %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12622
12623 " %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12624 " %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12625 " %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12626 );
12627
12628 const StringTemplate decoration
12629 (
12630 "OpDecorate %ra_u32_2 ArrayStride 4\n"
12631 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
12632 "OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12633
12634 "OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12635 "OpDecorate %SSBO_v2f16 BufferBlock\n"
12636
12637 "OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12638 "OpDecorate %SSBO_v3f16 BufferBlock\n"
12639
12640 "OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12641 "OpDecorate %SSBO_v4f16 BufferBlock\n"
12642
12643 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
12644 "OpDecorate %ssbo_src0 Binding 0\n"
12645 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
12646 "OpDecorate %ssbo_src1 Binding 1\n"
12647 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12648 "OpDecorate %ssbo_dst Binding 2\n"
12649 );
12650
12651 const StringTemplate testFun
12652 (
12653 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12654 " %param = OpFunctionParameter %v4f32\n"
12655 " %entry = OpLabel\n"
12656
12657 " %i = OpVariable %fp_i32 Function\n"
12658 " OpStore %i %c_i32_0\n"
12659
12660 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12661 " OpSelectionMerge %end_if None\n"
12662 " OpBranchConditional %will_run %run_test %end_if\n"
12663
12664 " %run_test = OpLabel\n"
12665 " OpBranch %loop\n"
12666
12667 " %loop = OpLabel\n"
12668 " %i_cmp = OpLoad %i32 %i\n"
12669 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12670 " OpLoopMerge %merge %next None\n"
12671 " OpBranchConditional %lt %write %merge\n"
12672
12673 " %write = OpLabel\n"
12674 " %ndx = OpLoad %i32 %i\n"
12675 " %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12676 " %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12677 " %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12678 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12679 " OpBranch %next\n"
12680
12681 " %next = OpLabel\n"
12682 " %i_cur = OpLoad %i32 %i\n"
12683 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12684 " OpStore %i %i_new\n"
12685 " OpBranch %loop\n"
12686
12687 " %merge = OpLabel\n"
12688 " OpBranch %end_if\n"
12689 " %end_if = OpLabel\n"
12690 " OpReturnValue %param\n"
12691 " OpFunctionEnd\n"
12692 "\n"
12693
12694 " %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12695 "%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12696 "%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12697 "%sw_paramn = OpFunctionParameter %i32\n"
12698 " %sw_entry = OpLabel\n"
12699 " %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12700 " OpSelectionMerge %switch_e None\n"
12701 " OpSwitch %modulo %default ${case_list}\n"
12702 "${case_bodies}"
12703 "%default = OpLabel\n"
12704 " OpUnreachable\n" // Unreachable default case for switch statement
12705 "%switch_e = OpLabel\n"
12706 " OpUnreachable\n" // Unreachable merge block for switch statement
12707 " OpFunctionEnd\n"
12708 );
12709
12710 const StringTemplate testCaseBody
12711 (
12712 "%case_${case_ndx} = OpLabel\n"
12713 "%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12714 " OpReturnValue %val_dst_${case_ndx}\n"
12715 );
12716
12717 for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12718 {
12719 const TestType& dstType = testTypes[dstTypeIdx];
12720
12721 for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12722 {
12723 const TestType& src0Type = testTypes[comp0Idx];
12724
12725 for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12726 {
12727 const TestType& src1Type = testTypes[comp1Idx];
12728 const deUint32 input0Stride = (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12729 const deUint32 input1Stride = (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12730 const deUint32 outputStride = (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12731 const vector<deFloat16> float16Input0Data = getFloat16s(rnd, input0Stride * numDataPoints);
12732 const vector<deFloat16> float16Input1Data = getFloat16s(rnd, input1Stride * numDataPoints);
12733 const vector<deFloat16> float16OutputDummy (outputStride * numDataPoints, 0);
12734 const string testName = de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12735 deUint32 caseCount = 0;
12736 SpecResource specResource;
12737 map<string, string> specs;
12738 vector<string> extensions;
12739 VulkanFeatures features;
12740 string caseBodies;
12741 string caseList;
12742
12743 // Generate case
12744 {
12745 vector<string> componentList;
12746
12747 // Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12748 {
12749 deUint32 caseNo = 0;
12750
12751 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12752 componentList.push_back(de::toString(caseNo++));
12753 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12754 componentList.push_back(de::toString(caseNo++));
12755 componentList.push_back("0xFFFFFFFF");
12756 }
12757
12758 for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12759 {
12760 for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12761 {
12762 map<string, string> specCase;
12763 string shuffle = componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12764
12765 for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12766 shuffle += " " + de::toString(compIdx - 2);
12767
12768 specCase["case_ndx"] = de::toString(caseCount);
12769 specCase["shuffle"] = shuffle;
12770 specCase["tt_dst"] = dstType.typeName;
12771
12772 caseBodies += testCaseBody.specialize(specCase);
12773 caseList += de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12774
12775 caseCount++;
12776 }
12777 }
12778 }
12779
12780 specs["num_data_points"] = de::toString(numDataPoints);
12781 specs["tt_dst"] = dstType.typeName;
12782 specs["tt_src0"] = src0Type.typeName;
12783 specs["tt_src1"] = src1Type.typeName;
12784 specs["case_bodies"] = caseBodies;
12785 specs["case_list"] = caseList;
12786 specs["case_count"] = de::toString(caseCount);
12787
12788 fragments["capability"] = "OpCapability Float16\n";
12789 fragments["decoration"] = decoration.specialize(specs);
12790 fragments["pre_main"] = preMain.specialize(specs);
12791 fragments["testfun"] = testFun.specialize(specs);
12792 fragments["testfun"] += StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12793 fragments["testfun"] += StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12794 fragments["testfun"] += StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12795
12796 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12797 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12798 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12799 specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12800
12801 extensions.push_back("VK_KHR_shader_float16_int8");
12802
12803 features.extFloat16Int8.shaderFloat16 = true;
12804
12805 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12806 }
12807 }
12808 }
12809
12810 return testGroup.release();
12811 }
12812
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12813 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12814 {
12815 if (inputs.size() != 1 || outputAllocs.size() != 1)
12816 return false;
12817
12818 vector<deUint8> input1Bytes;
12819
12820 inputs[0].getBytes(input1Bytes);
12821
12822 DE_ASSERT(input1Bytes.size() > 0);
12823 DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12824
12825 const size_t iterations = input1Bytes.size() / sizeof(deFloat16);
12826 const deFloat16* const input1AsFP16 = (const deFloat16*)&input1Bytes[0];
12827 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
12828 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12829 std::string error;
12830
12831 for (size_t idx = 0; idx < iterations; ++idx)
12832 {
12833 if (input1AsFP16[idx] == exceptionValue)
12834 continue;
12835
12836 if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12837 {
12838 log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12839
12840 return false;
12841 }
12842 }
12843
12844 return true;
12845 }
12846
12847 template<class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)12848 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12849 {
12850 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12851 const deUint32 numElements = 8;
12852 const string testName = "struct";
12853 const deUint32 structItemsCount = 88;
12854 const deUint32 exceptionIndices[] = { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12855 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
12856 const deUint32 fieldModifier = 2;
12857 const deUint32 fieldModifiedMulIndex = 60;
12858 const deUint32 fieldModifiedAddIndex = 66;
12859
12860 const StringTemplate preMain
12861 (
12862 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12863 " %f16 = OpTypeFloat 16\n"
12864 " %v2f16 = OpTypeVector %f16 2\n"
12865 " %v3f16 = OpTypeVector %f16 3\n"
12866 " %v4f16 = OpTypeVector %f16 4\n"
12867 " %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12868
12869 "${consts}"
12870
12871 " %c_f16_n1 = OpConstant %f16 -1.0\n"
12872 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12873 " %c_u32_5 = OpConstant %u32 5\n"
12874 " %c_u32_6 = OpConstant %u32 6\n"
12875 " %c_u32_7 = OpConstant %u32 7\n"
12876 " %c_u32_8 = OpConstant %u32 8\n"
12877 " %c_u32_9 = OpConstant %u32 9\n"
12878 " %c_u32_10 = OpConstant %u32 10\n"
12879 " %c_u32_11 = OpConstant %u32 11\n"
12880 " %c_u32_12 = OpConstant %u32 12\n"
12881 " %c_u32_13 = OpConstant %u32 13\n"
12882 " %c_u32_14 = OpConstant %u32 14\n"
12883 " %c_u32_15 = OpConstant %u32 15\n"
12884 " %c_u32_16 = OpConstant %u32 16\n"
12885 " %c_u32_17 = OpConstant %u32 17\n"
12886 " %c_u32_18 = OpConstant %u32 18\n"
12887 " %c_u32_19 = OpConstant %u32 19\n"
12888 " %c_u32_20 = OpConstant %u32 20\n"
12889 " %c_u32_21 = OpConstant %u32 21\n"
12890 " %c_u32_22 = OpConstant %u32 22\n"
12891 " %c_u32_23 = OpConstant %u32 23\n"
12892 " %c_u32_24 = OpConstant %u32 24\n"
12893 " %c_u32_25 = OpConstant %u32 25\n"
12894 " %c_u32_26 = OpConstant %u32 26\n"
12895 " %c_u32_27 = OpConstant %u32 27\n"
12896 " %c_u32_28 = OpConstant %u32 28\n"
12897 " %c_u32_29 = OpConstant %u32 29\n"
12898 " %c_u32_30 = OpConstant %u32 30\n"
12899 " %c_u32_31 = OpConstant %u32 31\n"
12900 " %c_u32_33 = OpConstant %u32 33\n"
12901 " %c_u32_34 = OpConstant %u32 34\n"
12902 " %c_u32_35 = OpConstant %u32 35\n"
12903 " %c_u32_36 = OpConstant %u32 36\n"
12904 " %c_u32_37 = OpConstant %u32 37\n"
12905 " %c_u32_38 = OpConstant %u32 38\n"
12906 " %c_u32_39 = OpConstant %u32 39\n"
12907 " %c_u32_40 = OpConstant %u32 40\n"
12908 " %c_u32_41 = OpConstant %u32 41\n"
12909 " %c_u32_44 = OpConstant %u32 44\n"
12910
12911 " %f16arr3 = OpTypeArray %f16 %c_u32_3\n"
12912 " %v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
12913 " %v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
12914 " %v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
12915 " %v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
12916 " %struct16 = OpTypeStruct %f16 %v2f16arr3\n"
12917 " %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12918 " %st_test = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12919
12920 " %up_u32 = OpTypePointer Uniform %u32\n"
12921 " %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12922 " %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12923 " %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12924 " %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12925
12926 " %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12927 );
12928
12929 const StringTemplate decoration
12930 (
12931 "OpDecorate %SSBO_st BufferBlock\n"
12932 "OpDecorate %ra_u32_44 ArrayStride 4\n"
12933 "OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12934 "OpDecorate %ssbo_dst DescriptorSet 0\n"
12935 "OpDecorate %ssbo_dst Binding 1\n"
12936
12937 "OpMemberDecorate %SSBO_st 0 Offset 0\n"
12938
12939 "OpDecorate %v2f16arr3 ArrayStride 4\n"
12940 "OpMemberDecorate %struct16 0 Offset 0\n"
12941 "OpMemberDecorate %struct16 1 Offset 4\n"
12942 "OpDecorate %struct16arr3 ArrayStride 16\n"
12943 "OpDecorate %f16arr3 ArrayStride 2\n"
12944 "OpDecorate %v2f16arr5 ArrayStride 4\n"
12945 "OpDecorate %v3f16arr5 ArrayStride 8\n"
12946 "OpDecorate %v4f16arr3 ArrayStride 8\n"
12947
12948 "OpMemberDecorate %st_test 0 Offset 0\n"
12949 "OpMemberDecorate %st_test 1 Offset 4\n"
12950 "OpMemberDecorate %st_test 2 Offset 8\n"
12951 "OpMemberDecorate %st_test 3 Offset 16\n"
12952 "OpMemberDecorate %st_test 4 Offset 24\n"
12953 "OpMemberDecorate %st_test 5 Offset 32\n"
12954 "OpMemberDecorate %st_test 6 Offset 80\n"
12955 "OpMemberDecorate %st_test 7 Offset 100\n"
12956 "OpMemberDecorate %st_test 8 Offset 104\n"
12957 "OpMemberDecorate %st_test 9 Offset 144\n"
12958 );
12959
12960 const StringTemplate testFun
12961 (
12962 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12963 " %param = OpFunctionParameter %v4f32\n"
12964 " %entry = OpLabel\n"
12965
12966 " %i = OpVariable %fp_i32 Function\n"
12967 " OpStore %i %c_i32_0\n"
12968
12969 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12970 " OpSelectionMerge %end_if None\n"
12971 " OpBranchConditional %will_run %run_test %end_if\n"
12972
12973 " %run_test = OpLabel\n"
12974 " OpBranch %loop\n"
12975
12976 " %loop = OpLabel\n"
12977 " %i_cmp = OpLoad %i32 %i\n"
12978 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12979 " OpLoopMerge %merge %next None\n"
12980 " OpBranchConditional %lt %write %merge\n"
12981
12982 " %write = OpLabel\n"
12983 " %ndx = OpLoad %i32 %i\n"
12984
12985 " %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
12986 " %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
12987 " %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
12988
12989 " %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
12990
12991 "%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
12992 "%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
12993 "%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
12994 " %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
12995 " %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
12996
12997 "%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
12998 "%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
12999 "%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13000 " %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13001 " %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13002
13003 "%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13004 "%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13005 "%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13006 " %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13007 " %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13008
13009 " %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13010
13011 " %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13012 " %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13013 " %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13014 " %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13015 " %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13016 " %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13017
13018 " %fndx = OpConvertSToF %f16 %ndx\n"
13019 " %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13020 " %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13021
13022 " %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13023 " %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13024 " %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13025 " %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13026 " %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13027 " %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13028 " %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13029 " %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13030
13031 " %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13032 " %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13033 " %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13034 " %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13035
13036 " %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13037
13038 // Storage section: all elements that are not directly accessed should
13039 // have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13040 // is constructed with one element from a constant -1.0.
13041 // half offset 0
13042 " %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13043 " %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13044 " %bc_0 = OpBitcast %u32 %vec_0\n"
13045 " %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13046 " OpStore %gep_0 %bc_0\n"
13047
13048 // <2 x half> offset 4
13049 " %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13050 " %bc_1 = OpBitcast %u32 %ex_1\n"
13051 " %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13052 " OpStore %gep_1 %bc_1\n"
13053
13054 // <3 x half> offset 8
13055 " %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13056 " %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13057 " %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13058 " %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13059 " %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13060 " %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13061 " %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13062 " OpStore %gep_2_0 %bc_2_0\n"
13063 " OpStore %gep_2_1 %bc_2_1\n"
13064
13065 // <4 x half> offset 16
13066 " %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13067 " %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13068 " %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13069 " %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13070 " %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13071 " %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13072 " %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13073 " OpStore %gep_3_0 %bc_3_0\n"
13074 " OpStore %gep_3_1 %bc_3_1\n"
13075
13076 // [3 x half] offset 24
13077 " %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13078 " %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13079 " %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13080 " %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13081 " %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13082 " %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13083 " %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13084 " %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13085 " %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13086 " OpStore %gep_4_0 %bc_4_0\n"
13087 " OpStore %gep_4_1 %bc_4_1\n"
13088
13089 // [3 x {half, [3 x <2 x half>]}] offset 32
13090 " %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13091 " %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13092 " %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13093 " %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13094 " %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13095 " %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13096 "%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13097 "%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13098 "%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13099 "%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13100 "%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13101 "%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13102 "%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13103 "%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13104 "%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13105 " %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13106 " %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13107 " %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13108 " %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13109 " %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13110 " %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13111 "%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13112 "%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13113 "%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13114 "%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13115 "%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13116 "%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13117 "%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13118 "%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13119 "%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13120 " %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13121 "%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13122 "%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13123 "%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13124 " %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13125 "%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13126 "%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13127 "%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13128 " %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13129 "%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13130 "%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13131 "%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13132 " OpStore %gep_5_0_0 %bc_5_0_0\n"
13133 " OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13134 " OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13135 " OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13136 " OpStore %gep_5_1_0 %bc_5_1_0\n"
13137 " OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13138 " OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13139 " OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13140 " OpStore %gep_5_2_0 %bc_5_2_0\n"
13141 " OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13142 " OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13143 " OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13144
13145 // [5 x <2 x half>] offset 80
13146 " %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13147 " %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13148 " %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13149 " %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13150 " %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13151 " %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13152 " %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13153 " %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13154 " %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13155 " %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13156 " %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13157 " %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13158 " %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13159 " %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13160 " %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13161 " OpStore %gep_6_0 %bc_6_0\n"
13162 " OpStore %gep_6_1 %bc_6_1\n"
13163 " OpStore %gep_6_2 %bc_6_2\n"
13164 " OpStore %gep_6_3 %bc_6_3\n"
13165 " OpStore %gep_6_4 %bc_6_4\n"
13166
13167 // half offset 100
13168 " %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13169 " %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13170 " %bc_7 = OpBitcast %u32 %vec_7\n"
13171 " %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13172 " OpStore %gep_7 %bc_7\n"
13173
13174 // [5 x <3 x half>] offset 104
13175 " %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13176 " %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13177 " %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13178 " %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13179 " %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13180 " %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13181 " %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13182 " %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13183 " %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13184 " %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13185 " %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13186 " %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13187 " %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13188 " %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13189 " %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13190 " %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13191 " %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13192 " %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13193 " %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13194 " %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13195 " %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13196 " %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13197 " %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13198 " %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13199 " %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13200 " %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13201 " %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13202 " %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13203 " %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13204 " %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13205 " %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13206 " %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13207 " %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13208 " %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13209 " %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13210 " OpStore %gep_8_0_0 %bc_8_0_0\n"
13211 " OpStore %gep_8_0_1 %bc_8_0_1\n"
13212 " OpStore %gep_8_1_0 %bc_8_1_0\n"
13213 " OpStore %gep_8_1_1 %bc_8_1_1\n"
13214 " OpStore %gep_8_2_0 %bc_8_2_0\n"
13215 " OpStore %gep_8_2_1 %bc_8_2_1\n"
13216 " OpStore %gep_8_3_0 %bc_8_3_0\n"
13217 " OpStore %gep_8_3_1 %bc_8_3_1\n"
13218 " OpStore %gep_8_4_0 %bc_8_4_0\n"
13219 " OpStore %gep_8_4_1 %bc_8_4_1\n"
13220
13221 // [3 x <4 x half>] offset 144
13222 " %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13223 " %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13224 " %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13225 " %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13226 " %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13227 " %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13228 " %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13229 " %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13230 " %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13231 " %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13232 " %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13233 " %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13234 " %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13235 " %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13236 " %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13237 " %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13238 " %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13239 " %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13240 " %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13241 " %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13242 " %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13243 " OpStore %gep_9_0_0 %bc_9_0_0\n"
13244 " OpStore %gep_9_0_1 %bc_9_0_1\n"
13245 " OpStore %gep_9_1_0 %bc_9_1_0\n"
13246 " OpStore %gep_9_1_1 %bc_9_1_1\n"
13247 " OpStore %gep_9_2_0 %bc_9_2_0\n"
13248 " OpStore %gep_9_2_1 %bc_9_2_1\n"
13249
13250 " OpBranch %next\n"
13251
13252 " %next = OpLabel\n"
13253 " %i_cur = OpLoad %i32 %i\n"
13254 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13255 " OpStore %i %i_new\n"
13256 " OpBranch %loop\n"
13257
13258 " %merge = OpLabel\n"
13259 " OpBranch %end_if\n"
13260 " %end_if = OpLabel\n"
13261 " OpReturnValue %param\n"
13262 " OpFunctionEnd\n"
13263 );
13264
13265 {
13266 SpecResource specResource;
13267 map<string, string> specs;
13268 VulkanFeatures features;
13269 map<string, string> fragments;
13270 vector<string> extensions;
13271 vector<deFloat16> expectedOutput;
13272 string consts;
13273
13274 for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13275 {
13276 vector<deFloat16> expectedIterationOutput;
13277
13278 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13279 expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13280
13281 for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13282 expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13283
13284 expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13285 expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13286
13287 expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13288 }
13289
13290 for (deUint32 i = 0; i < structItemsCount; ++i)
13291 consts += " %c_f16_" + de::toString(i) + " = OpConstant %f16 " + de::toString(i) + "\n";
13292
13293 specs["num_elements"] = de::toString(numElements);
13294 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
13295 specs["field_modifier"] = de::toString(fieldModifier);
13296 specs["consts"] = consts;
13297
13298 fragments["capability"] = "OpCapability Float16\n";
13299 fragments["decoration"] = decoration.specialize(specs);
13300 fragments["pre_main"] = preMain.specialize(specs);
13301 fragments["testfun"] = testFun.specialize(specs);
13302
13303 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13304 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13305 specResource.verifyIO = compareFP16CompositeFunc;
13306
13307 extensions.push_back("VK_KHR_shader_float16_int8");
13308
13309 features.extFloat16Int8.shaderFloat16 = true;
13310
13311 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13312 }
13313
13314 return testGroup.release();
13315 }
13316
13317 template<class SpecResource>
createFloat16CompositeInsertExtractSet(tcu::TestContext & testCtx,const char * op)13318 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13319 {
13320 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13321 const deFloat16 exceptionValue = tcu::Float16(-1.0).bits();
13322 const string opName (op);
13323 const deUint32 opIndex = (opName == "OpCompositeInsert") ? 0
13324 : (opName == "OpCompositeExtract") ? 1
13325 : std::numeric_limits<deUint32>::max();
13326
13327 const StringTemplate preMain
13328 (
13329 " %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13330 " %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13331 " %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13332 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13333 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13334 " %f16 = OpTypeFloat 16\n"
13335 " %v2f16 = OpTypeVector %f16 2\n"
13336 " %v3f16 = OpTypeVector %f16 3\n"
13337 " %v4f16 = OpTypeVector %f16 4\n"
13338 " %c_f16_na = OpConstant %f16 -1.0\n"
13339 " %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13340 " %c_u32_5 = OpConstant %u32 5\n"
13341 " %c_i32_5 = OpConstant %i32 5\n"
13342 " %c_i32_6 = OpConstant %i32 6\n"
13343 " %c_i32_7 = OpConstant %i32 7\n"
13344 " %c_i32_8 = OpConstant %i32 8\n"
13345 " %c_i32_9 = OpConstant %i32 9\n"
13346 " %c_i32_10 = OpConstant %i32 10\n"
13347 " %c_i32_11 = OpConstant %i32 11\n"
13348
13349 "%f16arr3 = OpTypeArray %f16 %c_u32_3\n"
13350 "%v2f16arr3 = OpTypeArray %v2f16 %c_u32_3\n"
13351 "%v2f16arr5 = OpTypeArray %v2f16 %c_u32_5\n"
13352 "%v3f16arr5 = OpTypeArray %v3f16 %c_u32_5\n"
13353 "%v4f16arr3 = OpTypeArray %v4f16 %c_u32_3\n"
13354 "%struct16 = OpTypeStruct %f16 %v2f16arr3\n"
13355 "%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13356 "%st_test = OpTypeStruct %${field_type}\n"
13357
13358 " %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13359 " %ra_st = OpTypeArray %u32 %c_i32_size\n"
13360 " %up_u32 = OpTypePointer Uniform %u32\n"
13361 " %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13362 "%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13363 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13364 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13365 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13366 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13367
13368 "${op_premain_decls}"
13369
13370 " %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13371 " %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13372
13373 " %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13374 " %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13375 );
13376
13377 const StringTemplate decoration
13378 (
13379 "OpDecorate %SSBO_src BufferBlock\n"
13380 "OpDecorate %SSBO_dst BufferBlock\n"
13381 "OpDecorate %ra_f16 ArrayStride 4\n"
13382 "OpDecorate %ra_st ArrayStride 4\n"
13383 "OpDecorate %ssbo_src DescriptorSet 0\n"
13384 "OpDecorate %ssbo_src Binding 0\n"
13385 "OpDecorate %ssbo_dst DescriptorSet 0\n"
13386 "OpDecorate %ssbo_dst Binding 1\n"
13387
13388 "OpMemberDecorate %SSBO_src 0 Offset 0\n"
13389 "OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13390
13391 "OpDecorate %v2f16arr3 ArrayStride 4\n"
13392 "OpMemberDecorate %struct16 0 Offset 0\n"
13393 "OpMemberDecorate %struct16 1 Offset 4\n"
13394 "OpDecorate %struct16arr3 ArrayStride 16\n"
13395 "OpDecorate %f16arr3 ArrayStride 2\n"
13396 "OpDecorate %v2f16arr5 ArrayStride 4\n"
13397 "OpDecorate %v3f16arr5 ArrayStride 8\n"
13398 "OpDecorate %v4f16arr3 ArrayStride 8\n"
13399
13400 "OpMemberDecorate %st_test 0 Offset 0\n"
13401 );
13402
13403 const StringTemplate testFun
13404 (
13405 " %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13406 " %param = OpFunctionParameter %v4f32\n"
13407 " %entry = OpLabel\n"
13408
13409 " %i = OpVariable %fp_i32 Function\n"
13410 " OpStore %i %c_i32_0\n"
13411
13412 " %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13413 " OpSelectionMerge %end_if None\n"
13414 " OpBranchConditional %will_run %run_test %end_if\n"
13415
13416 " %run_test = OpLabel\n"
13417 " OpBranch %loop\n"
13418
13419 " %loop = OpLabel\n"
13420 " %i_cmp = OpLoad %i32 %i\n"
13421 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13422 " OpLoopMerge %merge %next None\n"
13423 " OpBranchConditional %lt %write %merge\n"
13424
13425 " %write = OpLabel\n"
13426 " %ndx = OpLoad %i32 %i\n"
13427
13428 "${op_sw_fun_call}"
13429
13430 " %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13431 " OpBranch %next\n"
13432
13433 " %next = OpLabel\n"
13434 " %i_cur = OpLoad %i32 %i\n"
13435 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13436 " OpStore %i %i_new\n"
13437 " OpBranch %loop\n"
13438
13439 " %merge = OpLabel\n"
13440 " OpBranch %end_if\n"
13441 " %end_if = OpLabel\n"
13442 " OpReturnValue %param\n"
13443 " OpFunctionEnd\n"
13444
13445 "${op_sw_fun_header}"
13446 " %sw_param = OpFunctionParameter %st_test\n"
13447 "%sw_paramn = OpFunctionParameter %i32\n"
13448 " %sw_entry = OpLabel\n"
13449 " OpSelectionMerge %switch_e None\n"
13450 " OpSwitch %sw_paramn %default ${case_list}\n"
13451
13452 "${case_bodies}"
13453
13454 "%default = OpLabel\n"
13455 " OpReturnValue ${op_case_default_value}\n"
13456 "%switch_e = OpLabel\n"
13457 " OpUnreachable\n" // Unreachable merge block for switch statement
13458 " OpFunctionEnd\n"
13459 );
13460
13461 const StringTemplate testCaseBody
13462 (
13463 "%case_${case_ndx} = OpLabel\n"
13464 "%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13465 " OpReturnValue %val_ret_${case_ndx}\n"
13466 );
13467
13468 const string loadF16
13469 (
13470 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13471 " %ld_${var}_param = OpFunctionParameter %i32\n"
13472 " %ld_${var}_entry = OpLabel\n"
13473 " %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13474 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13475 " OpReturnValue %ld_${var}_st_test\n"
13476 " OpFunctionEnd\n" +
13477 loadScalarF16FromUint
13478 );
13479
13480 const string loadV2F16
13481 (
13482 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13483 " %ld_${var}_param = OpFunctionParameter %i32\n"
13484 " %ld_${var}_entry = OpLabel\n"
13485 " %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13486 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13487 " OpReturnValue %ld_${var}_st_test\n"
13488 " OpFunctionEnd\n" +
13489 loadV2F16FromUint
13490 );
13491
13492 const string loadV3F16
13493 (
13494 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13495 " %ld_${var}_param = OpFunctionParameter %i32\n"
13496 " %ld_${var}_entry = OpLabel\n"
13497 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13498 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13499 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13500 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13501 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13502 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13503 " %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13504 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13505 " OpReturnValue %ld_${var}_st_test\n"
13506 " OpFunctionEnd\n"
13507 );
13508
13509 const string loadV4F16
13510 (
13511 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13512 " %ld_${var}_param = OpFunctionParameter %i32\n"
13513 " %ld_${var}_entry = OpLabel\n"
13514 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13515 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13516 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13517 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13518 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13519 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13520 " %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13521 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13522 " OpReturnValue %ld_${var}_st_test\n"
13523 " OpFunctionEnd\n"
13524 );
13525
13526 const string loadF16Arr3
13527 (
13528 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13529 " %ld_${var}_param = OpFunctionParameter %i32\n"
13530 " %ld_${var}_entry = OpLabel\n"
13531 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13532 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13533 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13534 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13535 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13536 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13537 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13538 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13539 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13540 " %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13541 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13542 " OpReturnValue %ld_${var}_st_test\n"
13543 " OpFunctionEnd\n"
13544 );
13545
13546 const string loadV2F16Arr5
13547 (
13548 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13549 " %ld_${var}_param = OpFunctionParameter %i32\n"
13550 " %ld_${var}_label = OpLabel\n"
13551 " %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13552 " %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13553 " %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13554 " %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13555 " %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13556 " %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13557 " %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13558 " %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13559 " %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13560 " %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13561 " %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13562 " %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13563 " %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13564 " %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13565 " %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13566 " %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13567 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13568 " OpReturnValue %ld_${var}_st_test\n"
13569 " OpFunctionEnd\n"
13570 );
13571
13572 const string loadV3F16Arr5
13573 (
13574 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13575 " %ld_${var}_param = OpFunctionParameter %i32\n"
13576 " %ld_${var}_entry = OpLabel\n"
13577 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13578 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13579 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13580 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13581 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13582 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13583 "%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13584 "%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13585 "%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13586 "%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13587 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13588 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13589 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13590 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13591 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13592 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13593 " %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13594 " %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13595 " %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13596 " %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13597 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13598 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13599 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13600 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13601 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13602 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13603 " %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13604 " %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13605 " %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13606 " %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13607 " %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13608 " %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13609 " %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13610 " %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13611 " %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13612 " %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13613 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13614 " OpReturnValue %ld_${var}_st_test\n"
13615 " OpFunctionEnd\n"
13616 );
13617
13618 const string loadV4F16Arr3
13619 (
13620 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13621 " %ld_${var}_param = OpFunctionParameter %i32\n"
13622 " %ld_${var}_entry = OpLabel\n"
13623 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13624 "%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13625 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13626 "%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13627 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13628 "%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13629 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13630 " %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13631 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13632 " %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13633 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13634 " %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13635 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13636 " %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13637 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13638 " %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13639 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13640 " %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13641 " %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13642 " %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13643 " %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13644 " %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13645 "%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13646 " OpReturnValue %ld_${var}_st_test\n"
13647 " OpFunctionEnd\n"
13648 );
13649
13650 const string loadStruct16Arr3
13651 (
13652 " %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13653 " %ld_${var}_param = OpFunctionParameter %i32\n"
13654 " %ld_${var}_entry = OpLabel\n"
13655 "%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13656 "%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13657 "%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13658 "%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13659 "%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13660 "%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13661 "%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13662 "%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13663 "%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13664 "%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13665 "%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13666 "%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13667 " %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13668 " %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13669 " %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13670 " %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13671 " %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13672 " %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13673 " %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13674 " %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13675 " %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13676 " %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13677 " %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13678 " %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13679 " %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13680 " %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13681 " %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13682 " %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13683 " %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13684 " %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13685 " %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13686 " %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13687 " %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13688 " %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13689 " %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13690 " %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13691 " %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13692 " %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13693 " %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13694 " %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13695 " %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13696 " %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13697 " %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13698 " %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13699 " %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13700 " %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13701 " %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13702 " OpReturnValue %ld_${var}_st_test\n"
13703 " OpFunctionEnd\n"
13704 );
13705
13706 const string storeF16
13707 (
13708 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13709 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13710 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13711 " %st_${var}_entry = OpLabel\n"
13712 " %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13713 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13714 " OpReturn\n"
13715 " OpFunctionEnd\n" +
13716 storeScalarF16AsUint
13717 );
13718
13719 const string storeV2F16
13720 (
13721 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13722 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13723 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13724 " %st_${var}_entry = OpLabel\n"
13725 " %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13726 " %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13727 " OpReturn\n"
13728 " OpFunctionEnd\n" +
13729 storeV2F16AsUint
13730 );
13731
13732 const string storeV3F16
13733 (
13734 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13735 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13736 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13737 " %st_${var}_entry = OpLabel\n"
13738 " %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13739 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13740 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13741 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13742 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13743 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13744 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13745 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13746 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13747 " OpReturn\n"
13748 " OpFunctionEnd\n"
13749 );
13750
13751 const string storeV4F16
13752 (
13753 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13754 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13755 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13756 " %st_${var}_entry = OpLabel\n"
13757 " %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13758 " %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13759 " %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13760 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13761 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13762 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13763 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13764 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13765 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13766 " OpReturn\n"
13767 " OpFunctionEnd\n"
13768 );
13769
13770 const string storeF16Arr3
13771 (
13772 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13773 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13774 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13775 " %st_${var}_entry = OpLabel\n"
13776 " %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13777 " %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13778 " %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13779 " %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13780 " %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13781 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13782 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13783 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13784 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13785 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13786 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13787 " OpReturn\n"
13788 " OpFunctionEnd\n"
13789 );
13790
13791 const string storeV2F16Arr5
13792 (
13793 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13794 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13795 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13796 " %st_${var}_entry = OpLabel\n"
13797 " %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13798 " %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13799 " %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13800 " %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13801 " %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13802 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13803 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13804 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13805 " %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13806 " %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13807 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13808 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13809 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13810 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13811 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13812 " OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13813 " OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13814 " OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13815 " OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13816 " OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13817 " OpReturn\n"
13818 " OpFunctionEnd\n"
13819 );
13820
13821 const string storeV3F16Arr5
13822 (
13823 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13824 "%st_${var}_param1 = OpFunctionParameter %st_test\n"
13825 "%st_${var}_param2 = OpFunctionParameter %i32\n"
13826 " %st_${var}_entry = OpLabel\n"
13827 " %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13828 " %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13829 " %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13830 " %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13831 " %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13832 "%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13833 "%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13834 "%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13835 "%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13836 "%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13837 "%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13838 "%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13839 "%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13840 "%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13841 "%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13842 "%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13843 "%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13844 "%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13845 "%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13846 "%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13847 "%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13848 "%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13849 "%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13850 "%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13851 "%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13852 " %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13853 " %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13854 " %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13855 " %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13856 " %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13857 " %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13858 " %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13859 " %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13860 " %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13861 " %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13862 " OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13863 " OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13864 " OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13865 " OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13866 " OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13867 " OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13868 " OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13869 " OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13870 " OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13871 " OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13872 " OpReturn\n"
13873 " OpFunctionEnd\n"
13874 );
13875
13876 const string storeV4F16Arr3
13877 (
13878 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13879 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13880 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13881 " %st_${var}_entry = OpLabel\n"
13882 " %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13883 " %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13884 " %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13885 "%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13886 "%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
13887 "%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13888 "%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13889 "%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13890 "%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
13891 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
13892 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13893 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13894 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13895 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13896 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
13897 "%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13898 "%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13899 "%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13900 "%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13901 "%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13902 "%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13903 " OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13904 " OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13905 " OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13906 " OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13907 " OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13908 " OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
13909 " OpReturn\n"
13910 " OpFunctionEnd\n"
13911 );
13912
13913 const string storeStruct16Arr3
13914 (
13915 " %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13916 " %st_${var}_param1 = OpFunctionParameter %st_test\n"
13917 " %st_${var}_param2 = OpFunctionParameter %i32\n"
13918 " %st_${var}_entry = OpLabel\n"
13919 " %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
13920 " %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13921 " %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
13922 " %st_${var}_el_0 = OpCompositeExtract %f16 %st_${var}_st_0 0\n"
13923 " %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
13924 " %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13925 " %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13926 " %st_${var}_el_1 = OpCompositeExtract %f16 %st_${var}_st_1 0\n"
13927 " %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13928 " %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13929 " %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13930 " %st_${var}_el_2 = OpCompositeExtract %f16 %st_${var}_st_2 0\n"
13931 " %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13932 " %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13933 " %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
13934 " %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
13935 " %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13936 " %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
13937 " %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_v2_0\n"
13938 " %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13939 " %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13940 " %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13941 " %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_v2_1\n"
13942 " %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13943 " %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13944 " %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13945 " %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_v2_2\n"
13946 " %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13947 " %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13948 " %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
13949 "%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13950 "%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13951 "%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13952 "%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13953 "%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13954 "%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13955 "%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13956 "%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13957 "%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13958 "%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13959 "%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13960 "%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13961 " OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
13962 " OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13963 " OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13964 " OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13965 " OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13966 " OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13967 " OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13968 " OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13969 " OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13970 " OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13971 " OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13972 " OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
13973 " OpReturn\n"
13974 " OpFunctionEnd\n"
13975 );
13976
13977 struct OpParts
13978 {
13979 const char* premainDecls;
13980 const char* swFunCall;
13981 const char* swFunHeader;
13982 const char* caseDefaultValue;
13983 const char* argsPartial;
13984 };
13985
13986 OpParts opPartsArray[] =
13987 {
13988 // OpCompositeInsert
13989 {
13990 " %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
13991 " %SSBO_src = OpTypeStruct %ra_f16\n"
13992 " %SSBO_dst = OpTypeStruct %ra_st\n",
13993
13994 " %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
13995 " %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
13996 " %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
13997
13998 " %sw_fun = OpFunction %st_test None %fun_t\n"
13999 "%sw_paramv = OpFunctionParameter %f16\n",
14000
14001 "%sw_param",
14002
14003 "%st_test %sw_paramv %sw_param",
14004 },
14005 // OpCompositeExtract
14006 {
14007 " %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14008 " %SSBO_src = OpTypeStruct %ra_st\n"
14009 " %SSBO_dst = OpTypeStruct %ra_f16\n",
14010
14011 " %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14012 " %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14013
14014 " %sw_fun = OpFunction %f16 None %fun_t\n",
14015
14016 "%c_f16_na",
14017
14018 "%f16 %sw_param",
14019 },
14020 };
14021
14022 DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14023
14024 const char* accessPathF16[] =
14025 {
14026 "0", // %f16
14027 DE_NULL,
14028 };
14029 const char* accessPathV2F16[] =
14030 {
14031 "0 0", // %v2f16
14032 "0 1",
14033 };
14034 const char* accessPathV3F16[] =
14035 {
14036 "0 0", // %v3f16
14037 "0 1",
14038 "0 2",
14039 DE_NULL,
14040 };
14041 const char* accessPathV4F16[] =
14042 {
14043 "0 0", // %v4f16"
14044 "0 1",
14045 "0 2",
14046 "0 3",
14047 };
14048 const char* accessPathF16Arr3[] =
14049 {
14050 "0 0", // %f16arr3
14051 "0 1",
14052 "0 2",
14053 DE_NULL,
14054 };
14055 const char* accessPathStruct16Arr3[] =
14056 {
14057 "0 0 0", // %struct16arr3
14058 DE_NULL,
14059 "0 0 1 0 0",
14060 "0 0 1 0 1",
14061 "0 0 1 1 0",
14062 "0 0 1 1 1",
14063 "0 0 1 2 0",
14064 "0 0 1 2 1",
14065 "0 1 0",
14066 DE_NULL,
14067 "0 1 1 0 0",
14068 "0 1 1 0 1",
14069 "0 1 1 1 0",
14070 "0 1 1 1 1",
14071 "0 1 1 2 0",
14072 "0 1 1 2 1",
14073 "0 2 0",
14074 DE_NULL,
14075 "0 2 1 0 0",
14076 "0 2 1 0 1",
14077 "0 2 1 1 0",
14078 "0 2 1 1 1",
14079 "0 2 1 2 0",
14080 "0 2 1 2 1",
14081 };
14082 const char* accessPathV2F16Arr5[] =
14083 {
14084 "0 0 0", // %v2f16arr5
14085 "0 0 1",
14086 "0 1 0",
14087 "0 1 1",
14088 "0 2 0",
14089 "0 2 1",
14090 "0 3 0",
14091 "0 3 1",
14092 "0 4 0",
14093 "0 4 1",
14094 };
14095 const char* accessPathV3F16Arr5[] =
14096 {
14097 "0 0 0", // %v3f16arr5
14098 "0 0 1",
14099 "0 0 2",
14100 DE_NULL,
14101 "0 1 0",
14102 "0 1 1",
14103 "0 1 2",
14104 DE_NULL,
14105 "0 2 0",
14106 "0 2 1",
14107 "0 2 2",
14108 DE_NULL,
14109 "0 3 0",
14110 "0 3 1",
14111 "0 3 2",
14112 DE_NULL,
14113 "0 4 0",
14114 "0 4 1",
14115 "0 4 2",
14116 DE_NULL,
14117 };
14118 const char* accessPathV4F16Arr3[] =
14119 {
14120 "0 0 0", // %v4f16arr3
14121 "0 0 1",
14122 "0 0 2",
14123 "0 0 3",
14124 "0 1 0",
14125 "0 1 1",
14126 "0 1 2",
14127 "0 1 3",
14128 "0 2 0",
14129 "0 2 1",
14130 "0 2 2",
14131 "0 2 3",
14132 DE_NULL,
14133 DE_NULL,
14134 DE_NULL,
14135 DE_NULL,
14136 };
14137
14138 struct TypeTestParameters
14139 {
14140 const char* name;
14141 size_t accessPathLength;
14142 const char** accessPath;
14143 const string loadFunction;
14144 const string storeFunction;
14145 };
14146
14147 const TypeTestParameters typeTestParameters[] =
14148 {
14149 { "f16", DE_LENGTH_OF_ARRAY(accessPathF16), accessPathF16, loadF16, storeF16 },
14150 { "v2f16", DE_LENGTH_OF_ARRAY(accessPathV2F16), accessPathV2F16, loadV2F16, storeV2F16 },
14151 { "v3f16", DE_LENGTH_OF_ARRAY(accessPathV3F16), accessPathV3F16, loadV3F16, storeV3F16 },
14152 { "v4f16", DE_LENGTH_OF_ARRAY(accessPathV4F16), accessPathV4F16, loadV4F16, storeV4F16 },
14153 { "f16arr3", DE_LENGTH_OF_ARRAY(accessPathF16Arr3), accessPathF16Arr3, loadF16Arr3, storeF16Arr3 },
14154 { "v2f16arr5", DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5), accessPathV2F16Arr5, loadV2F16Arr5, storeV2F16Arr5 },
14155 { "v3f16arr5", DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5), accessPathV3F16Arr5, loadV3F16Arr5, storeV3F16Arr5 },
14156 { "v4f16arr3", DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3), accessPathV4F16Arr3, loadV4F16Arr3, storeV4F16Arr3 },
14157 { "struct16arr3", DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3), accessPathStruct16Arr3, loadStruct16Arr3, storeStruct16Arr3},
14158 };
14159
14160 for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14161 {
14162 const OpParts opParts = opPartsArray[opIndex];
14163 const string testName = typeTestParameters[typeTestNdx].name;
14164 const size_t structItemsCount = typeTestParameters[typeTestNdx].accessPathLength;
14165 const char** accessPath = typeTestParameters[typeTestNdx].accessPath;
14166 SpecResource specResource;
14167 map<string, string> specs;
14168 VulkanFeatures features;
14169 map<string, string> fragments;
14170 vector<string> extensions;
14171 vector<deFloat16> inputFP16;
14172 vector<deFloat16> dummyFP16Output;
14173
14174 // Generate values for input
14175 inputFP16.reserve(structItemsCount);
14176 for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14177 inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
14178
14179 dummyFP16Output.resize(structItemsCount);
14180
14181 // Generate cases for OpSwitch
14182 {
14183 string caseBodies;
14184 string caseList;
14185
14186 for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14187 if (accessPath[caseNdx] != DE_NULL)
14188 {
14189 map<string, string> specCase;
14190
14191 specCase["case_ndx"] = de::toString(caseNdx);
14192 specCase["access_path"] = accessPath[caseNdx];
14193 specCase["op_args_part"] = opParts.argsPartial;
14194 specCase["op_name"] = opName;
14195
14196 caseBodies += testCaseBody.specialize(specCase);
14197 caseList += de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14198 }
14199
14200 specs["case_bodies"] = caseBodies;
14201 specs["case_list"] = caseList;
14202 }
14203
14204 specs["num_elements"] = de::toString(structItemsCount);
14205 specs["field_type"] = typeTestParameters[typeTestNdx].name;
14206 specs["struct_item_size"] = de::toString(structItemsCount * sizeof(deFloat16));
14207 specs["struct_u32s"] = de::toString(structItemsCount / 2);
14208 specs["op_premain_decls"] = opParts.premainDecls;
14209 specs["op_sw_fun_call"] = opParts.swFunCall;
14210 specs["op_sw_fun_header"] = opParts.swFunHeader;
14211 specs["op_case_default_value"] = opParts.caseDefaultValue;
14212 if (opIndex == 0) {
14213 specs["st_call"] = "st_ssbo_dst";
14214 specs["st_ndx"] = "c_i32_0";
14215 } else {
14216 specs["st_call"] = "st_fn_ssbo_dst";
14217 specs["st_ndx"] = "ndx";
14218 }
14219
14220 fragments["capability"] = "OpCapability Float16\n";
14221 fragments["decoration"] = decoration.specialize(specs);
14222 fragments["pre_main"] = preMain.specialize(specs);
14223 fragments["testfun"] = testFun.specialize(specs);
14224 if (opIndex == 0) {
14225 fragments["testfun"] += StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14226 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14227 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14228 } else {
14229 fragments["testfun"] += StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14230 fragments["testfun"] += StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14231 }
14232
14233 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14234 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(dummyFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14235 specResource.verifyIO = compareFP16CompositeFunc;
14236
14237 extensions.push_back("VK_KHR_shader_float16_int8");
14238
14239 features.extFloat16Int8.shaderFloat16 = true;
14240
14241 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
14242 }
14243
14244 return testGroup.release();
14245 }
14246
14247 struct fp16PerComponent
14248 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14249 fp16PerComponent()
14250 : flavor(0)
14251 , floatFormat16 (-14, 15, 10, true)
14252 , outCompCount(0)
14253 , argCompCount(3, 0)
14254 {
14255 }
14256
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14257 bool callOncePerComponent () { return true; }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14258 deUint32 getComponentValidity () { return static_cast<deUint32>(-1); }
14259
getULPsvkt::SpirVAssembly::fp16PerComponent14260 virtual double getULPs (vector<const deFloat16*>&) { return 1.0; }
getMinvkt::SpirVAssembly::fp16PerComponent14261 virtual double getMin (double value, double ulps) { return value - floatFormat16.ulp(deAbs(value), ulps); }
getMaxvkt::SpirVAssembly::fp16PerComponent14262 virtual double getMax (double value, double ulps) { return value + floatFormat16.ulp(deAbs(value), ulps); }
14263
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14264 virtual size_t getFlavorCount () { return flavorNames.empty() ? 1 : flavorNames.size(); }
setFlavorvkt::SpirVAssembly::fp16PerComponent14265 virtual void setFlavor (size_t flavorNo) { DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
getFlavorvkt::SpirVAssembly::fp16PerComponent14266 virtual size_t getFlavor () { return flavor; }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14267 virtual string getCurrentFlavorName () { return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14268
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14269 virtual void setOutCompCount (size_t compCount) { outCompCount = compCount; }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14270 virtual size_t getOutCompCount () { return outCompCount; }
14271
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14272 virtual void setArgCompCount (size_t argNo, size_t compCount) { argCompCount[argNo] = compCount; }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14273 virtual size_t getArgCompCount (size_t argNo) { return argCompCount[argNo]; }
14274
14275 protected:
14276 size_t flavor;
14277 tcu::FloatFormat floatFormat16;
14278 size_t outCompCount;
14279 vector<size_t> argCompCount;
14280 vector<string> flavorNames;
14281 };
14282
14283 struct fp16OpFNegate : public fp16PerComponent
14284 {
14285 template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14286 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14287 {
14288 const fp16type x (*in[0]);
14289 const double d (x.asDouble());
14290 const double result (0.0 - d);
14291
14292 out[0] = fp16type(result).bits();
14293 min[0] = getMin(result, getULPs(in));
14294 max[0] = getMax(result, getULPs(in));
14295
14296 return true;
14297 }
14298 };
14299
14300 struct fp16Round : public fp16PerComponent
14301 {
fp16Roundvkt::SpirVAssembly::fp16Round14302 fp16Round() : fp16PerComponent()
14303 {
14304 flavorNames.push_back("Floor(x+0.5)");
14305 flavorNames.push_back("Floor(x-0.5)");
14306 flavorNames.push_back("RoundEven");
14307 }
14308
14309 template<class fp16type>
calcvkt::SpirVAssembly::fp16Round14310 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14311 {
14312 const fp16type x (*in[0]);
14313 const double d (x.asDouble());
14314 double result (0.0);
14315
14316 switch (flavor)
14317 {
14318 case 0: result = deRound(d); break;
14319 case 1: result = deFloor(d - 0.5); break;
14320 case 2: result = deRoundEven(d); break;
14321 default: TCU_THROW(InternalError, "Invalid flavor specified");
14322 }
14323
14324 out[0] = fp16type(result).bits();
14325 min[0] = getMin(result, getULPs(in));
14326 max[0] = getMax(result, getULPs(in));
14327
14328 return true;
14329 }
14330 };
14331
14332 struct fp16RoundEven : public fp16PerComponent
14333 {
14334 template<class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14335 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14336 {
14337 const fp16type x (*in[0]);
14338 const double d (x.asDouble());
14339 const double result (deRoundEven(d));
14340
14341 out[0] = fp16type(result).bits();
14342 min[0] = getMin(result, getULPs(in));
14343 max[0] = getMax(result, getULPs(in));
14344
14345 return true;
14346 }
14347 };
14348
14349 struct fp16Trunc : public fp16PerComponent
14350 {
14351 template<class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14352 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14353 {
14354 const fp16type x (*in[0]);
14355 const double d (x.asDouble());
14356 const double result (deTrunc(d));
14357
14358 out[0] = fp16type(result).bits();
14359 min[0] = getMin(result, getULPs(in));
14360 max[0] = getMax(result, getULPs(in));
14361
14362 return true;
14363 }
14364 };
14365
14366 struct fp16FAbs : public fp16PerComponent
14367 {
14368 template<class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14369 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14370 {
14371 const fp16type x (*in[0]);
14372 const double d (x.asDouble());
14373 const double result (deAbs(d));
14374
14375 out[0] = fp16type(result).bits();
14376 min[0] = getMin(result, getULPs(in));
14377 max[0] = getMax(result, getULPs(in));
14378
14379 return true;
14380 }
14381 };
14382
14383 struct fp16FSign : public fp16PerComponent
14384 {
14385 template<class fp16type>
calcvkt::SpirVAssembly::fp16FSign14386 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14387 {
14388 const fp16type x (*in[0]);
14389 const double d (x.asDouble());
14390 const double result (deSign(d));
14391
14392 if (x.isNaN())
14393 return false;
14394
14395 out[0] = fp16type(result).bits();
14396 min[0] = getMin(result, getULPs(in));
14397 max[0] = getMax(result, getULPs(in));
14398
14399 return true;
14400 }
14401 };
14402
14403 struct fp16Floor : public fp16PerComponent
14404 {
14405 template<class fp16type>
calcvkt::SpirVAssembly::fp16Floor14406 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14407 {
14408 const fp16type x (*in[0]);
14409 const double d (x.asDouble());
14410 const double result (deFloor(d));
14411
14412 out[0] = fp16type(result).bits();
14413 min[0] = getMin(result, getULPs(in));
14414 max[0] = getMax(result, getULPs(in));
14415
14416 return true;
14417 }
14418 };
14419
14420 struct fp16Ceil : public fp16PerComponent
14421 {
14422 template<class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14423 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14424 {
14425 const fp16type x (*in[0]);
14426 const double d (x.asDouble());
14427 const double result (deCeil(d));
14428
14429 out[0] = fp16type(result).bits();
14430 min[0] = getMin(result, getULPs(in));
14431 max[0] = getMax(result, getULPs(in));
14432
14433 return true;
14434 }
14435 };
14436
14437 struct fp16Fract : public fp16PerComponent
14438 {
14439 template<class fp16type>
calcvkt::SpirVAssembly::fp16Fract14440 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14441 {
14442 const fp16type x (*in[0]);
14443 const double d (x.asDouble());
14444 const double result (deFrac(d));
14445
14446 out[0] = fp16type(result).bits();
14447 min[0] = getMin(result, getULPs(in));
14448 max[0] = getMax(result, getULPs(in));
14449
14450 return true;
14451 }
14452 };
14453
14454 struct fp16Radians : public fp16PerComponent
14455 {
getULPsvkt::SpirVAssembly::fp16Radians14456 virtual double getULPs (vector<const deFloat16*>& in)
14457 {
14458 DE_UNREF(in);
14459
14460 return 2.5;
14461 }
14462
14463 template<class fp16type>
calcvkt::SpirVAssembly::fp16Radians14464 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14465 {
14466 const fp16type x (*in[0]);
14467 const float d (x.asFloat());
14468 const float result (deFloatRadians(d));
14469
14470 out[0] = fp16type(result).bits();
14471 min[0] = getMin(result, getULPs(in));
14472 max[0] = getMax(result, getULPs(in));
14473
14474 return true;
14475 }
14476 };
14477
14478 struct fp16Degrees : public fp16PerComponent
14479 {
getULPsvkt::SpirVAssembly::fp16Degrees14480 virtual double getULPs (vector<const deFloat16*>& in)
14481 {
14482 DE_UNREF(in);
14483
14484 return 2.5;
14485 }
14486
14487 template<class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14488 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14489 {
14490 const fp16type x (*in[0]);
14491 const float d (x.asFloat());
14492 const float result (deFloatDegrees(d));
14493
14494 out[0] = fp16type(result).bits();
14495 min[0] = getMin(result, getULPs(in));
14496 max[0] = getMax(result, getULPs(in));
14497
14498 return true;
14499 }
14500 };
14501
14502 struct fp16Sin : public fp16PerComponent
14503 {
14504 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sin14505 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14506 {
14507 const fp16type x (*in[0]);
14508 const double d (x.asDouble());
14509 const double result (deSin(d));
14510 const double unspecUlp (16.0);
14511 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14512
14513 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14514 return false;
14515
14516 out[0] = fp16type(result).bits();
14517 min[0] = result - err;
14518 max[0] = result + err;
14519
14520 return true;
14521 }
14522 };
14523
14524 struct fp16Cos : public fp16PerComponent
14525 {
14526 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cos14527 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14528 {
14529 const fp16type x (*in[0]);
14530 const double d (x.asDouble());
14531 const double result (deCos(d));
14532 const double unspecUlp (16.0);
14533 const double err (de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14534
14535 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14536 return false;
14537
14538 out[0] = fp16type(result).bits();
14539 min[0] = result - err;
14540 max[0] = result + err;
14541
14542 return true;
14543 }
14544 };
14545
14546 struct fp16Tan : public fp16PerComponent
14547 {
14548 template<class fp16type>
calcvkt::SpirVAssembly::fp16Tan14549 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14550 {
14551 const fp16type x (*in[0]);
14552 const double d (x.asDouble());
14553 const double result (deTan(d));
14554
14555 if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14556 return false;
14557
14558 out[0] = fp16type(result).bits();
14559 {
14560 const double err = deLdExp(1.0, -7);
14561 const double s1 = deSin(d) + err;
14562 const double s2 = deSin(d) - err;
14563 const double c1 = deCos(d) + err;
14564 const double c2 = deCos(d) - err;
14565 const double edgeVals[] = {s1/c1, s1/c2, s2/c1, s2/c2};
14566 double edgeLeft = out[0];
14567 double edgeRight = out[0];
14568
14569 if (deSign(c1 * c2) < 0.0)
14570 {
14571 edgeLeft = -std::numeric_limits<double>::infinity();
14572 edgeRight = +std::numeric_limits<double>::infinity();
14573 }
14574 else
14575 {
14576 edgeLeft = *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14577 edgeRight = *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14578 }
14579
14580 min[0] = edgeLeft;
14581 max[0] = edgeRight;
14582 }
14583
14584 return true;
14585 }
14586 };
14587
14588 struct fp16Asin : public fp16PerComponent
14589 {
14590 template<class fp16type>
calcvkt::SpirVAssembly::fp16Asin14591 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14592 {
14593 const fp16type x (*in[0]);
14594 const double d (x.asDouble());
14595 const double result (deAsin(d));
14596 const double error (deAtan2(d, sqrt(1.0 - d * d)));
14597
14598 if (!x.isNaN() && deAbs(d) > 1.0)
14599 return false;
14600
14601 out[0] = fp16type(result).bits();
14602 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14603 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14604
14605 return true;
14606 }
14607 };
14608
14609 struct fp16Acos : public fp16PerComponent
14610 {
14611 template<class fp16type>
calcvkt::SpirVAssembly::fp16Acos14612 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14613 {
14614 const fp16type x (*in[0]);
14615 const double d (x.asDouble());
14616 const double result (deAcos(d));
14617 const double error (deAtan2(sqrt(1.0 - d * d), d));
14618
14619 if (!x.isNaN() && deAbs(d) > 1.0)
14620 return false;
14621
14622 out[0] = fp16type(result).bits();
14623 min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14624 max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14625
14626 return true;
14627 }
14628 };
14629
14630 struct fp16Atan : public fp16PerComponent
14631 {
getULPsvkt::SpirVAssembly::fp16Atan14632 virtual double getULPs(vector<const deFloat16*>& in)
14633 {
14634 DE_UNREF(in);
14635
14636 return 2 * 5.0; // This is not a precision test. Value is not from spec
14637 }
14638
14639 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan14640 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14641 {
14642 const fp16type x (*in[0]);
14643 const double d (x.asDouble());
14644 const double result (deAtanOver(d));
14645
14646 out[0] = fp16type(result).bits();
14647 min[0] = getMin(result, getULPs(in));
14648 max[0] = getMax(result, getULPs(in));
14649
14650 return true;
14651 }
14652 };
14653
14654 struct fp16Sinh : public fp16PerComponent
14655 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh14656 fp16Sinh() : fp16PerComponent()
14657 {
14658 flavorNames.push_back("Double");
14659 flavorNames.push_back("ExpFP16");
14660 }
14661
14662 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sinh14663 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14664 {
14665 const fp16type x (*in[0]);
14666 const double d (x.asDouble());
14667 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14668 double result (0.0);
14669 double error (0.0);
14670
14671 if (getFlavor() == 0)
14672 {
14673 result = deSinh(d);
14674 error = floatFormat16.ulp(deAbs(result), ulps);
14675 }
14676 else if (getFlavor() == 1)
14677 {
14678 const fp16type epx (deExp(d));
14679 const fp16type enx (deExp(-d));
14680 const fp16type esx (epx.asDouble() - enx.asDouble());
14681 const fp16type sx2 (esx.asDouble() / 2.0);
14682
14683 result = sx2.asDouble();
14684 error = deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14685 }
14686 else
14687 {
14688 TCU_THROW(InternalError, "Unknown flavor");
14689 }
14690
14691 out[0] = fp16type(result).bits();
14692 min[0] = result - error;
14693 max[0] = result + error;
14694
14695 return true;
14696 }
14697 };
14698
14699 struct fp16Cosh : public fp16PerComponent
14700 {
fp16Coshvkt::SpirVAssembly::fp16Cosh14701 fp16Cosh() : fp16PerComponent()
14702 {
14703 flavorNames.push_back("Double");
14704 flavorNames.push_back("ExpFP16");
14705 }
14706
14707 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cosh14708 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14709 {
14710 const fp16type x (*in[0]);
14711 const double d (x.asDouble());
14712 const double ulps (64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14713 double result (0.0);
14714
14715 if (getFlavor() == 0)
14716 {
14717 result = deCosh(d);
14718 }
14719 else if (getFlavor() == 1)
14720 {
14721 const fp16type epx (deExp(d));
14722 const fp16type enx (deExp(-d));
14723 const fp16type esx (epx.asDouble() + enx.asDouble());
14724 const fp16type sx2 (esx.asDouble() / 2.0);
14725
14726 result = sx2.asDouble();
14727 }
14728 else
14729 {
14730 TCU_THROW(InternalError, "Unknown flavor");
14731 }
14732
14733 out[0] = fp16type(result).bits();
14734 min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14735 max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14736
14737 return true;
14738 }
14739 };
14740
14741 struct fp16Tanh : public fp16PerComponent
14742 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh14743 fp16Tanh() : fp16PerComponent()
14744 {
14745 flavorNames.push_back("Tanh");
14746 flavorNames.push_back("SinhCosh");
14747 flavorNames.push_back("SinhCoshFP16");
14748 flavorNames.push_back("PolyFP16");
14749 }
14750
getULPsvkt::SpirVAssembly::fp16Tanh14751 virtual double getULPs (vector<const deFloat16*>& in)
14752 {
14753 const tcu::Float16 x (*in[0]);
14754 const double d (x.asDouble());
14755
14756 return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14757 }
14758
14759 template<class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh14760 inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14761 {
14762 const fp16type esx (espx.asDouble() - esnx.asDouble());
14763 const fp16type sx2 (esx.asDouble() / 2.0);
14764 const fp16type ecx (ecpx.asDouble() + ecnx.asDouble());
14765 const fp16type cx2 (ecx.asDouble() / 2.0);
14766 const fp16type tg (sx2.asDouble() / cx2.asDouble());
14767 const double rez (tg.asDouble());
14768
14769 return rez;
14770 }
14771
14772 template<class fp16type>
calcvkt::SpirVAssembly::fp16Tanh14773 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14774 {
14775 const fp16type x (*in[0]);
14776 const double d (x.asDouble());
14777 double result (0.0);
14778
14779 if (getFlavor() == 0)
14780 {
14781 result = deTanh(d);
14782 min[0] = getMin(result, getULPs(in));
14783 max[0] = getMax(result, getULPs(in));
14784 }
14785 else if (getFlavor() == 1)
14786 {
14787 result = deSinh(d) / deCosh(d);
14788 min[0] = getMin(result, getULPs(in));
14789 max[0] = getMax(result, getULPs(in));
14790 }
14791 else if (getFlavor() == 2)
14792 {
14793 const fp16type s (deSinh(d));
14794 const fp16type c (deCosh(d));
14795
14796 result = s.asDouble() / c.asDouble();
14797 min[0] = getMin(result, getULPs(in));
14798 max[0] = getMax(result, getULPs(in));
14799 }
14800 else if (getFlavor() == 3)
14801 {
14802 const double ulps (getULPs(in));
14803 const double epxm (deExp( d));
14804 const double enxm (deExp(-d));
14805 const double epxmerr = floatFormat16.ulp(epxm, ulps);
14806 const double enxmerr = floatFormat16.ulp(enxm, ulps);
14807 const fp16type epx[] = { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14808 const fp16type enx[] = { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14809 const fp16type epxm16 (epxm);
14810 const fp16type enxm16 (enxm);
14811 vector<double> tgs;
14812
14813 for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14814 for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14815 for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14816 for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14817 {
14818 const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14819
14820 tgs.push_back(tgh);
14821 }
14822
14823 result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14824 min[0] = *std::min_element(tgs.begin(), tgs.end());
14825 max[0] = *std::max_element(tgs.begin(), tgs.end());
14826 }
14827 else
14828 {
14829 TCU_THROW(InternalError, "Unknown flavor");
14830 }
14831
14832 out[0] = fp16type(result).bits();
14833
14834 return true;
14835 }
14836 };
14837
14838 struct fp16Asinh : public fp16PerComponent
14839 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh14840 fp16Asinh() : fp16PerComponent()
14841 {
14842 flavorNames.push_back("Double");
14843 flavorNames.push_back("PolyFP16Wiki");
14844 flavorNames.push_back("PolyFP16Abs");
14845 }
14846
getULPsvkt::SpirVAssembly::fp16Asinh14847 virtual double getULPs (vector<const deFloat16*>& in)
14848 {
14849 DE_UNREF(in);
14850
14851 return 256.0; // This is not a precision test. Value is not from spec
14852 }
14853
14854 template<class fp16type>
calcvkt::SpirVAssembly::fp16Asinh14855 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14856 {
14857 const fp16type x (*in[0]);
14858 const double d (x.asDouble());
14859 double result (0.0);
14860
14861 if (getFlavor() == 0)
14862 {
14863 result = deAsinh(d);
14864 }
14865 else if (getFlavor() == 1)
14866 {
14867 const fp16type x2 (d * d);
14868 const fp16type x2p1 (x2.asDouble() + 1.0);
14869 const fp16type sq (deSqrt(x2p1.asDouble()));
14870 const fp16type sxsq (d + sq.asDouble());
14871 const fp16type lsxsq (deLog(sxsq.asDouble()));
14872
14873 if (lsxsq.isInf())
14874 return false;
14875
14876 result = lsxsq.asDouble();
14877 }
14878 else if (getFlavor() == 2)
14879 {
14880 const fp16type x2 (d * d);
14881 const fp16type x2p1 (x2.asDouble() + 1.0);
14882 const fp16type sq (deSqrt(x2p1.asDouble()));
14883 const fp16type sxsq (deAbs(d) + sq.asDouble());
14884 const fp16type lsxsq (deLog(sxsq.asDouble()));
14885
14886 result = deSign(d) * lsxsq.asDouble();
14887 }
14888 else
14889 {
14890 TCU_THROW(InternalError, "Unknown flavor");
14891 }
14892
14893 out[0] = fp16type(result).bits();
14894 min[0] = getMin(result, getULPs(in));
14895 max[0] = getMax(result, getULPs(in));
14896
14897 return true;
14898 }
14899 };
14900
14901 struct fp16Acosh : public fp16PerComponent
14902 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh14903 fp16Acosh() : fp16PerComponent()
14904 {
14905 flavorNames.push_back("Double");
14906 flavorNames.push_back("PolyFP16");
14907 }
14908
getULPsvkt::SpirVAssembly::fp16Acosh14909 virtual double getULPs (vector<const deFloat16*>& in)
14910 {
14911 DE_UNREF(in);
14912
14913 return 16.0; // This is not a precision test. Value is not from spec
14914 }
14915
14916 template<class fp16type>
calcvkt::SpirVAssembly::fp16Acosh14917 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14918 {
14919 const fp16type x (*in[0]);
14920 const double d (x.asDouble());
14921 double result (0.0);
14922
14923 if (!x.isNaN() && d < 1.0)
14924 return false;
14925
14926 if (getFlavor() == 0)
14927 {
14928 result = deAcosh(d);
14929 }
14930 else if (getFlavor() == 1)
14931 {
14932 const fp16type x2 (d * d);
14933 const fp16type x2m1 (x2.asDouble() - 1.0);
14934 const fp16type sq (deSqrt(x2m1.asDouble()));
14935 const fp16type sxsq (d + sq.asDouble());
14936 const fp16type lsxsq (deLog(sxsq.asDouble()));
14937
14938 result = lsxsq.asDouble();
14939 }
14940 else
14941 {
14942 TCU_THROW(InternalError, "Unknown flavor");
14943 }
14944
14945 out[0] = fp16type(result).bits();
14946 min[0] = getMin(result, getULPs(in));
14947 max[0] = getMax(result, getULPs(in));
14948
14949 return true;
14950 }
14951 };
14952
14953 struct fp16Atanh : public fp16PerComponent
14954 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh14955 fp16Atanh() : fp16PerComponent()
14956 {
14957 flavorNames.push_back("Double");
14958 flavorNames.push_back("PolyFP16");
14959 }
14960
14961 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atanh14962 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14963 {
14964 const fp16type x (*in[0]);
14965 const double d (x.asDouble());
14966 double result (0.0);
14967
14968 if (deAbs(d) >= 1.0)
14969 return false;
14970
14971 if (getFlavor() == 0)
14972 {
14973 const double ulps (16.0); // This is not a precision test. Value is not from spec
14974
14975 result = deAtanh(d);
14976 min[0] = getMin(result, ulps);
14977 max[0] = getMax(result, ulps);
14978 }
14979 else if (getFlavor() == 1)
14980 {
14981 const fp16type x1a (1.0 + d);
14982 const fp16type x1b (1.0 - d);
14983 const fp16type x1d (x1a.asDouble() / x1b.asDouble());
14984 const fp16type lx1d (deLog(x1d.asDouble()));
14985 const fp16type lx1d2 (0.5 * lx1d.asDouble());
14986 const double error (2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
14987
14988 result = lx1d2.asDouble();
14989 min[0] = result - error;
14990 max[0] = result + error;
14991 }
14992 else
14993 {
14994 TCU_THROW(InternalError, "Unknown flavor");
14995 }
14996
14997 out[0] = fp16type(result).bits();
14998
14999 return true;
15000 }
15001 };
15002
15003 struct fp16Exp : public fp16PerComponent
15004 {
15005 template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp15006 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15007 {
15008 const fp16type x (*in[0]);
15009 const double d (x.asDouble());
15010 const double ulps (10.0 * (1.0 + 2.0 * deAbs(d)));
15011 const double result (deExp(d));
15012
15013 out[0] = fp16type(result).bits();
15014 min[0] = getMin(result, ulps);
15015 max[0] = getMax(result, ulps);
15016
15017 return true;
15018 }
15019 };
15020
15021 struct fp16Log : public fp16PerComponent
15022 {
15023 template<class fp16type>
calcvkt::SpirVAssembly::fp16Log15024 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15025 {
15026 const fp16type x (*in[0]);
15027 const double d (x.asDouble());
15028 const double result (deLog(d));
15029 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15030
15031 if (d <= 0.0)
15032 return false;
15033
15034 out[0] = fp16type(result).bits();
15035 min[0] = result - error;
15036 max[0] = result + error;
15037
15038 return true;
15039 }
15040 };
15041
15042 struct fp16Exp2 : public fp16PerComponent
15043 {
15044 template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp215045 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15046 {
15047 const fp16type x (*in[0]);
15048 const double d (x.asDouble());
15049 const double result (deExp2(d));
15050 const double ulps (1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15051
15052 out[0] = fp16type(result).bits();
15053 min[0] = getMin(result, ulps);
15054 max[0] = getMax(result, ulps);
15055
15056 return true;
15057 }
15058 };
15059
15060 struct fp16Log2 : public fp16PerComponent
15061 {
15062 template<class fp16type>
calcvkt::SpirVAssembly::fp16Log215063 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15064 {
15065 const fp16type x (*in[0]);
15066 const double d (x.asDouble());
15067 const double result (deLog2(d));
15068 const double error (de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15069
15070 if (d <= 0.0)
15071 return false;
15072
15073 out[0] = fp16type(result).bits();
15074 min[0] = result - error;
15075 max[0] = result + error;
15076
15077 return true;
15078 }
15079 };
15080
15081 struct fp16Sqrt : public fp16PerComponent
15082 {
getULPsvkt::SpirVAssembly::fp16Sqrt15083 virtual double getULPs (vector<const deFloat16*>& in)
15084 {
15085 DE_UNREF(in);
15086
15087 return 6.0;
15088 }
15089
15090 template<class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15091 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15092 {
15093 const fp16type x (*in[0]);
15094 const double d (x.asDouble());
15095 const double result (deSqrt(d));
15096
15097 if (!x.isNaN() && d < 0.0)
15098 return false;
15099
15100 out[0] = fp16type(result).bits();
15101 min[0] = getMin(result, getULPs(in));
15102 max[0] = getMax(result, getULPs(in));
15103
15104 return true;
15105 }
15106 };
15107
15108 struct fp16InverseSqrt : public fp16PerComponent
15109 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15110 virtual double getULPs (vector<const deFloat16*>& in)
15111 {
15112 DE_UNREF(in);
15113
15114 return 2.0;
15115 }
15116
15117 template<class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15118 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15119 {
15120 const fp16type x (*in[0]);
15121 const double d (x.asDouble());
15122 const double result (1.0/deSqrt(d));
15123
15124 if (!x.isNaN() && d <= 0.0)
15125 return false;
15126
15127 out[0] = fp16type(result).bits();
15128 min[0] = getMin(result, getULPs(in));
15129 max[0] = getMax(result, getULPs(in));
15130
15131 return true;
15132 }
15133 };
15134
15135 struct fp16ModfFrac : public fp16PerComponent
15136 {
15137 template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15138 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15139 {
15140 const fp16type x (*in[0]);
15141 const double d (x.asDouble());
15142 double i (0.0);
15143 const double result (deModf(d, &i));
15144
15145 if (x.isInf() || x.isNaN())
15146 return false;
15147
15148 out[0] = fp16type(result).bits();
15149 min[0] = getMin(result, getULPs(in));
15150 max[0] = getMax(result, getULPs(in));
15151
15152 return true;
15153 }
15154 };
15155
15156 struct fp16ModfInt : public fp16PerComponent
15157 {
15158 template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15159 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15160 {
15161 const fp16type x (*in[0]);
15162 const double d (x.asDouble());
15163 double i (0.0);
15164 const double dummy (deModf(d, &i));
15165 const double result (i);
15166
15167 DE_UNREF(dummy);
15168
15169 if (x.isInf() || x.isNaN())
15170 return false;
15171
15172 out[0] = fp16type(result).bits();
15173 min[0] = getMin(result, getULPs(in));
15174 max[0] = getMax(result, getULPs(in));
15175
15176 return true;
15177 }
15178 };
15179
15180 struct fp16FrexpS : public fp16PerComponent
15181 {
15182 template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15183 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15184 {
15185 const fp16type x (*in[0]);
15186 const double d (x.asDouble());
15187 int e (0);
15188 const double result (deFrExp(d, &e));
15189
15190 if (x.isNaN() || x.isInf())
15191 return false;
15192
15193 out[0] = fp16type(result).bits();
15194 min[0] = getMin(result, getULPs(in));
15195 max[0] = getMax(result, getULPs(in));
15196
15197 return true;
15198 }
15199 };
15200
15201 struct fp16FrexpE : public fp16PerComponent
15202 {
15203 template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15204 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15205 {
15206 const fp16type x (*in[0]);
15207 const double d (x.asDouble());
15208 int e (0);
15209 const double dummy (deFrExp(d, &e));
15210 const double result (static_cast<double>(e));
15211
15212 DE_UNREF(dummy);
15213
15214 if (x.isNaN() || x.isInf())
15215 return false;
15216
15217 out[0] = fp16type(result).bits();
15218 min[0] = getMin(result, getULPs(in));
15219 max[0] = getMax(result, getULPs(in));
15220
15221 return true;
15222 }
15223 };
15224
15225 struct fp16OpFAdd : public fp16PerComponent
15226 {
15227 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15228 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15229 {
15230 const fp16type x (*in[0]);
15231 const fp16type y (*in[1]);
15232 const double xd (x.asDouble());
15233 const double yd (y.asDouble());
15234 const double result (xd + yd);
15235
15236 out[0] = fp16type(result).bits();
15237 min[0] = getMin(result, getULPs(in));
15238 max[0] = getMax(result, getULPs(in));
15239
15240 return true;
15241 }
15242 };
15243
15244 struct fp16OpFSub : public fp16PerComponent
15245 {
15246 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15247 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15248 {
15249 const fp16type x (*in[0]);
15250 const fp16type y (*in[1]);
15251 const double xd (x.asDouble());
15252 const double yd (y.asDouble());
15253 const double result (xd - yd);
15254
15255 out[0] = fp16type(result).bits();
15256 min[0] = getMin(result, getULPs(in));
15257 max[0] = getMax(result, getULPs(in));
15258
15259 return true;
15260 }
15261 };
15262
15263 struct fp16OpFMul : public fp16PerComponent
15264 {
15265 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15266 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15267 {
15268 const fp16type x (*in[0]);
15269 const fp16type y (*in[1]);
15270 const double xd (x.asDouble());
15271 const double yd (y.asDouble());
15272 const double result (xd * yd);
15273
15274 out[0] = fp16type(result).bits();
15275 min[0] = getMin(result, getULPs(in));
15276 max[0] = getMax(result, getULPs(in));
15277
15278 return true;
15279 }
15280 };
15281
15282 struct fp16OpFDiv : public fp16PerComponent
15283 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15284 fp16OpFDiv() : fp16PerComponent()
15285 {
15286 flavorNames.push_back("DirectDiv");
15287 flavorNames.push_back("InverseDiv");
15288 }
15289
15290 template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15291 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15292 {
15293 const fp16type x (*in[0]);
15294 const fp16type y (*in[1]);
15295 const double xd (x.asDouble());
15296 const double yd (y.asDouble());
15297 const double unspecUlp (16.0);
15298 const double ulpCnt (de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15299 double result (0.0);
15300
15301 if (y.isZero())
15302 return false;
15303
15304 if (getFlavor() == 0)
15305 {
15306 result = (xd / yd);
15307 }
15308 else if (getFlavor() == 1)
15309 {
15310 const double invyd (1.0 / yd);
15311 const fp16type invy (invyd);
15312
15313 result = (xd * invy.asDouble());
15314 }
15315 else
15316 {
15317 TCU_THROW(InternalError, "Unknown flavor");
15318 }
15319
15320 out[0] = fp16type(result).bits();
15321 min[0] = getMin(result, ulpCnt);
15322 max[0] = getMax(result, ulpCnt);
15323
15324 return true;
15325 }
15326 };
15327
15328 struct fp16Atan2 : public fp16PerComponent
15329 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215330 fp16Atan2() : fp16PerComponent()
15331 {
15332 flavorNames.push_back("DoubleCalc");
15333 flavorNames.push_back("DoubleCalc_PI");
15334 }
15335
getULPsvkt::SpirVAssembly::fp16Atan215336 virtual double getULPs(vector<const deFloat16*>& in)
15337 {
15338 DE_UNREF(in);
15339
15340 return 2 * 5.0; // This is not a precision test. Value is not from spec
15341 }
15342
15343 template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan215344 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15345 {
15346 const fp16type x (*in[0]);
15347 const fp16type y (*in[1]);
15348 const double xd (x.asDouble());
15349 const double yd (y.asDouble());
15350 double result (0.0);
15351
15352 if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15353 return false;
15354
15355 if (getFlavor() == 0)
15356 {
15357 result = deAtan2(xd, yd);
15358 }
15359 else if (getFlavor() == 1)
15360 {
15361 const double ulps (2.0 * 5.0); // This is not a precision test. Value is not from spec
15362 const double eps (floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15363
15364 result = deAtan2(xd, yd);
15365
15366 if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15367 result = -result;
15368 }
15369 else
15370 {
15371 TCU_THROW(InternalError, "Unknown flavor");
15372 }
15373
15374 out[0] = fp16type(result).bits();
15375 min[0] = getMin(result, getULPs(in));
15376 max[0] = getMax(result, getULPs(in));
15377
15378 return true;
15379 }
15380 };
15381
15382 struct fp16Pow : public fp16PerComponent
15383 {
fp16Powvkt::SpirVAssembly::fp16Pow15384 fp16Pow() : fp16PerComponent()
15385 {
15386 flavorNames.push_back("Pow");
15387 flavorNames.push_back("PowLog2");
15388 flavorNames.push_back("PowLog2FP16");
15389 }
15390
15391 template<class fp16type>
calcvkt::SpirVAssembly::fp16Pow15392 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15393 {
15394 const fp16type x (*in[0]);
15395 const fp16type y (*in[1]);
15396 const double xd (x.asDouble());
15397 const double yd (y.asDouble());
15398 const double logxeps (de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15399 const double ulps1 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15400 const double ulps2 (1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15401 const double ulps (deMax(deAbs(ulps1), deAbs(ulps2)));
15402 double result (0.0);
15403
15404 if (xd < 0.0)
15405 return false;
15406
15407 if (x.isZero() && yd <= 0.0)
15408 return false;
15409
15410 if (getFlavor() == 0)
15411 {
15412 result = dePow(xd, yd);
15413 }
15414 else if (getFlavor() == 1)
15415 {
15416 const double l2d (deLog2(xd));
15417 const double e2d (deExp2(yd * l2d));
15418
15419 result = e2d;
15420 }
15421 else if (getFlavor() == 2)
15422 {
15423 const double l2d (deLog2(xd));
15424 const fp16type l2 (l2d);
15425 const double e2d (deExp2(yd * l2.asDouble()));
15426 const fp16type e2 (e2d);
15427
15428 result = e2.asDouble();
15429 }
15430 else
15431 {
15432 TCU_THROW(InternalError, "Unknown flavor");
15433 }
15434
15435 out[0] = fp16type(result).bits();
15436 min[0] = getMin(result, ulps);
15437 max[0] = getMax(result, ulps);
15438
15439 return true;
15440 }
15441 };
15442
15443 struct fp16FMin : public fp16PerComponent
15444 {
15445 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMin15446 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15447 {
15448 const fp16type x (*in[0]);
15449 const fp16type y (*in[1]);
15450 const double xd (x.asDouble());
15451 const double yd (y.asDouble());
15452 const double result (deMin(xd, yd));
15453
15454 if (x.isNaN() || y.isNaN())
15455 return false;
15456
15457 out[0] = fp16type(result).bits();
15458 min[0] = getMin(result, getULPs(in));
15459 max[0] = getMax(result, getULPs(in));
15460
15461 return true;
15462 }
15463 };
15464
15465 struct fp16FMax : public fp16PerComponent
15466 {
15467 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMax15468 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15469 {
15470 const fp16type x (*in[0]);
15471 const fp16type y (*in[1]);
15472 const double xd (x.asDouble());
15473 const double yd (y.asDouble());
15474 const double result (deMax(xd, yd));
15475
15476 if (x.isNaN() || y.isNaN())
15477 return false;
15478
15479 out[0] = fp16type(result).bits();
15480 min[0] = getMin(result, getULPs(in));
15481 max[0] = getMax(result, getULPs(in));
15482
15483 return true;
15484 }
15485 };
15486
15487 struct fp16Step : public fp16PerComponent
15488 {
15489 template<class fp16type>
calcvkt::SpirVAssembly::fp16Step15490 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15491 {
15492 const fp16type edge (*in[0]);
15493 const fp16type x (*in[1]);
15494 const double edged (edge.asDouble());
15495 const double xd (x.asDouble());
15496 const double result (deStep(edged, xd));
15497
15498 out[0] = fp16type(result).bits();
15499 min[0] = getMin(result, getULPs(in));
15500 max[0] = getMax(result, getULPs(in));
15501
15502 return true;
15503 }
15504 };
15505
15506 struct fp16Ldexp : public fp16PerComponent
15507 {
15508 template<class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15509 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15510 {
15511 const fp16type x (*in[0]);
15512 const fp16type y (*in[1]);
15513 const double xd (x.asDouble());
15514 const int yd (static_cast<int>(deTrunc(y.asDouble())));
15515 const double result (deLdExp(xd, yd));
15516
15517 if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15518 return false;
15519
15520 // Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15521 if (fp16type(result).isInf())
15522 return false;
15523
15524 out[0] = fp16type(result).bits();
15525 min[0] = getMin(result, getULPs(in));
15526 max[0] = getMax(result, getULPs(in));
15527
15528 return true;
15529 }
15530 };
15531
15532 struct fp16FClamp : public fp16PerComponent
15533 {
15534 template<class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15535 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15536 {
15537 const fp16type x (*in[0]);
15538 const fp16type minVal (*in[1]);
15539 const fp16type maxVal (*in[2]);
15540 const double xd (x.asDouble());
15541 const double minVald (minVal.asDouble());
15542 const double maxVald (maxVal.asDouble());
15543 const double result (deClamp(xd, minVald, maxVald));
15544
15545 if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15546 return false;
15547
15548 out[0] = fp16type(result).bits();
15549 min[0] = getMin(result, getULPs(in));
15550 max[0] = getMax(result, getULPs(in));
15551
15552 return true;
15553 }
15554 };
15555
15556 struct fp16FMix : public fp16PerComponent
15557 {
fp16FMixvkt::SpirVAssembly::fp16FMix15558 fp16FMix() : fp16PerComponent()
15559 {
15560 flavorNames.push_back("DoubleCalc");
15561 flavorNames.push_back("EmulatingFP16");
15562 flavorNames.push_back("EmulatingFP16YminusX");
15563 }
15564
15565 template<class fp16type>
calcvkt::SpirVAssembly::fp16FMix15566 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15567 {
15568 const fp16type x (*in[0]);
15569 const fp16type y (*in[1]);
15570 const fp16type a (*in[2]);
15571 const double ulps (8.0); // This is not a precision test. Value is not from spec
15572 double result (0.0);
15573
15574 if (getFlavor() == 0)
15575 {
15576 const double xd (x.asDouble());
15577 const double yd (y.asDouble());
15578 const double ad (a.asDouble());
15579 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15580 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15581 const double eps (xeps + yeps);
15582
15583 result = deMix(xd, yd, ad);
15584 min[0] = result - eps;
15585 max[0] = result + eps;
15586 }
15587 else if (getFlavor() == 1)
15588 {
15589 const double xd (x.asDouble());
15590 const double yd (y.asDouble());
15591 const double ad (a.asDouble());
15592 const fp16type am (1.0 - ad);
15593 const double amd (am.asDouble());
15594 const fp16type xam (xd * amd);
15595 const double xamd (xam.asDouble());
15596 const fp16type ya (yd * ad);
15597 const double yad (ya.asDouble());
15598 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15599 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15600 const double eps (xeps + yeps);
15601
15602 result = xamd + yad;
15603 min[0] = result - eps;
15604 max[0] = result + eps;
15605 }
15606 else if (getFlavor() == 2)
15607 {
15608 const double xd (x.asDouble());
15609 const double yd (y.asDouble());
15610 const double ad (a.asDouble());
15611 const fp16type ymx (yd - xd);
15612 const double ymxd (ymx.asDouble());
15613 const fp16type ymxa (ymxd * ad);
15614 const double ymxad (ymxa.asDouble());
15615 const double xeps (floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15616 const double yeps (floatFormat16.ulp(deAbs(yd * ad), ulps));
15617 const double eps (xeps + yeps);
15618
15619 result = xd + ymxad;
15620 min[0] = result - eps;
15621 max[0] = result + eps;
15622 }
15623 else
15624 {
15625 TCU_THROW(InternalError, "Unknown flavor");
15626 }
15627
15628 out[0] = fp16type(result).bits();
15629
15630 return true;
15631 }
15632 };
15633
15634 struct fp16SmoothStep : public fp16PerComponent
15635 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep15636 fp16SmoothStep() : fp16PerComponent()
15637 {
15638 flavorNames.push_back("FloatCalc");
15639 flavorNames.push_back("EmulatingFP16");
15640 flavorNames.push_back("EmulatingFP16WClamp");
15641 }
15642
getULPsvkt::SpirVAssembly::fp16SmoothStep15643 virtual double getULPs(vector<const deFloat16*>& in)
15644 {
15645 DE_UNREF(in);
15646
15647 return 4.0; // This is not a precision test. Value is not from spec
15648 }
15649
15650 template<class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep15651 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15652 {
15653 const fp16type edge0 (*in[0]);
15654 const fp16type edge1 (*in[1]);
15655 const fp16type x (*in[2]);
15656 double result (0.0);
15657
15658 if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15659 return false;
15660
15661 if (edge0.isInf() || edge1.isInf() || x.isInf())
15662 return false;
15663
15664 if (getFlavor() == 0)
15665 {
15666 const float edge0d (edge0.asFloat());
15667 const float edge1d (edge1.asFloat());
15668 const float xd (x.asFloat());
15669 const float sstep (deFloatSmoothStep(edge0d, edge1d, xd));
15670
15671 result = sstep;
15672 }
15673 else if (getFlavor() == 1)
15674 {
15675 const double edge0d (edge0.asDouble());
15676 const double edge1d (edge1.asDouble());
15677 const double xd (x.asDouble());
15678
15679 if (xd <= edge0d)
15680 result = 0.0;
15681 else if (xd >= edge1d)
15682 result = 1.0;
15683 else
15684 {
15685 const fp16type a (xd - edge0d);
15686 const fp16type b (edge1d - edge0d);
15687 const fp16type t (a.asDouble() / b.asDouble());
15688 const fp16type t2 (2.0 * t.asDouble());
15689 const fp16type t3 (3.0 - t2.asDouble());
15690 const fp16type t4 (t.asDouble() * t3.asDouble());
15691 const fp16type t5 (t.asDouble() * t4.asDouble());
15692
15693 result = t5.asDouble();
15694 }
15695 }
15696 else if (getFlavor() == 2)
15697 {
15698 const double edge0d (edge0.asDouble());
15699 const double edge1d (edge1.asDouble());
15700 const double xd (x.asDouble());
15701 const fp16type a (xd - edge0d);
15702 const fp16type b (edge1d - edge0d);
15703 const fp16type bi (1.0 / b.asDouble());
15704 const fp16type t0 (a.asDouble() * bi.asDouble());
15705 const double tc (deClamp(t0.asDouble(), 0.0, 1.0));
15706 const fp16type t (tc);
15707 const fp16type t2 (2.0 * t.asDouble());
15708 const fp16type t3 (3.0 - t2.asDouble());
15709 const fp16type t4 (t.asDouble() * t3.asDouble());
15710 const fp16type t5 (t.asDouble() * t4.asDouble());
15711
15712 result = t5.asDouble();
15713 }
15714 else
15715 {
15716 TCU_THROW(InternalError, "Unknown flavor");
15717 }
15718
15719 out[0] = fp16type(result).bits();
15720 min[0] = getMin(result, getULPs(in));
15721 max[0] = getMax(result, getULPs(in));
15722
15723 return true;
15724 }
15725 };
15726
15727 struct fp16Fma : public fp16PerComponent
15728 {
fp16Fmavkt::SpirVAssembly::fp16Fma15729 fp16Fma()
15730 {
15731 flavorNames.push_back("DoubleCalc");
15732 flavorNames.push_back("EmulatingFP16");
15733 }
15734
getULPsvkt::SpirVAssembly::fp16Fma15735 virtual double getULPs(vector<const deFloat16*>& in)
15736 {
15737 DE_UNREF(in);
15738
15739 return 16.0;
15740 }
15741
15742 template<class fp16type>
calcvkt::SpirVAssembly::fp16Fma15743 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15744 {
15745 DE_ASSERT(in.size() == 3);
15746 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15747 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15748 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15749 DE_ASSERT(getOutCompCount() > 0);
15750
15751 const fp16type a (*in[0]);
15752 const fp16type b (*in[1]);
15753 const fp16type c (*in[2]);
15754 double result (0.0);
15755
15756 if (getFlavor() == 0)
15757 {
15758 const double ad (a.asDouble());
15759 const double bd (b.asDouble());
15760 const double cd (c.asDouble());
15761
15762 result = deMadd(ad, bd, cd);
15763 }
15764 else if (getFlavor() == 1)
15765 {
15766 const double ad (a.asDouble());
15767 const double bd (b.asDouble());
15768 const double cd (c.asDouble());
15769 const fp16type ab (ad * bd);
15770 const fp16type r (ab.asDouble() + cd);
15771
15772 result = r.asDouble();
15773 }
15774 else
15775 {
15776 TCU_THROW(InternalError, "Unknown flavor");
15777 }
15778
15779 out[0] = fp16type(result).bits();
15780 min[0] = getMin(result, getULPs(in));
15781 max[0] = getMax(result, getULPs(in));
15782
15783 return true;
15784 }
15785 };
15786
15787
15788 struct fp16AllComponents : public fp16PerComponent
15789 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents15790 bool callOncePerComponent () { return false; }
15791 };
15792
15793 struct fp16Length : public fp16AllComponents
15794 {
fp16Lengthvkt::SpirVAssembly::fp16Length15795 fp16Length() : fp16AllComponents()
15796 {
15797 flavorNames.push_back("EmulatingFP16");
15798 flavorNames.push_back("DoubleCalc");
15799 }
15800
getULPsvkt::SpirVAssembly::fp16Length15801 virtual double getULPs(vector<const deFloat16*>& in)
15802 {
15803 DE_UNREF(in);
15804
15805 return 4.0;
15806 }
15807
15808 template<class fp16type>
calcvkt::SpirVAssembly::fp16Length15809 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15810 {
15811 DE_ASSERT(getOutCompCount() == 1);
15812 DE_ASSERT(in.size() == 1);
15813
15814 double result (0.0);
15815
15816 if (getFlavor() == 0)
15817 {
15818 fp16type r (0.0);
15819
15820 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15821 {
15822 const fp16type x (in[0][componentNdx]);
15823 const fp16type q (x.asDouble() * x.asDouble());
15824
15825 r = fp16type(r.asDouble() + q.asDouble());
15826 }
15827
15828 result = deSqrt(r.asDouble());
15829
15830 out[0] = fp16type(result).bits();
15831 }
15832 else if (getFlavor() == 1)
15833 {
15834 double r (0.0);
15835
15836 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15837 {
15838 const fp16type x (in[0][componentNdx]);
15839 const double q (x.asDouble() * x.asDouble());
15840
15841 r += q;
15842 }
15843
15844 result = deSqrt(r);
15845
15846 out[0] = fp16type(result).bits();
15847 }
15848 else
15849 {
15850 TCU_THROW(InternalError, "Unknown flavor");
15851 }
15852
15853 min[0] = getMin(result, getULPs(in));
15854 max[0] = getMax(result, getULPs(in));
15855
15856 return true;
15857 }
15858 };
15859
15860 struct fp16Distance : public fp16AllComponents
15861 {
fp16Distancevkt::SpirVAssembly::fp16Distance15862 fp16Distance() : fp16AllComponents()
15863 {
15864 flavorNames.push_back("EmulatingFP16");
15865 flavorNames.push_back("DoubleCalc");
15866 }
15867
getULPsvkt::SpirVAssembly::fp16Distance15868 virtual double getULPs(vector<const deFloat16*>& in)
15869 {
15870 DE_UNREF(in);
15871
15872 return 4.0;
15873 }
15874
15875 template<class fp16type>
calcvkt::SpirVAssembly::fp16Distance15876 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15877 {
15878 DE_ASSERT(getOutCompCount() == 1);
15879 DE_ASSERT(in.size() == 2);
15880 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15881
15882 double result (0.0);
15883
15884 if (getFlavor() == 0)
15885 {
15886 fp16type r (0.0);
15887
15888 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15889 {
15890 const fp16type x (in[0][componentNdx]);
15891 const fp16type y (in[1][componentNdx]);
15892 const fp16type d (x.asDouble() - y.asDouble());
15893 const fp16type q (d.asDouble() * d.asDouble());
15894
15895 r = fp16type(r.asDouble() + q.asDouble());
15896 }
15897
15898 result = deSqrt(r.asDouble());
15899 }
15900 else if (getFlavor() == 1)
15901 {
15902 double r (0.0);
15903
15904 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15905 {
15906 const fp16type x (in[0][componentNdx]);
15907 const fp16type y (in[1][componentNdx]);
15908 const double d (x.asDouble() - y.asDouble());
15909 const double q (d * d);
15910
15911 r += q;
15912 }
15913
15914 result = deSqrt(r);
15915 }
15916 else
15917 {
15918 TCU_THROW(InternalError, "Unknown flavor");
15919 }
15920
15921 out[0] = fp16type(result).bits();
15922 min[0] = getMin(result, getULPs(in));
15923 max[0] = getMax(result, getULPs(in));
15924
15925 return true;
15926 }
15927 };
15928
15929 struct fp16Cross : public fp16AllComponents
15930 {
fp16Crossvkt::SpirVAssembly::fp16Cross15931 fp16Cross() : fp16AllComponents()
15932 {
15933 flavorNames.push_back("EmulatingFP16");
15934 flavorNames.push_back("DoubleCalc");
15935 }
15936
getULPsvkt::SpirVAssembly::fp16Cross15937 virtual double getULPs(vector<const deFloat16*>& in)
15938 {
15939 DE_UNREF(in);
15940
15941 return 4.0;
15942 }
15943
15944 template<class fp16type>
calcvkt::SpirVAssembly::fp16Cross15945 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15946 {
15947 DE_ASSERT(getOutCompCount() == 3);
15948 DE_ASSERT(in.size() == 2);
15949 DE_ASSERT(getArgCompCount(0) == 3);
15950 DE_ASSERT(getArgCompCount(1) == 3);
15951
15952 if (getFlavor() == 0)
15953 {
15954 const fp16type x0 (in[0][0]);
15955 const fp16type x1 (in[0][1]);
15956 const fp16type x2 (in[0][2]);
15957 const fp16type y0 (in[1][0]);
15958 const fp16type y1 (in[1][1]);
15959 const fp16type y2 (in[1][2]);
15960 const fp16type x1y2 (x1.asDouble() * y2.asDouble());
15961 const fp16type y1x2 (y1.asDouble() * x2.asDouble());
15962 const fp16type x2y0 (x2.asDouble() * y0.asDouble());
15963 const fp16type y2x0 (y2.asDouble() * x0.asDouble());
15964 const fp16type x0y1 (x0.asDouble() * y1.asDouble());
15965 const fp16type y0x1 (y0.asDouble() * x1.asDouble());
15966
15967 out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15968 out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15969 out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15970 }
15971 else if (getFlavor() == 1)
15972 {
15973 const fp16type x0 (in[0][0]);
15974 const fp16type x1 (in[0][1]);
15975 const fp16type x2 (in[0][2]);
15976 const fp16type y0 (in[1][0]);
15977 const fp16type y1 (in[1][1]);
15978 const fp16type y2 (in[1][2]);
15979 const double x1y2 (x1.asDouble() * y2.asDouble());
15980 const double y1x2 (y1.asDouble() * x2.asDouble());
15981 const double x2y0 (x2.asDouble() * y0.asDouble());
15982 const double y2x0 (y2.asDouble() * x0.asDouble());
15983 const double x0y1 (x0.asDouble() * y1.asDouble());
15984 const double y0x1 (y0.asDouble() * x1.asDouble());
15985
15986 out[0] = fp16type(x1y2 - y1x2).bits();
15987 out[1] = fp16type(x2y0 - y2x0).bits();
15988 out[2] = fp16type(x0y1 - y0x1).bits();
15989 }
15990 else
15991 {
15992 TCU_THROW(InternalError, "Unknown flavor");
15993 }
15994
15995 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15996 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15997 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15998 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15999
16000 return true;
16001 }
16002 };
16003
16004 struct fp16Normalize : public fp16AllComponents
16005 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16006 fp16Normalize() : fp16AllComponents()
16007 {
16008 flavorNames.push_back("EmulatingFP16");
16009 flavorNames.push_back("DoubleCalc");
16010
16011 permutationsFlavorStart = 0;
16012 permutationsFlavorEnd = flavorNames.size();
16013
16014 // flavorNames will be extended later
16015 }
16016
setArgCompCountvkt::SpirVAssembly::fp16Normalize16017 virtual void setArgCompCount (size_t argNo, size_t compCount)
16018 {
16019 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16020
16021 if (argNo == 0 && argCompCount[argNo] == 0)
16022 {
16023 const size_t maxPermutationsCount = 24u; // Equal to 4!
16024 std::vector<int> indices;
16025
16026 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16027 indices.push_back(static_cast<int>(componentNdx));
16028
16029 m_permutations.reserve(maxPermutationsCount);
16030
16031 permutationsFlavorStart = flavorNames.size();
16032
16033 do
16034 {
16035 tcu::UVec4 permutation;
16036 std::string name = "Permutted_";
16037
16038 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16039 {
16040 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16041 name += de::toString(indices[componentNdx]);
16042 }
16043
16044 m_permutations.push_back(permutation);
16045 flavorNames.push_back(name);
16046
16047 } while(std::next_permutation(indices.begin(), indices.end()));
16048
16049 permutationsFlavorEnd = flavorNames.size();
16050 }
16051
16052 fp16AllComponents::setArgCompCount(argNo, compCount);
16053 }
getULPsvkt::SpirVAssembly::fp16Normalize16054 virtual double getULPs(vector<const deFloat16*>& in)
16055 {
16056 DE_UNREF(in);
16057
16058 return 8.0;
16059 }
16060
16061 template<class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16062 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16063 {
16064 DE_ASSERT(in.size() == 1);
16065 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16066
16067 if (getFlavor() == 0)
16068 {
16069 fp16type r(0.0);
16070
16071 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16072 {
16073 const fp16type x (in[0][componentNdx]);
16074 const fp16type q (x.asDouble() * x.asDouble());
16075
16076 r = fp16type(r.asDouble() + q.asDouble());
16077 }
16078
16079 r = fp16type(deSqrt(r.asDouble()));
16080
16081 if (r.isZero())
16082 return false;
16083
16084 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16085 {
16086 const fp16type x (in[0][componentNdx]);
16087
16088 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16089 }
16090 }
16091 else if (getFlavor() == 1)
16092 {
16093 double r(0.0);
16094
16095 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16096 {
16097 const fp16type x (in[0][componentNdx]);
16098 const double q (x.asDouble() * x.asDouble());
16099
16100 r += q;
16101 }
16102
16103 r = deSqrt(r);
16104
16105 if (r == 0)
16106 return false;
16107
16108 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16109 {
16110 const fp16type x (in[0][componentNdx]);
16111
16112 out[componentNdx] = fp16type(x.asDouble() / r).bits();
16113 }
16114 }
16115 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16116 {
16117 const int compCount (static_cast<int>(getArgCompCount(0)));
16118 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16119 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16120 fp16type r (0.0);
16121
16122 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16123 {
16124 const size_t componentNdx (permutation[permComponentNdx]);
16125 const fp16type x (in[0][componentNdx]);
16126 const fp16type q (x.asDouble() * x.asDouble());
16127
16128 r = fp16type(r.asDouble() + q.asDouble());
16129 }
16130
16131 r = fp16type(deSqrt(r.asDouble()));
16132
16133 if (r.isZero())
16134 return false;
16135
16136 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16137 {
16138 const size_t componentNdx (permutation[permComponentNdx]);
16139 const fp16type x (in[0][componentNdx]);
16140
16141 out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16142 }
16143 }
16144 else
16145 {
16146 TCU_THROW(InternalError, "Unknown flavor");
16147 }
16148
16149 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16150 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16151 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16152 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16153
16154 return true;
16155 }
16156
16157 private:
16158 std::vector<tcu::UVec4> m_permutations;
16159 size_t permutationsFlavorStart;
16160 size_t permutationsFlavorEnd;
16161 };
16162
16163 struct fp16FaceForward : public fp16AllComponents
16164 {
getULPsvkt::SpirVAssembly::fp16FaceForward16165 virtual double getULPs(vector<const deFloat16*>& in)
16166 {
16167 DE_UNREF(in);
16168
16169 return 4.0;
16170 }
16171
16172 template<class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16173 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16174 {
16175 DE_ASSERT(in.size() == 3);
16176 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16177 DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16178 DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16179
16180 fp16type dp(0.0);
16181
16182 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16183 {
16184 const fp16type x (in[1][componentNdx]);
16185 const fp16type y (in[2][componentNdx]);
16186 const double xd (x.asDouble());
16187 const double yd (y.asDouble());
16188 const fp16type q (xd * yd);
16189
16190 dp = fp16type(dp.asDouble() + q.asDouble());
16191 }
16192
16193 if (dp.isNaN() || dp.isZero())
16194 return false;
16195
16196 for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16197 {
16198 const fp16type n (in[0][componentNdx]);
16199
16200 out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16201 }
16202
16203 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16204 min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16205 for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16206 max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16207
16208 return true;
16209 }
16210 };
16211
// Reference calculation for fp16 Reflect: out = i - 2 * dp * n, where i = in[0],
// n = in[1] and dp is the sum of the component-wise products i * n.
//
// The flavors differ only in where intermediate values are rounded and in how the
// factor 2 is applied:
//   0 "EmulatingFP16"              - every step rounded to fp16
//   1 "EmulatingFP16+KeepZeroSign" - as 0, accumulator seeded with the first product
//   2 "FloatCalc"                  - evaluated in float, only the result is rounded
//   3 "FloatCalc+KeepZeroSign"     - as 2, accumulator seeded with the first product
//   4 "EmulatingFP16+2Nfirst"      - fp16 steps, 2 * n is formed before multiplying by dp
//   5 "EmulatingFP16+2Ifirst"      - fp16 steps, 2 * i is formed before the dot product
struct fp16Reflect : public fp16AllComponents
{
	fp16Reflect() : fp16AllComponents()
	{
		flavorNames.push_back("EmulatingFP16");
		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
		flavorNames.push_back("FloatCalc");
		flavorNames.push_back("FloatCalc+KeepZeroSign");
		flavorNames.push_back("EmulatingFP16+2Nfirst");
		flavorNames.push_back("EmulatingFP16+2Ifirst");
	}

	// Tolerance (in ULPs) handed to getMin()/getMax() for every output component.
	virtual double getULPs(vector<const deFloat16*>& in)
	{
		DE_UNREF(in);

		return 256.0; // This is not a precision test. Value is not from spec
	}

	// Writes the reflected vector to out and the accepted bounds to min/max.
	// Returns false when an fp16-accumulated dot product is NaN (the float path has
	// no such check), true otherwise. Throws InternalError for a flavor above 5.
	template<class fp16type>
	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
	{
		DE_ASSERT(in.size() == 2);
		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
		DE_ASSERT(getArgCompCount(1) == getOutCompCount());

		if (getFlavor() < 4)
		{
			// Flavors 0..3 are a 2-bit field: bit 0 = keep zero sign, bit 1 = float math.
			const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
			const bool floatCalc ((flavor & 2) != 0 ? true : false);

			if (floatCalc)
			{
				float dp(0.0f);

				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const float id (i.asFloat());
					const float nd (n.asFloat());
					const float qd (id * nd);

					// With keepZeroSign the first product replaces the initial +0.0
					// instead of being added to it.
					if (keepZeroSign)
						dp = (componentNdx == 0) ? qd : dp + qd;
					else
						dp = dp + qd;
				}

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const float dpnd (dp * n.asFloat());
					const float dpn2d (2.0f * dpnd);
					const float idpn2d (i.asFloat() - dpn2d);
					const fp16type result (idpn2d);

					out[componentNdx] = result.bits();
				}
			}
			else
			{
				fp16type dp(0.0);

				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const double id (i.asDouble());
					const double nd (n.asDouble());
					const fp16type q (id * nd);

					// With keepZeroSign the first product replaces the initial +0.0
					// instead of being added to it.
					if (keepZeroSign)
						dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
					else
						dp = fp16type(dp.asDouble() + q.asDouble());
				}

				if (dp.isNaN())
					return false;

				// Rounding order: (dp * n) -> fp16, (2 * that) -> fp16, (i - that) -> fp16.
				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const fp16type dpn (dp.asDouble() * n.asDouble());
					const fp16type dpn2 (2 * dpn.asDouble());
					const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

					out[componentNdx] = idpn2.bits();
				}
			}
		}
		else if (getFlavor() == 4)
		{
			fp16type dp(0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const fp16type q (id * nd);

				dp = fp16type(dp.asDouble() + q.asDouble());
			}

			if (dp.isNaN())
				return false;

			// Rounding order: (2 * n) -> fp16, (dp * that) -> fp16, (i - that) -> fp16.
			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type n2 (2 * n.asDouble());
				const fp16type dpn2 (dp.asDouble() * n2.asDouble());
				const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

				out[componentNdx] = idpn2.bits();
			}
		}
		else if (getFlavor() == 5)
		{
			// dp2 accumulates (2 * i) * n, i.e. the factor 2 is folded into the dot product.
			fp16type dp2(0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type i2 (2.0 * i.asDouble());
				const double i2d (i2.asDouble());
				const double nd (n.asDouble());
				const fp16type q (i2d * nd);

				dp2 = fp16type(dp2.asDouble() + q.asDouble());
			}

			if (dp2.isNaN())
				return false;

			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const fp16type dpn2 (dp2.asDouble() * n.asDouble());
				const fp16type idpn2 (i.asDouble() - dpn2.asDouble());

				out[componentNdx] = idpn2.bits();
			}
		}
		else
		{
			TCU_THROW(InternalError, "Unknown flavor");
		}

		// Bounds are derived from the already rounded fp16 result.
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

		return true;
	}
};
16377
// Reference calculation for fp16 Refract with i = in[0], n = in[1], eta = in[2][0]:
//   dp  = sum of component-wise products i * n
//   k   = 1 - eta^2 * (1 - dp^2)
//   out = 0                                   when k < 0
//   out = eta * i - (eta * dp + sqrt(k)) * n  otherwise
//
// Flavors are a 2-bit field: bit 0 seeds the dot-product accumulator with the first
// product, bit 1 selects wide-precision evaluation instead of per-step fp16 rounding.
struct fp16Refract : public fp16AllComponents
{
	fp16Refract() : fp16AllComponents()
	{
		flavorNames.push_back("EmulatingFP16");
		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
		flavorNames.push_back("FloatCalc");
		flavorNames.push_back("FloatCalc+KeepZeroSign");
	}

	// Tolerance (in ULPs) handed to getMin()/getMax() for every output component.
	virtual double getULPs(vector<const deFloat16*>& in)
	{
		DE_UNREF(in);

		return 8192.0; // This is not a precision test. Value is not from spec
	}

	// Writes the refracted vector to out and the accepted bounds to min/max.
	// Returns false when a result component is infinite, or (fp16 path only) when
	// the dot product or a result component is NaN; returns true otherwise.
	template<class fp16type>
	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
	{
		DE_ASSERT(in.size() == 3);
		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
		DE_ASSERT(getArgCompCount(2) == 1);

		const bool keepZeroSign ((flavor & 1) != 0 ? true : false);
		// NOTE(review): the matching flavor names say "FloatCalc", but this path
		// evaluates in double - confirm whether the naming difference is intended.
		const bool doubleCalc ((flavor & 2) != 0 ? true : false);
		const fp16type eta (*in[2]);

		if (doubleCalc)
		{
			double dp (0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const double qd (id * nd);

				// With keepZeroSign the first product replaces the initial +0.0
				// instead of being added to it.
				if (keepZeroSign)
					dp = (componentNdx == 0) ? qd : dp + qd;
				else
					dp = dp + qd;
			}

			// k = 1 - eta^2 * (1 - dp^2), built up one operation at a time.
			const double eta2 (eta.asDouble() * eta.asDouble());
			const double dp2 (dp * dp);
			const double dp1 (1.0 - dp2);
			const double dpe (eta2 * dp1);
			const double k (1.0 - dpe);

			if (k < 0.0)
			{
				const fp16type zero (0.0);

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
					out[componentNdx] = zero.bits();
			}
			else
			{
				const double sk (deSqrt(k));

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const double etai (i.asDouble() * eta.asDouble());
					const double etadp (eta.asDouble() * dp);
					const double etadpk (etadp + sk);
					const double etadpkn (etadpk * n.asDouble());
					const double full (etai - etadpkn);
					const fp16type result (full);

					// Only infinity is rejected here; unlike the fp16 path there is
					// no NaN check.
					if (result.isInf())
						return false;

					out[componentNdx] = result.bits();
				}
			}
		}
		else
		{
			fp16type dp (0.0);

			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
			{
				const fp16type i (in[0][componentNdx]);
				const fp16type n (in[1][componentNdx]);
				const double id (i.asDouble());
				const double nd (n.asDouble());
				const fp16type q (id * nd);

				// With keepZeroSign the first product replaces the initial +0.0
				// instead of being added to it.
				if (keepZeroSign)
					dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
				else
					dp = fp16type(dp.asDouble() + q.asDouble());
			}

			if (dp.isNaN())
				return false;

			// Same formula for k as above, with every operation rounded to fp16.
			const fp16type eta2(eta.asDouble() * eta.asDouble());
			const fp16type dp2 (dp.asDouble() * dp.asDouble());
			const fp16type dp1 (1.0 - dp2.asDouble());
			const fp16type dpe (eta2.asDouble() * dp1.asDouble());
			const fp16type k (1.0 - dpe.asDouble());

			if (k.asDouble() < 0.0)
			{
				const fp16type zero (0.0);

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
					out[componentNdx] = zero.bits();
			}
			else
			{
				const fp16type sk (deSqrt(k.asDouble()));

				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
				{
					const fp16type i (in[0][componentNdx]);
					const fp16type n (in[1][componentNdx]);
					const fp16type etai (i.asDouble() * eta.asDouble());
					const fp16type etadp (eta.asDouble() * dp.asDouble());
					const fp16type etadpk (etadp.asDouble() + sk.asDouble());
					const fp16type etadpkn (etadpk.asDouble() * n.asDouble());
					const fp16type full (etai.asDouble() - etadpkn.asDouble());

					if (full.isNaN() || full.isInf())
						return false;

					out[componentNdx] = full.bits();
				}
			}
		}

		// Bounds are derived from the already rounded fp16 result.
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));

		return true;
	}
};
16524
16525 struct fp16Dot : public fp16AllComponents
16526 {
fp16Dotvkt::SpirVAssembly::fp16Dot16527 fp16Dot() : fp16AllComponents()
16528 {
16529 flavorNames.push_back("EmulatingFP16");
16530 flavorNames.push_back("FloatCalc");
16531 flavorNames.push_back("DoubleCalc");
16532
16533 permutationsFlavorStart = 0;
16534 permutationsFlavorEnd = flavorNames.size();
16535
16536 // flavorNames will be extended later
16537 }
16538
setArgCompCountvkt::SpirVAssembly::fp16Dot16539 virtual void setArgCompCount (size_t argNo, size_t compCount)
16540 {
16541 DE_ASSERT(argCompCount[argNo] == 0); // Once only
16542
16543 if (argNo == 0 && argCompCount[argNo] == 0)
16544 {
16545 const size_t maxPermutationsCount = 24u; // Equal to 4!
16546 std::vector<int> indices;
16547
16548 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16549 indices.push_back(static_cast<int>(componentNdx));
16550
16551 m_permutations.reserve(maxPermutationsCount);
16552
16553 permutationsFlavorStart = flavorNames.size();
16554
16555 do
16556 {
16557 tcu::UVec4 permutation;
16558 std::string name = "Permutted_";
16559
16560 for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16561 {
16562 permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16563 name += de::toString(indices[componentNdx]);
16564 }
16565
16566 m_permutations.push_back(permutation);
16567 flavorNames.push_back(name);
16568
16569 } while(std::next_permutation(indices.begin(), indices.end()));
16570
16571 permutationsFlavorEnd = flavorNames.size();
16572 }
16573
16574 fp16AllComponents::setArgCompCount(argNo, compCount);
16575 }
16576
getULPsvkt::SpirVAssembly::fp16Dot16577 virtual double getULPs(vector<const deFloat16*>& in)
16578 {
16579 DE_UNREF(in);
16580
16581 return 16.0; // This is not a precision test. Value is not from spec
16582 }
16583
16584 template<class fp16type>
calcvkt::SpirVAssembly::fp16Dot16585 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16586 {
16587 DE_ASSERT(in.size() == 2);
16588 DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16589 DE_ASSERT(getOutCompCount() == 1);
16590
16591 double result (0.0);
16592 double eps (0.0);
16593
16594 if (getFlavor() == 0)
16595 {
16596 fp16type dp (0.0);
16597
16598 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16599 {
16600 const fp16type x (in[0][componentNdx]);
16601 const fp16type y (in[1][componentNdx]);
16602 const fp16type q (x.asDouble() * y.asDouble());
16603
16604 dp = fp16type(dp.asDouble() + q.asDouble());
16605 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16606 }
16607
16608 result = dp.asDouble();
16609 }
16610 else if (getFlavor() == 1)
16611 {
16612 float dp (0.0);
16613
16614 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16615 {
16616 const fp16type x (in[0][componentNdx]);
16617 const fp16type y (in[1][componentNdx]);
16618 const float q (x.asFloat() * y.asFloat());
16619
16620 dp += q;
16621 eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16622 }
16623
16624 result = dp;
16625 }
16626 else if (getFlavor() == 2)
16627 {
16628 double dp (0.0);
16629
16630 for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16631 {
16632 const fp16type x (in[0][componentNdx]);
16633 const fp16type y (in[1][componentNdx]);
16634 const double q (x.asDouble() * y.asDouble());
16635
16636 dp += q;
16637 eps += floatFormat16.ulp(q, 2.0);
16638 }
16639
16640 result = dp;
16641 }
16642 else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16643 {
16644 const int compCount (static_cast<int>(getArgCompCount(1)));
16645 const size_t permutationNdx (getFlavor() - permutationsFlavorStart);
16646 const tcu::UVec4& permutation (m_permutations[permutationNdx]);
16647 fp16type dp (0.0);
16648
16649 for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16650 {
16651 const size_t componentNdx (permutation[permComponentNdx]);
16652 const fp16type x (in[0][componentNdx]);
16653 const fp16type y (in[1][componentNdx]);
16654 const fp16type q (x.asDouble() * y.asDouble());
16655
16656 dp = fp16type(dp.asDouble() + q.asDouble());
16657 eps += floatFormat16.ulp(q.asDouble(), 2.0);
16658 }
16659
16660 result = dp.asDouble();
16661 }
16662 else
16663 {
16664 TCU_THROW(InternalError, "Unknown flavor");
16665 }
16666
16667 out[0] = fp16type(result).bits();
16668 min[0] = result - eps;
16669 max[0] = result + eps;
16670
16671 return true;
16672 }
16673
16674 private:
16675 std::vector<tcu::UVec4> m_permutations;
16676 size_t permutationsFlavorStart;
16677 size_t permutationsFlavorEnd;
16678 };
16679
16680 struct fp16VectorTimesScalar : public fp16AllComponents
16681 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar16682 virtual double getULPs(vector<const deFloat16*>& in)
16683 {
16684 DE_UNREF(in);
16685
16686 return 2.0;
16687 }
16688
16689 template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar16690 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16691 {
16692 DE_ASSERT(in.size() == 2);
16693 DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16694 DE_ASSERT(getArgCompCount(1) == 1);
16695
16696 fp16type s (*in[1]);
16697
16698 for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16699 {
16700 const fp16type x (in[0][componentNdx]);
16701 const double result (s.asDouble() * x.asDouble());
16702 const fp16type m (result);
16703
16704 out[componentNdx] = m.bits();
16705 min[componentNdx] = getMin(result, getULPs(in));
16706 max[componentNdx] = getMax(result, getULPs(in));
16707 }
16708
16709 return true;
16710 }
16711 };
16712
16713 struct fp16MatrixBase : public fp16AllComponents
16714 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase16715 deUint32 getComponentValidity ()
16716 {
16717 return static_cast<deUint32>(-1);
16718 }
16719
getNdxvkt::SpirVAssembly::fp16MatrixBase16720 inline size_t getNdx (const size_t rowCount, const size_t col, const size_t row)
16721 {
16722 const size_t minComponentCount = 0;
16723 const size_t maxComponentCount = 3;
16724 const size_t alignedRowsCount = (rowCount == 3) ? 4 : rowCount;
16725
16726 DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16727 DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16728 DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16729 DE_UNREF(minComponentCount);
16730 DE_UNREF(maxComponentCount);
16731
16732 return col * alignedRowsCount + row;
16733 }
16734
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase16735 deUint32 getComponentMatrixValidityMask (size_t cols, size_t rows)
16736 {
16737 deUint32 result = 0u;
16738
16739 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16740 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16741 {
16742 const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16743
16744 DE_ASSERT(bitNdx < sizeof(result) * 8);
16745
16746 result |= (1<<bitNdx);
16747 }
16748
16749 return result;
16750 }
16751 };
16752
16753 template<size_t cols, size_t rows>
16754 struct fp16Transpose : public fp16MatrixBase
16755 {
getULPsvkt::SpirVAssembly::fp16Transpose16756 virtual double getULPs(vector<const deFloat16*>& in)
16757 {
16758 DE_UNREF(in);
16759
16760 return 1.0;
16761 }
16762
getComponentValidityvkt::SpirVAssembly::fp16Transpose16763 deUint32 getComponentValidity ()
16764 {
16765 return getComponentMatrixValidityMask(rows, cols);
16766 }
16767
16768 template<class fp16type>
calcvkt::SpirVAssembly::fp16Transpose16769 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16770 {
16771 DE_ASSERT(in.size() == 1);
16772
16773 const size_t alignedCols = (cols == 3) ? 4 : cols;
16774 const size_t alignedRows = (rows == 3) ? 4 : rows;
16775 vector<deFloat16> output (alignedCols * alignedRows, 0);
16776
16777 DE_ASSERT(output.size() == alignedCols * alignedRows);
16778
16779 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16780 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16781 output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16782
16783 deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16784 deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16785 deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16786
16787 return true;
16788 }
16789 };
16790
16791 template<size_t cols, size_t rows>
16792 struct fp16MatrixTimesScalar : public fp16MatrixBase
16793 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar16794 virtual double getULPs(vector<const deFloat16*>& in)
16795 {
16796 DE_UNREF(in);
16797
16798 return 4.0;
16799 }
16800
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar16801 deUint32 getComponentValidity ()
16802 {
16803 return getComponentMatrixValidityMask(cols, rows);
16804 }
16805
16806 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar16807 bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16808 {
16809 DE_ASSERT(in.size() == 2);
16810 DE_ASSERT(getArgCompCount(1) == 1);
16811
16812 const fp16type y (in[1][0]);
16813 const float scalar (y.asFloat());
16814 const size_t alignedCols = (cols == 3) ? 4 : cols;
16815 const size_t alignedRows = (rows == 3) ? 4 : rows;
16816
16817 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16818 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16819 DE_UNREF(alignedCols);
16820
16821 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16822 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16823 {
16824 const size_t ndx (colNdx * alignedRows + rowNdx);
16825 const fp16type x (in[0][ndx]);
16826 const double result (scalar * x.asFloat());
16827
16828 out[ndx] = fp16type(result).bits();
16829 min[ndx] = getMin(result, getULPs(in));
16830 max[ndx] = getMax(result, getULPs(in));
16831 }
16832
16833 return true;
16834 }
16835 };
16836
16837 template<size_t cols, size_t rows>
16838 struct fp16VectorTimesMatrix : public fp16MatrixBase
16839 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix16840 fp16VectorTimesMatrix() : fp16MatrixBase()
16841 {
16842 flavorNames.push_back("EmulatingFP16");
16843 flavorNames.push_back("FloatCalc");
16844 }
16845
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix16846 virtual double getULPs (vector<const deFloat16*>& in)
16847 {
16848 DE_UNREF(in);
16849
16850 return (8.0 * cols);
16851 }
16852
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix16853 deUint32 getComponentValidity ()
16854 {
16855 return getComponentMatrixValidityMask(cols, 1);
16856 }
16857
16858 template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix16859 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16860 {
16861 DE_ASSERT(in.size() == 2);
16862
16863 const size_t alignedCols = (cols == 3) ? 4 : cols;
16864 const size_t alignedRows = (rows == 3) ? 4 : rows;
16865
16866 DE_ASSERT(getOutCompCount() == cols);
16867 DE_ASSERT(getArgCompCount(0) == rows);
16868 DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16869 DE_UNREF(alignedCols);
16870
16871 if (getFlavor() == 0)
16872 {
16873 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16874 {
16875 fp16type s (fp16type::zero(1));
16876
16877 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16878 {
16879 const fp16type v (in[0][rowNdx]);
16880 const float vf (v.asFloat());
16881 const size_t ndx (colNdx * alignedRows + rowNdx);
16882 const fp16type x (in[1][ndx]);
16883 const float xf (x.asFloat());
16884 const fp16type m (vf * xf);
16885
16886 s = fp16type(s.asFloat() + m.asFloat());
16887 }
16888
16889 out[colNdx] = s.bits();
16890 min[colNdx] = getMin(s.asDouble(), getULPs(in));
16891 max[colNdx] = getMax(s.asDouble(), getULPs(in));
16892 }
16893 }
16894 else if (getFlavor() == 1)
16895 {
16896 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16897 {
16898 float s (0.0f);
16899
16900 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16901 {
16902 const fp16type v (in[0][rowNdx]);
16903 const float vf (v.asFloat());
16904 const size_t ndx (colNdx * alignedRows + rowNdx);
16905 const fp16type x (in[1][ndx]);
16906 const float xf (x.asFloat());
16907 const float m (vf * xf);
16908
16909 s += m;
16910 }
16911
16912 out[colNdx] = fp16type(s).bits();
16913 min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16914 max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16915 }
16916 }
16917 else
16918 {
16919 TCU_THROW(InternalError, "Unknown flavor");
16920 }
16921
16922 return true;
16923 }
16924 };
16925
16926 template<size_t cols, size_t rows>
16927 struct fp16MatrixTimesVector : public fp16MatrixBase
16928 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector16929 fp16MatrixTimesVector() : fp16MatrixBase()
16930 {
16931 flavorNames.push_back("EmulatingFP16");
16932 flavorNames.push_back("FloatCalc");
16933 }
16934
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector16935 virtual double getULPs (vector<const deFloat16*>& in)
16936 {
16937 DE_UNREF(in);
16938
16939 return (8.0 * rows);
16940 }
16941
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector16942 deUint32 getComponentValidity ()
16943 {
16944 return getComponentMatrixValidityMask(rows, 1);
16945 }
16946
16947 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector16948 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16949 {
16950 DE_ASSERT(in.size() == 2);
16951
16952 const size_t alignedCols = (cols == 3) ? 4 : cols;
16953 const size_t alignedRows = (rows == 3) ? 4 : rows;
16954
16955 DE_ASSERT(getOutCompCount() == rows);
16956 DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16957 DE_ASSERT(getArgCompCount(1) == cols);
16958 DE_UNREF(alignedCols);
16959
16960 if (getFlavor() == 0)
16961 {
16962 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16963 {
16964 fp16type s (fp16type::zero(1));
16965
16966 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16967 {
16968 const size_t ndx (colNdx * alignedRows + rowNdx);
16969 const fp16type x (in[0][ndx]);
16970 const float xf (x.asFloat());
16971 const fp16type v (in[1][colNdx]);
16972 const float vf (v.asFloat());
16973 const fp16type m (vf * xf);
16974
16975 s = fp16type(s.asFloat() + m.asFloat());
16976 }
16977
16978 out[rowNdx] = s.bits();
16979 min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16980 max[rowNdx] = getMax(s.asDouble(), getULPs(in));
16981 }
16982 }
16983 else if (getFlavor() == 1)
16984 {
16985 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16986 {
16987 float s (0.0f);
16988
16989 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16990 {
16991 const size_t ndx (colNdx * alignedRows + rowNdx);
16992 const fp16type x (in[0][ndx]);
16993 const float xf (x.asFloat());
16994 const fp16type v (in[1][colNdx]);
16995 const float vf (v.asFloat());
16996 const float m (vf * xf);
16997
16998 s += m;
16999 }
17000
17001 out[rowNdx] = fp16type(s).bits();
17002 min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17003 max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17004 }
17005 }
17006 else
17007 {
17008 TCU_THROW(InternalError, "Unknown flavor");
17009 }
17010
17011 return true;
17012 }
17013 };
17014
17015 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17016 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17017 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17018 fp16MatrixTimesMatrix() : fp16MatrixBase()
17019 {
17020 flavorNames.push_back("EmulatingFP16");
17021 flavorNames.push_back("FloatCalc");
17022 }
17023
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17024 virtual double getULPs (vector<const deFloat16*>& in)
17025 {
17026 DE_UNREF(in);
17027
17028 return 32.0;
17029 }
17030
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17031 deUint32 getComponentValidity ()
17032 {
17033 return getComponentMatrixValidityMask(colsR, rowsL);
17034 }
17035
17036 template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17037 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17038 {
17039 DE_STATIC_ASSERT(colsL == rowsR);
17040
17041 DE_ASSERT(in.size() == 2);
17042
17043 const size_t alignedColsL = (colsL == 3) ? 4 : colsL;
17044 const size_t alignedRowsL = (rowsL == 3) ? 4 : rowsL;
17045 const size_t alignedColsR = (colsR == 3) ? 4 : colsR;
17046 const size_t alignedRowsR = (rowsR == 3) ? 4 : rowsR;
17047
17048 DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17049 DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17050 DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17051 DE_UNREF(alignedColsL);
17052 DE_UNREF(alignedColsR);
17053
17054 if (getFlavor() == 0)
17055 {
17056 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17057 {
17058 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17059 {
17060 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17061 fp16type s (fp16type::zero(1));
17062
17063 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17064 {
17065 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17066 const fp16type l (in[0][ndxl]);
17067 const float lf (l.asFloat());
17068 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17069 const fp16type r (in[1][ndxr]);
17070 const float rf (r.asFloat());
17071 const fp16type m (lf * rf);
17072
17073 s = fp16type(s.asFloat() + m.asFloat());
17074 }
17075
17076 out[ndx] = s.bits();
17077 min[ndx] = getMin(s.asDouble(), getULPs(in));
17078 max[ndx] = getMax(s.asDouble(), getULPs(in));
17079 }
17080 }
17081 }
17082 else if (getFlavor() == 1)
17083 {
17084 for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17085 {
17086 for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17087 {
17088 const size_t ndx (colNdx * alignedRowsL + rowNdx);
17089 float s (0.0f);
17090
17091 for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17092 {
17093 const size_t ndxl (commonNdx * alignedRowsL + rowNdx);
17094 const fp16type l (in[0][ndxl]);
17095 const float lf (l.asFloat());
17096 const size_t ndxr (colNdx * alignedRowsR + commonNdx);
17097 const fp16type r (in[1][ndxr]);
17098 const float rf (r.asFloat());
17099 const float m (lf * rf);
17100
17101 s += m;
17102 }
17103
17104 out[ndx] = fp16type(s).bits();
17105 min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17106 max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17107 }
17108 }
17109 }
17110 else
17111 {
17112 TCU_THROW(InternalError, "Unknown flavor");
17113 }
17114
17115 return true;
17116 }
17117 };
17118
17119 template<size_t cols, size_t rows>
17120 struct fp16OuterProduct : public fp16MatrixBase
17121 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17122 virtual double getULPs (vector<const deFloat16*>& in)
17123 {
17124 DE_UNREF(in);
17125
17126 return 2.0;
17127 }
17128
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17129 deUint32 getComponentValidity ()
17130 {
17131 return getComponentMatrixValidityMask(cols, rows);
17132 }
17133
17134 template<class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17135 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17136 {
17137 DE_ASSERT(in.size() == 2);
17138
17139 const size_t alignedCols = (cols == 3) ? 4 : cols;
17140 const size_t alignedRows = (rows == 3) ? 4 : rows;
17141
17142 DE_ASSERT(getArgCompCount(0) == rows);
17143 DE_ASSERT(getArgCompCount(1) == cols);
17144 DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17145 DE_UNREF(alignedCols);
17146
17147 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17148 {
17149 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17150 {
17151 const size_t ndx (colNdx * alignedRows + rowNdx);
17152 const fp16type x (in[0][rowNdx]);
17153 const float xf (x.asFloat());
17154 const fp16type y (in[1][colNdx]);
17155 const float yf (y.asFloat());
17156 const fp16type m (xf * yf);
17157
17158 out[ndx] = m.bits();
17159 min[ndx] = getMin(m.asDouble(), getULPs(in));
17160 max[ndx] = getMax(m.asDouble(), getULPs(in));
17161 }
17162 }
17163
17164 return true;
17165 }
17166 };
17167
17168 template<size_t size>
17169 struct fp16Determinant;
17170
17171 template<>
17172 struct fp16Determinant<2> : public fp16MatrixBase
17173 {
getULPsvkt::SpirVAssembly::fp16Determinant17174 virtual double getULPs (vector<const deFloat16*>& in)
17175 {
17176 DE_UNREF(in);
17177
17178 return 128.0; // This is not a precision test. Value is not from spec
17179 }
17180
getComponentValidityvkt::SpirVAssembly::fp16Determinant17181 deUint32 getComponentValidity ()
17182 {
17183 return 1;
17184 }
17185
17186 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17187 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17188 {
17189 const size_t cols = 2;
17190 const size_t rows = 2;
17191 const size_t alignedCols = (cols == 3) ? 4 : cols;
17192 const size_t alignedRows = (rows == 3) ? 4 : rows;
17193
17194 DE_ASSERT(in.size() == 1);
17195 DE_ASSERT(getOutCompCount() == 1);
17196 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17197 DE_UNREF(alignedCols);
17198 DE_UNREF(alignedRows);
17199
17200 // [ a b ]
17201 // [ c d ]
17202 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17203 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17204 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17205 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17206 const float ad (a * d);
17207 const fp16type adf16 (ad);
17208 const float bc (b * c);
17209 const fp16type bcf16 (bc);
17210 const float r (adf16.asFloat() - bcf16.asFloat());
17211 const fp16type rf16 (r);
17212
17213 out[0] = rf16.bits();
17214 min[0] = getMin(r, getULPs(in));
17215 max[0] = getMax(r, getULPs(in));
17216
17217 return true;
17218 }
17219 };
17220
17221 template<>
17222 struct fp16Determinant<3> : public fp16MatrixBase
17223 {
getULPsvkt::SpirVAssembly::fp16Determinant17224 virtual double getULPs (vector<const deFloat16*>& in)
17225 {
17226 DE_UNREF(in);
17227
17228 return 128.0; // This is not a precision test. Value is not from spec
17229 }
17230
getComponentValidityvkt::SpirVAssembly::fp16Determinant17231 deUint32 getComponentValidity ()
17232 {
17233 return 1;
17234 }
17235
17236 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17237 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17238 {
17239 const size_t cols = 3;
17240 const size_t rows = 3;
17241 const size_t alignedCols = (cols == 3) ? 4 : cols;
17242 const size_t alignedRows = (rows == 3) ? 4 : rows;
17243
17244 DE_ASSERT(in.size() == 1);
17245 DE_ASSERT(getOutCompCount() == 1);
17246 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17247 DE_UNREF(alignedCols);
17248 DE_UNREF(alignedRows);
17249
17250 // [ a b c ]
17251 // [ d e f ]
17252 // [ g h i ]
17253 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17254 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17255 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17256 const float d (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17257 const float e (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17258 const float f (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17259 const float g (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17260 const float h (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17261 const float i (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17262 const fp16type aei (a * e * i);
17263 const fp16type bfg (b * f * g);
17264 const fp16type cdh (c * d * h);
17265 const fp16type ceg (c * e * g);
17266 const fp16type bdi (b * d * i);
17267 const fp16type afh (a * f * h);
17268 const float r (aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17269 const fp16type rf16 (r);
17270
17271 out[0] = rf16.bits();
17272 min[0] = getMin(r, getULPs(in));
17273 max[0] = getMax(r, getULPs(in));
17274
17275 return true;
17276 }
17277 };
17278
17279 template<>
17280 struct fp16Determinant<4> : public fp16MatrixBase
17281 {
getULPsvkt::SpirVAssembly::fp16Determinant17282 virtual double getULPs (vector<const deFloat16*>& in)
17283 {
17284 DE_UNREF(in);
17285
17286 return 128.0; // This is not a precision test. Value is not from spec
17287 }
17288
getComponentValidityvkt::SpirVAssembly::fp16Determinant17289 deUint32 getComponentValidity ()
17290 {
17291 return 1;
17292 }
17293
17294 template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17295 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17296 {
17297 const size_t rows = 4;
17298 const size_t cols = 4;
17299 const size_t alignedCols = (cols == 3) ? 4 : cols;
17300 const size_t alignedRows = (rows == 3) ? 4 : rows;
17301
17302 DE_ASSERT(in.size() == 1);
17303 DE_ASSERT(getOutCompCount() == 1);
17304 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17305 DE_UNREF(alignedCols);
17306 DE_UNREF(alignedRows);
17307
17308 // [ a b c d ]
17309 // [ e f g h ]
17310 // [ i j k l ]
17311 // [ m n o p ]
17312 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17313 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17314 const float c (fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17315 const float d (fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17316 const float e (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17317 const float f (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17318 const float g (fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17319 const float h (fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17320 const float i (fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17321 const float j (fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17322 const float k (fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17323 const float l (fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17324 const float m (fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17325 const float n (fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17326 const float o (fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17327 const float p (fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17328
17329 // [ f g h ]
17330 // [ j k l ]
17331 // [ n o p ]
17332 const fp16type fkp (f * k * p);
17333 const fp16type gln (g * l * n);
17334 const fp16type hjo (h * j * o);
17335 const fp16type hkn (h * k * n);
17336 const fp16type gjp (g * j * p);
17337 const fp16type flo (f * l * o);
17338 const fp16type detA (a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17339
17340 // [ e g h ]
17341 // [ i k l ]
17342 // [ m o p ]
17343 const fp16type ekp (e * k * p);
17344 const fp16type glm (g * l * m);
17345 const fp16type hio (h * i * o);
17346 const fp16type hkm (h * k * m);
17347 const fp16type gip (g * i * p);
17348 const fp16type elo (e * l * o);
17349 const fp16type detB (b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17350
17351 // [ e f h ]
17352 // [ i j l ]
17353 // [ m n p ]
17354 const fp16type ejp (e * j * p);
17355 const fp16type flm (f * l * m);
17356 const fp16type hin (h * i * n);
17357 const fp16type hjm (h * j * m);
17358 const fp16type fip (f * i * p);
17359 const fp16type eln (e * l * n);
17360 const fp16type detC (c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17361
17362 // [ e f g ]
17363 // [ i j k ]
17364 // [ m n o ]
17365 const fp16type ejo (e * j * o);
17366 const fp16type fkm (f * k * m);
17367 const fp16type gin (g * i * n);
17368 const fp16type gjm (g * j * m);
17369 const fp16type fio (f * i * o);
17370 const fp16type ekn (e * k * n);
17371 const fp16type detD (d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17372
17373 const float r (detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17374 const fp16type rf16 (r);
17375
17376 out[0] = rf16.bits();
17377 min[0] = getMin(r, getULPs(in));
17378 max[0] = getMax(r, getULPs(in));
17379
17380 return true;
17381 }
17382 };
17383
17384 template<size_t size>
17385 struct fp16Inverse;
17386
17387 template<>
17388 struct fp16Inverse<2> : public fp16MatrixBase
17389 {
getULPsvkt::SpirVAssembly::fp16Inverse17390 virtual double getULPs (vector<const deFloat16*>& in)
17391 {
17392 DE_UNREF(in);
17393
17394 return 128.0; // This is not a precision test. Value is not from spec
17395 }
17396
getComponentValidityvkt::SpirVAssembly::fp16Inverse17397 deUint32 getComponentValidity ()
17398 {
17399 return getComponentMatrixValidityMask(2, 2);
17400 }
17401
17402 template<class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17403 bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17404 {
17405 const size_t cols = 2;
17406 const size_t rows = 2;
17407 const size_t alignedCols = (cols == 3) ? 4 : cols;
17408 const size_t alignedRows = (rows == 3) ? 4 : rows;
17409
17410 DE_ASSERT(in.size() == 1);
17411 DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17412 DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17413 DE_UNREF(alignedCols);
17414
17415 // [ a b ]
17416 // [ c d ]
17417 const float a (fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17418 const float b (fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17419 const float c (fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17420 const float d (fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17421 const float ad (a * d);
17422 const fp16type adf16 (ad);
17423 const float bc (b * c);
17424 const fp16type bcf16 (bc);
17425 const float det (adf16.asFloat() - bcf16.asFloat());
17426 const fp16type det16 (det);
17427
17428 out[0] = fp16type( d / det16.asFloat()).bits();
17429 out[1] = fp16type(-c / det16.asFloat()).bits();
17430 out[2] = fp16type(-b / det16.asFloat()).bits();
17431 out[3] = fp16type( a / det16.asFloat()).bits();
17432
17433 for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17434 for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17435 {
17436 const size_t ndx (colNdx * alignedRows + rowNdx);
17437 const fp16type s (out[ndx]);
17438
17439 min[ndx] = getMin(s.asDouble(), getULPs(in));
17440 max[ndx] = getMax(s.asDouble(), getULPs(in));
17441 }
17442
17443 return true;
17444 }
17445 };
17446
fp16ToString(deFloat16 val)17447 inline std::string fp16ToString(deFloat16 val)
17448 {
17449 return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17450 }
17451
17452 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17453 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
17454 {
17455 if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17456 return false;
17457
17458 const size_t resultStep = (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17459 const size_t iterationsCount = expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17460 const size_t inputsSteps[3] =
17461 {
17462 (ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17463 (ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17464 (ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17465 };
17466
17467 DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17468 DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17469
17470 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17471 {
17472 DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17473 DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17474 }
17475
17476 const deFloat16* const outputAsFP16 = (const deFloat16*)outputAllocs[0]->getHostPtr();
17477 TestedArithmeticFunction func;
17478
17479 func.setOutCompCount(RES_COMPONENTS);
17480 func.setArgCompCount(0, ARG0_COMPONENTS);
17481 func.setArgCompCount(1, ARG1_COMPONENTS);
17482 func.setArgCompCount(2, ARG2_COMPONENTS);
17483
17484 const bool callOncePerComponent = func.callOncePerComponent();
17485 const deUint32 componentValidityMask = func.getComponentValidity();
17486 const size_t denormModesCount = 2;
17487 const char* denormModes[denormModesCount] = { "keep denormal numbers", "flush to zero" };
17488 const size_t successfulRunsPerComponent = denormModesCount * func.getFlavorCount();
17489 bool success = true;
17490 size_t validatedCount = 0;
17491
17492 vector<deUint8> inputBytes[3];
17493
17494 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17495 inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17496
17497 const deFloat16* const inputsAsFP16[3] =
17498 {
17499 inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17500 inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17501 inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17502 };
17503
17504 for (size_t idx = 0; idx < iterationsCount; ++idx)
17505 {
17506 std::vector<size_t> successfulRuns (RES_COMPONENTS, successfulRunsPerComponent);
17507 std::vector<std::string> errors (RES_COMPONENTS);
17508 bool iterationValidated (true);
17509
17510 for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17511 {
17512 for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17513 {
17514 func.setFlavor(flavorNdx);
17515
17516 const deFloat16* iterationOutputFP16 = &outputAsFP16[idx * resultStep];
17517 vector<deFloat16> iterationCalculatedFP16 (resultStep, 0);
17518 vector<double> iterationEdgeMin (resultStep, 0.0);
17519 vector<double> iterationEdgeMax (resultStep, 0.0);
17520 vector<const deFloat16*> arguments;
17521
17522 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17523 {
17524 std::string error;
17525 bool reportError = false;
17526
17527 if (callOncePerComponent || componentNdx == 0)
17528 {
17529 bool funcCallResult;
17530
17531 arguments.clear();
17532
17533 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17534 arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17535
17536 if (denormNdx == 0)
17537 funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17538 else
17539 funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17540
17541 if (!funcCallResult)
17542 {
17543 iterationValidated = false;
17544
17545 if (callOncePerComponent)
17546 continue;
17547 else
17548 break;
17549 }
17550 }
17551
17552 if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17553 continue;
17554
17555 reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17556
17557 if (reportError)
17558 {
17559 tcu::Float16 expected (iterationCalculatedFP16[componentNdx]);
17560 tcu::Float16 outputted (iterationOutputFP16[componentNdx]);
17561 tcu::Float64 edgeMin (iterationEdgeMin[componentNdx]);
17562 tcu::Float64 edgeMax (iterationEdgeMax[componentNdx]);
17563
17564 if (reportError && expected.isNaN())
17565 reportError = false;
17566
17567 if (reportError && !expected.isNaN() && !outputted.isNaN())
17568 {
17569 if (reportError && !expected.isInf() && !outputted.isInf())
17570 {
17571 // Ignore rounding
17572 if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17573 reportError = false;
17574 }
17575
17576 if (reportError && expected.isInf())
17577 {
17578 // RTZ rounding mode returns +/-65504 instead of Inf on overflow
17579 if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17580 reportError = false;
17581 else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17582 reportError = false;
17583 }
17584
17585 if (reportError)
17586 {
17587 const double outputtedDouble = outputted.asDouble();
17588
17589 DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
17590
17591 if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17592 reportError = false;
17593 }
17594 }
17595
17596 if (reportError)
17597 {
17598 const size_t inputsComps[3] =
17599 {
17600 ARG0_COMPONENTS,
17601 ARG1_COMPONENTS,
17602 ARG2_COMPONENTS,
17603 };
17604 string inputsValues ("Inputs:");
17605 string flavorName (func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17606 std::stringstream errStream;
17607
17608 for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17609 {
17610 const size_t inputCompsCount = inputsComps[inputNdx];
17611
17612 inputsValues += " [" + de::toString(inputNdx) + "]=(";
17613
17614 for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17615 {
17616 const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17617
17618 inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17619 }
17620 }
17621
17622 errStream << "At"
17623 << " iteration " << de::toString(idx)
17624 << " component " << de::toString(componentNdx)
17625 << " denormMode " << de::toString(denormNdx)
17626 << " (" << denormModes[denormNdx] << ")"
17627 << " " << flavorName
17628 << " " << inputsValues
17629 << " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17630 << " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17631 << " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17632 << " " << error << "."
17633 << std::endl;
17634
17635 errors[componentNdx] += errStream.str();
17636
17637 successfulRuns[componentNdx]--;
17638 }
17639 }
17640 }
17641 }
17642 }
17643
17644 for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17645 {
17646 // Check if any component has total failure
17647 if (successfulRuns[componentNdx] == 0)
17648 {
17649 // Test failed in all denorm modes and all flavors for certain component: dump errors
17650 log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17651
17652 success = false;
17653 }
17654 }
17655
17656 if (iterationValidated)
17657 validatedCount++;
17658 }
17659
17660 if (validatedCount < 16)
17661 TCU_THROW(InternalError, "Too few samples have been validated.");
17662
17663 return success;
17664 }
17665
// IEEE-754 floating point numbers:
// +--------+------+----------+-------------+
// | binary | sign | exponent | significand |
// +--------+------+----------+-------------+
// | 16-bit |  1   |    5     |     10      |
// +--------+------+----------+-------------+
// | 32-bit |  1   |    8     |     23      |
// +--------+------+----------+-------------+
//
// 16-bit floats:
//
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
// 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
//
// 0   000 00   00 0000 0000 (0x0000: +0)
// 0   111 11   00 0000 0000 (0x7c00: +Inf)
// 0   000 00   11 1111 0000 (0x03f0: +Denorm)
// 0   000 01   00 0000 0001 (0x0401: +Norm)
// 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
// 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
//
// Generate and return 16-bit floats.
//
// The first values (22 of them, see numPicks in the function body) are manually picked,
// while the rest are randomly generated normalized numbers.
// Expected count to be at least the number of manually picked values (numPicks).
getFloat16a(de::Random & rnd,deUint32 count)17692 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17693 {
17694 vector<deFloat16> float16;
17695
17696 float16.reserve(count);
17697
17698 // Zero
17699 float16.push_back(deUint16(0x0000));
17700 float16.push_back(deUint16(0x8000));
17701 // Infinity
17702 float16.push_back(deUint16(0x7c00));
17703 float16.push_back(deUint16(0xfc00));
17704 // Normalized
17705 float16.push_back(deUint16(0x0401));
17706 float16.push_back(deUint16(0x8401));
17707 // Some normal number
17708 float16.push_back(deUint16(0x14cb));
17709 float16.push_back(deUint16(0x94cb));
17710 // Min/max positive normal
17711 float16.push_back(deUint16(0x0400));
17712 float16.push_back(deUint16(0x7bff));
17713 // Min/max negative normal
17714 float16.push_back(deUint16(0x8400));
17715 float16.push_back(deUint16(0xfbff));
17716 // PI
17717 float16.push_back(deUint16(0x4248)); // 3.140625
17718 float16.push_back(deUint16(0xb248)); // -3.140625
17719 // PI/2
17720 float16.push_back(deUint16(0x3e48)); // 1.5703125
17721 float16.push_back(deUint16(0xbe48)); // -1.5703125
17722 float16.push_back(deUint16(0x3c00)); // 1.0
17723 float16.push_back(deUint16(0x3800)); // 0.5
17724 // Some useful constants
17725 float16.push_back(tcu::Float16(-2.5f).bits());
17726 float16.push_back(tcu::Float16(-1.0f).bits());
17727 float16.push_back(tcu::Float16( 0.4f).bits());
17728 float16.push_back(tcu::Float16( 2.5f).bits());
17729
17730 const deUint32 numPicks = static_cast<deUint32>(float16.size());
17731
17732 DE_ASSERT(count >= numPicks);
17733 count -= numPicks;
17734
17735 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17736 {
17737 int sign = (rnd.getUint16() % 2 == 0) ? +1 : -1;
17738 int exponent = (rnd.getUint16() % 29) - 14 + 1;
17739 deUint16 mantissa = static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17740
17741 // Exclude power of -14 to avoid denorms
17742 DE_ASSERT(de::inRange(exponent, -13, 15));
17743
17744 float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17745 }
17746
17747 return float16;
17748 }
17749
getInputData1(deUint32 seed,size_t count,size_t argNo)17750 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17751 {
17752 DE_UNREF(argNo);
17753
17754 de::Random rnd(seed);
17755
17756 return getFloat16a(rnd, static_cast<deUint32>(count));
17757 }
17758
getInputData2(deUint32 seed,size_t count,size_t argNo)17759 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17760 {
17761 de::Random rnd (seed);
17762 size_t newCount = static_cast<size_t>(deSqrt(double(count)));
17763
17764 DE_ASSERT(newCount * newCount == count);
17765
17766 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17767
17768 return squarize(float16, static_cast<deUint32>(argNo));
17769 }
17770
getInputData3(deUint32 seed,size_t count,size_t argNo)17771 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17772 {
17773 if (argNo == 0 || argNo == 1)
17774 return getInputData2(seed, count, argNo);
17775 else
17776 return getInputData1(seed<<argNo, count, argNo);
17777 }
17778
getInputData(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17779 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17780 {
17781 DE_UNREF(stride);
17782
17783 vector<deFloat16> result;
17784
17785 switch (argCount)
17786 {
17787 case 1:result = getInputData1(seed, count, argNo); break;
17788 case 2:result = getInputData2(seed, count, argNo); break;
17789 case 3:result = getInputData3(seed, count, argNo); break;
17790 default: TCU_THROW(InternalError, "Invalid argument count specified");
17791 }
17792
17793 if (compCount == 3)
17794 {
17795 const size_t newCount = (3 * count) / 4;
17796 vector<deFloat16> newResult;
17797
17798 newResult.reserve(result.size());
17799
17800 for (size_t ndx = 0; ndx < newCount; ++ndx)
17801 {
17802 newResult.push_back(result[ndx]);
17803
17804 if (ndx % 3 == 2)
17805 newResult.push_back(0);
17806 }
17807
17808 result = newResult;
17809 }
17810
17811 DE_ASSERT(result.size() == count);
17812
17813 return result;
17814 }
17815
17816 // Generator for functions requiring data in range [1, inf]
getInputDataAC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17817 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17818 {
17819 vector<deFloat16> result;
17820
17821 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17822
17823 // Filter out values below 1.0 from upper half of numbers
17824 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17825 {
17826 const float f = tcu::Float16(result[idx]).asFloat();
17827
17828 if (f < 1.0f)
17829 result[idx] = tcu::Float16(1.0f - f).bits();
17830 }
17831
17832 return result;
17833 }
17834
17835 // Generator for functions requiring data in range [-1, 1]
getInputDataA(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17836 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17837 {
17838 vector<deFloat16> result;
17839
17840 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17841
17842 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17843 {
17844 const float f = tcu::Float16(result[idx]).asFloat();
17845
17846 if (!de::inRange(f, -1.0f, 1.0f))
17847 result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17848 }
17849
17850 return result;
17851 }
17852
17853 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17854 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17855 {
17856 vector<deFloat16> result;
17857
17858 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17859
17860 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17861 {
17862 const float f = tcu::Float16(result[idx]).asFloat();
17863
17864 if (!de::inRange(f, -DE_PI, DE_PI))
17865 result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17866 }
17867
17868 return result;
17869 }
17870
17871 // Generator for functions requiring data in range [0, inf]
getInputDataP(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17872 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17873 {
17874 vector<deFloat16> result;
17875
17876 result = getInputData(seed, count, compCount, stride, argCount, argNo);
17877
17878 if (argNo == 0)
17879 {
17880 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17881 result[idx] &= static_cast<deFloat16>(~0x8000);
17882 }
17883
17884 return result;
17885 }
17886
getInputDataV(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17887 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17888 {
17889 DE_UNREF(stride);
17890 DE_UNREF(argCount);
17891
17892 vector<deFloat16> result;
17893
17894 if (argNo == 0)
17895 result = getInputData2(seed, count, argNo);
17896 else
17897 {
17898 const size_t alignedCount = (compCount == 3) ? 4 : compCount;
17899 const size_t newCountX = static_cast<size_t>(deSqrt(double(count * alignedCount)));
17900 const size_t newCountY = count / newCountX;
17901 de::Random rnd (seed);
17902 vector<deFloat16> float16 = getFloat16a(rnd, static_cast<deUint32>(newCountX));
17903
17904 DE_ASSERT(newCountX * newCountX == alignedCount * count);
17905
17906 for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17907 {
17908 const vector<deFloat16> tmp(newCountY, float16[numIdx]);
17909
17910 result.insert(result.end(), tmp.begin(), tmp.end());
17911 }
17912 }
17913
17914 DE_ASSERT(result.size() == count);
17915
17916 return result;
17917 }
17918
getInputDataM(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17919 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17920 {
17921 DE_UNREF(compCount);
17922 DE_UNREF(stride);
17923 DE_UNREF(argCount);
17924
17925 de::Random rnd (seed << argNo);
17926 vector<deFloat16> result;
17927
17928 result = getFloat16a(rnd, static_cast<deUint32>(count));
17929
17930 DE_ASSERT(result.size() == count);
17931
17932 return result;
17933 }
17934
getInputDataD(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17935 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17936 {
17937 DE_UNREF(compCount);
17938 DE_UNREF(argCount);
17939
17940 de::Random rnd (seed << argNo);
17941 vector<deFloat16> result;
17942
17943 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17944 {
17945 int num = (rnd.getUint16() % 16) - 8;
17946
17947 result.push_back(tcu::Float16(float(num)).bits());
17948 }
17949
17950 result[0 * stride] = deUint16(0x7c00); // +Inf
17951 result[1 * stride] = deUint16(0xfc00); // -Inf
17952
17953 DE_ASSERT(result.size() == count);
17954
17955 return result;
17956 }
17957
17958 // Generator for smoothstep function
getInputDataSS(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17959 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17960 {
17961 vector<deFloat16> result;
17962
17963 result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
17964
17965 if (argNo == 0)
17966 {
17967 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17968 {
17969 const float f = tcu::Float16(result[idx]).asFloat();
17970
17971 if (f > 4.0f)
17972 result[idx] = tcu::Float16(-f).bits();
17973 }
17974 }
17975
17976 if (argNo == 1)
17977 {
17978 for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17979 {
17980 const float f = tcu::Float16(result[idx]).asFloat();
17981
17982 if (f < 4.0f)
17983 result[idx] = tcu::Float16(-f).bits();
17984 }
17985 }
17986
17987 return result;
17988 }
17989
17990 // Generates normalized vectors for arguments 0 and 1
getInputDataN(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17991 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17992 {
17993 DE_UNREF(compCount);
17994 DE_UNREF(argCount);
17995
17996 de::Random rnd (seed << argNo);
17997 vector<deFloat16> result;
17998
17999 if (argNo == 0 || argNo == 1)
18000 {
18001 // The input parameters for the incident vector I and the surface normal N must already be normalized
18002 for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18003 {
18004 vector <float> unnormolized;
18005 float sum = 0;
18006
18007 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18008 unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18009
18010 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18011 sum += unnormolized[compIdx] * unnormolized[compIdx];
18012
18013 sum = deFloatSqrt(sum);
18014 if (sum == 0.0f)
18015 unnormolized[0] = sum = 1.0f;
18016
18017 for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18018 result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18019
18020 for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18021 result.push_back(0);
18022 }
18023 }
18024 else
18025 {
18026 // Input parameter eta
18027 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18028 {
18029 int num = (rnd.getUint16() % 16) - 8;
18030
18031 result.push_back(tcu::Float16(float(num)).bits());
18032 }
18033 }
18034
18035 DE_ASSERT(result.size() == count);
18036
18037 return result;
18038 }
18039
18040 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18041 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18042 {
18043 DE_UNREF(compCount);
18044 DE_UNREF(stride);
18045 DE_UNREF(argCount);
18046
18047 de::Random rnd (seed << argNo);
18048 vector<deFloat16> result;
18049
18050 for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18051 {
18052 int num = (rnd.getUint16() % 16) - 8;
18053
18054 result.push_back(tcu::Float16(float(num)).bits());
18055 }
18056
18057 DE_ASSERT(result.size() == count);
18058
18059 return result;
18060 }
18061
// Describes one float16 data type (scalar, vector or matrix) used by the float16 arithmetic tests.
struct Math16TestType
{
	const char*		typePrefix;			// Prefix prepended to the component type name to form the SPIR-V type id ("", "v2", "m2x2", ...)
	const size_t	typeComponents;		// Number of components; set for scalar and vector entries, 0 for the other entries
	const size_t	typeArrayStride;	// Size in bytes of one item of this type in the input/output buffers
	const size_t	typeStructStride;	// Byte offset given to the second member of the temporary result struct
	const char*		storage_type;		// Suffix selecting the %SSBO_<suffix> block type that backs this type
};
18070
// Identifiers of the float16 data types known to the arithmetic tests.
// Scalar and vector identifiers are equal to their component count.
enum Math16DataTypes
{
	NONE				= 0,
	SCALAR				= 1,
	VEC2				= 2,
	VEC3				= 3,
	VEC4				= 4,
	MAT2X2				= 5,
	MAT2X3				= 6,
	MAT2X4				= 7,
	MAT3X2				= 8,
	MAT3X3				= 9,
	MAT3X4				= 10,
	MAT4X2				= 11,
	MAT4X3				= 12,
	MAT4X4				= 13,
	MATH16_TYPE_LAST	= 14
};
18089
// SPIR-V assembly template snippets that together implement one way of invoking a tested instruction.
struct Math16ArgFragments
{
	const char*		bodies;			// Instructions executed for every data point: load arguments, run the instruction, store the result
	const char*		variables;		// Extra type/constant declarations appended to the module-level declarations
	const char*		decorations;	// Extra decorations appended to the module decorations
	const char*		funcVariables;	// Function-scope OpVariable declarations placed at the start of the test function
};
18097
18098 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
18099
// Describes one tested instruction: its name, argument/result types, input generator and verifier.
struct Math16TestFunc
{
	const char*				funcName;			// Instruction name; names starting with "Op" are core opcodes, all others are used as %ext_import instructions
	const char*				funcSuffix;			// Variant suffix appended to funcName to form the test name and to select the argument fragments
	size_t					funcArgsCount;		// Number of input arguments (1 to 3)
	size_t					typeResult;			// Result type, used as an index into the test type table
	size_t					typeArg0;			// Type of argument 0, used as an index into the test type table
	size_t					typeArg1;			// Type of argument 1, used as an index into the test type table
	size_t					typeArg2;			// Type of argument 2, used as an index into the test type table
	Math16GetInputData*		getInputDataFunc;	// Generator called once per argument to produce the input buffer contents
	VerifyIOFunc			verifyFunc;			// Callback installed as the verifyIO function of the test resources
};
18112
18113 template<class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18114 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18115 {
18116 const int testSpecificSeed = deStringHash(testGroup.getName());
18117 const int seed = testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18118 const size_t numDataPointsByAxis = 32;
18119 const size_t numDataPoints = numDataPointsByAxis * numDataPointsByAxis;
18120 const char* componentType = "f16";
18121 const Math16TestType testTypes[MATH16_TYPE_LAST] =
18122 {
18123 { "", 0, 0, 0, "" },
18124 { "", 1, 1 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_half_ndp" },
18125 { "v2", 2, 2 * sizeof(deFloat16), 2 * sizeof(deFloat16), "u32_ndp" },
18126 { "v3", 3, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18127 { "v4", 4, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18128 { "m2x2", 0, 4 * sizeof(deFloat16), 4 * sizeof(deFloat16), "u32_ndp_2" },
18129 { "m2x3", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18130 { "m2x4", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18131 { "m3x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_3" },
18132 { "m3x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18133 { "m3x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_6" },
18134 { "m4x2", 0, 8 * sizeof(deFloat16), 8 * sizeof(deFloat16), "u32_ndp_4" },
18135 { "m4x3", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18136 { "m4x4", 0, 16 * sizeof(deFloat16), 16 * sizeof(deFloat16), "u32_ndp_8" },
18137 };
18138
18139 DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18140
18141
18142 const StringTemplate preMain
18143 (
18144 " %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
18145
18146 " %f16 = OpTypeFloat 16\n"
18147 " %v2f16 = OpTypeVector %f16 2\n"
18148 " %v3f16 = OpTypeVector %f16 3\n"
18149 " %v4f16 = OpTypeVector %f16 4\n"
18150 " %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18151 " %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18152 " %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18153 " %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18154 " %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18155 " %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18156 " %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18157 " %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18158 " %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18159
18160 " %fp_v2i32 = OpTypePointer Function %v2i32\n"
18161 " %fp_v3i32 = OpTypePointer Function %v3i32\n"
18162 " %fp_v4i32 = OpTypePointer Function %v4i32\n"
18163
18164 " %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18165 " %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18166 " %c_u32_5 = OpConstant %u32 5\n"
18167 " %c_u32_6 = OpConstant %u32 6\n"
18168 " %c_u32_7 = OpConstant %u32 7\n"
18169 " %c_u32_8 = OpConstant %u32 8\n"
18170 " %c_f16_0 = OpConstant %f16 0\n"
18171 " %c_f16_1 = OpConstant %f16 1\n"
18172 " %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18173 " %up_u32 = OpTypePointer Uniform %u32\n"
18174 "%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18175 " %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18176
18177 " %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18178 " %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18179 "%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18180 " %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18181 " %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18182 " %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18183 " %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18184 " %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18185 " %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18186 " %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18187 " %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18188 " %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18189 " %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18190 " %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18191 " %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18192 " %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18193 " %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18194 " %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18195 " %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18196 " %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18197 " %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18198 " %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18199 " %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18200 " %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18201 " %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18202 " %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18203 " %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18204 " %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18205 " %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18206 " %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18207 " %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18208
18209 " %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18210 " %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18211 " %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18212 " %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18213 " %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18214 " %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18215 " %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18216 " %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18217 " %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18218 " %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18219 " %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18220 " %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18221 " %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18222 " %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18223 " %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18224 " %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18225 " %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18226 "%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18227 "%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18228 "%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18229 "%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18230 "%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18231 "%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18232 "%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18233 "%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18234 "%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18235 "${arg_vars}"
18236 );
18237
18238 const StringTemplate decoration
18239 (
18240 "OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18241 "OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18242 "OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18243
18244 "OpDecorate %ra_u32_ndp ArrayStride 4\n"
18245 "OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18246 "OpDecorate %SSBO_u32_ndp BufferBlock\n"
18247
18248 "OpDecorate %ra_u32_2 ArrayStride 4\n"
18249 "OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18250 "OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18251 "OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18252
18253 "OpDecorate %ra_u32_4 ArrayStride 4\n"
18254 "OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18255 "OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18256 "OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18257
18258 "OpDecorate %ra_u32_3 ArrayStride 4\n"
18259 "OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18260 "OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18261 "OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18262
18263 "OpDecorate %ra_u32_6 ArrayStride 4\n"
18264 "OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18265 "OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18266 "OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18267
18268 "OpDecorate %ra_u32_8 ArrayStride 4\n"
18269 "OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18270 "OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18271 "OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18272
18273 "${arg_decorations}"
18274 );
18275
18276 const StringTemplate testFun
18277 (
18278 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18279 " %param = OpFunctionParameter %v4f32\n"
18280 " %entry = OpLabel\n"
18281
18282 " %i = OpVariable %fp_i32 Function\n"
18283 "${arg_infunc_vars}"
18284 " OpStore %i %c_i32_0\n"
18285 " OpBranch %loop\n"
18286
18287 " %loop = OpLabel\n"
18288 " %i_cmp = OpLoad %i32 %i\n"
18289 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18290 " OpLoopMerge %merge %next None\n"
18291 " OpBranchConditional %lt %write %merge\n"
18292
18293 " %write = OpLabel\n"
18294 " %ndx = OpLoad %i32 %i\n"
18295
18296 "${arg_func_call}"
18297
18298 " OpBranch %next\n"
18299
18300 " %next = OpLabel\n"
18301 " %i_cur = OpLoad %i32 %i\n"
18302 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18303 " OpStore %i %i_new\n"
18304 " OpBranch %loop\n"
18305
18306 " %merge = OpLabel\n"
18307 " OpReturnValue %param\n"
18308 " OpFunctionEnd\n"
18309 );
18310
18311 const Math16ArgFragments argFragment1 =
18312 {
18313 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18314 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18315 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18316 "",
18317 "",
18318 "",
18319 };
18320
18321 const Math16ArgFragments argFragment2 =
18322 {
18323 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18324 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18325 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18326 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18327 "",
18328 "",
18329 "",
18330 };
18331
18332 const Math16ArgFragments argFragment3 =
18333 {
18334 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18335 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18336 " %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18337 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18338 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18339 "",
18340 "",
18341 "",
18342 };
18343
18344 const Math16ArgFragments argFragmentLdExp =
18345 {
18346 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18347 " %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18348 "%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18349 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18350 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18351
18352 "",
18353
18354 "",
18355
18356 "",
18357 };
18358
18359 const Math16ArgFragments argFragmentModfFrac =
18360 {
18361 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18362 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18363 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18364
18365 " %fp_tmp = OpTypePointer Function %${tr}\n",
18366
18367 "",
18368
18369 " %tmp = OpVariable %fp_tmp Function\n",
18370 };
18371
18372 const Math16ArgFragments argFragmentModfInt =
18373 {
18374 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18375 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18376 " %tmp0 = OpAccessChain %fp_tmp %tmp\n"
18377 " %val_dst = OpLoad %${tr} %tmp0\n"
18378 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18379
18380 " %fp_tmp = OpTypePointer Function %${tr}\n",
18381
18382 "",
18383
18384 " %tmp = OpVariable %fp_tmp Function\n",
18385 };
18386
18387 const Math16ArgFragments argFragmentModfStruct =
18388 {
18389 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18390 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18391 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18392 " OpStore %tmp_ptr_s %val_tmp\n"
18393 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18394 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18395 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18396
18397 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18398 " %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18399 " %fp_tmp = OpTypePointer Function %st_tmp\n"
18400 " %c_frac = OpConstant %i32 0\n"
18401 " %c_int = OpConstant %i32 1\n",
18402
18403 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18404 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18405
18406 " %tmp = OpVariable %fp_tmp Function\n",
18407 };
18408
18409 const Math16ArgFragments argFragmentFrexpStructS =
18410 {
18411 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18412 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18413 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18414 " OpStore %tmp_ptr_s %val_tmp\n"
18415 "%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18416 " %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18417 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18418
18419 " %fp_${tr} = OpTypePointer Function %${tr}\n"
18420 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18421 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18422
18423 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18424 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18425
18426 " %tmp = OpVariable %fp_tmp Function\n",
18427 };
18428
18429 const Math16ArgFragments argFragmentFrexpStructE =
18430 {
18431 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18432 " %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18433 "%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18434 " OpStore %tmp_ptr_s %val_tmp\n"
18435 "%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18436 "%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18437 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18438 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18439
18440 " %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18441 " %fp_tmp = OpTypePointer Function %st_tmp\n",
18442
18443 "OpMemberDecorate %st_tmp 0 Offset 0\n"
18444 "OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18445
18446 " %tmp = OpVariable %fp_tmp Function\n",
18447 };
18448
18449 const Math16ArgFragments argFragmentFrexpS =
18450 {
18451 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18452 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18453 " %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18454 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18455
18456 "",
18457
18458 "",
18459
18460 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18461 };
18462
18463 const Math16ArgFragments argFragmentFrexpE =
18464 {
18465 " %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18466 " %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18467 "%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18468 "%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
18469 " %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18470 " %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18471
18472 "",
18473
18474 "",
18475
18476 " %tmp = OpVariable %fp_${dr}i32 Function\n",
18477 };
18478
18479 string load_funcs[MATH16_TYPE_LAST];
18480 load_funcs[SCALAR] = loadScalarF16FromUint;
18481 load_funcs[VEC2] = loadV2F16FromUint;
18482 load_funcs[VEC3] = loadV3F16FromUints;
18483 load_funcs[VEC4] = loadV4F16FromUints;
18484 load_funcs[MAT2X2] = loadM2x2F16FromUints;
18485 load_funcs[MAT2X3] = loadM2x3F16FromUints;
18486 load_funcs[MAT2X4] = loadM2x4F16FromUints;
18487 load_funcs[MAT3X2] = loadM3x2F16FromUints;
18488 load_funcs[MAT3X3] = loadM3x3F16FromUints;
18489 load_funcs[MAT3X4] = loadM3x4F16FromUints;
18490 load_funcs[MAT4X2] = loadM4x2F16FromUints;
18491 load_funcs[MAT4X3] = loadM4x3F16FromUints;
18492 load_funcs[MAT4X4] = loadM4x4F16FromUints;
18493
18494 string store_funcs[MATH16_TYPE_LAST];
18495 store_funcs[SCALAR] = storeScalarF16AsUint;
18496 store_funcs[VEC2] = storeV2F16AsUint;
18497 store_funcs[VEC3] = storeV3F16AsUints;
18498 store_funcs[VEC4] = storeV4F16AsUints;
18499 store_funcs[MAT2X2] = storeM2x2F16AsUints;
18500 store_funcs[MAT2X3] = storeM2x3F16AsUints;
18501 store_funcs[MAT2X4] = storeM2x4F16AsUints;
18502 store_funcs[MAT3X2] = storeM3x2F16AsUints;
18503 store_funcs[MAT3X3] = storeM3x3F16AsUints;
18504 store_funcs[MAT3X4] = storeM3x4F16AsUints;
18505 store_funcs[MAT4X2] = storeM4x2F16AsUints;
18506 store_funcs[MAT4X3] = storeM4x3F16AsUints;
18507 store_funcs[MAT4X4] = storeM4x4F16AsUints;
18508
18509 const Math16TestType& testType = testTypes[testTypeIdx];
18510 const string funcNameString = string(testFunc.funcName) + string(testFunc.funcSuffix);
18511 const string testName = de::toLower(funcNameString);
18512 const Math16ArgFragments* argFragments = DE_NULL;
18513 const size_t typeStructStride = testType.typeStructStride;
18514 const bool extInst = !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18515 const size_t numFloatsPerArg0Type = testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18516 const size_t iterations = numDataPoints / numFloatsPerArg0Type;
18517 const size_t numFloatsPerResultType = testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18518 const vector<deFloat16> float16DummyOutput (iterations * numFloatsPerResultType, 0);
18519 VulkanFeatures features;
18520 SpecResource specResource;
18521 map<string, string> specs;
18522 map<string, string> fragments;
18523 vector<string> extensions;
18524 string funcCall;
18525 string funcVariables;
18526 string variables;
18527 string declarations;
18528 string decorations;
18529 string functions;
18530
18531 switch (testFunc.funcArgsCount)
18532 {
18533 case 1:
18534 {
18535 argFragments = &argFragment1;
18536
18537 if (funcNameString == "ModfFrac") argFragments = &argFragmentModfFrac;
18538 if (funcNameString == "ModfInt") argFragments = &argFragmentModfInt;
18539 if (funcNameString == "ModfStructFrac") argFragments = &argFragmentModfStruct;
18540 if (funcNameString == "ModfStructInt") argFragments = &argFragmentModfStruct;
18541 if (funcNameString == "FrexpS") argFragments = &argFragmentFrexpS;
18542 if (funcNameString == "FrexpE") argFragments = &argFragmentFrexpE;
18543 if (funcNameString == "FrexpStructS") argFragments = &argFragmentFrexpStructS;
18544 if (funcNameString == "FrexpStructE") argFragments = &argFragmentFrexpStructE;
18545
18546 break;
18547 }
18548 case 2:
18549 {
18550 argFragments = &argFragment2;
18551
18552 if (funcNameString == "Ldexp") argFragments = &argFragmentLdExp;
18553
18554 break;
18555 }
18556 case 3:
18557 {
18558 argFragments = &argFragment3;
18559
18560 break;
18561 }
18562 default:
18563 {
18564 TCU_THROW(InternalError, "Invalid number of arguments");
18565 }
18566 }
18567
18568 functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18569 if (testFunc.funcArgsCount == 1)
18570 {
18571 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18572 variables +=
18573 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18574 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18575
18576 decorations +=
18577 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18578 "OpDecorate %ssbo_src0 Binding 0\n"
18579 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18580 "OpDecorate %ssbo_dst Binding 1\n";
18581 }
18582 else if (testFunc.funcArgsCount == 2)
18583 {
18584 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18585 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18586 variables +=
18587 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18588 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18589 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18590
18591 decorations +=
18592 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18593 "OpDecorate %ssbo_src0 Binding 0\n"
18594 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18595 "OpDecorate %ssbo_src1 Binding 1\n"
18596 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18597 "OpDecorate %ssbo_dst Binding 2\n";
18598 }
18599 else if (testFunc.funcArgsCount == 3)
18600 {
18601 functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18602 functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18603 functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18604 variables +=
18605 " %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18606 " %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18607 " %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18608 " %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18609
18610 decorations +=
18611 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
18612 "OpDecorate %ssbo_src0 Binding 0\n"
18613 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
18614 "OpDecorate %ssbo_src1 Binding 1\n"
18615 "OpDecorate %ssbo_src2 DescriptorSet 0\n"
18616 "OpDecorate %ssbo_src2 Binding 2\n"
18617 "OpDecorate %ssbo_dst DescriptorSet 0\n"
18618 "OpDecorate %ssbo_dst Binding 3\n";
18619 }
18620 else
18621 {
18622 TCU_THROW(InternalError, "Invalid number of function arguments");
18623 }
18624
18625 variables += argFragments->variables;
18626 decorations += argFragments->decorations;
18627
18628 specs["dr"] = testTypes[testFunc.typeResult].typePrefix;
18629 specs["d0"] = testTypes[testFunc.typeArg0].typePrefix;
18630 specs["d1"] = testTypes[testFunc.typeArg1].typePrefix;
18631 specs["d2"] = testTypes[testFunc.typeArg2].typePrefix;
18632 specs["tr"] = string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18633 specs["t0"] = string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18634 specs["t1"] = string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18635 specs["t2"] = string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18636 specs["store_tr"] = string(testTypes[testFunc.typeResult].storage_type);
18637 specs["store_t0"] = string(testTypes[testFunc.typeArg0].storage_type);
18638 specs["store_t1"] = string(testTypes[testFunc.typeArg1].storage_type);
18639 specs["store_t2"] = string(testTypes[testFunc.typeArg2].storage_type);
18640 specs["struct_stride"] = de::toString(typeStructStride);
18641 specs["op"] = extInst ? "OpExtInst" : testFunc.funcName;
18642 specs["ext_inst"] = extInst ? string("%ext_import ") + testFunc.funcName : "";
18643 specs["struct_member"] = de::toLower(testFunc.funcSuffix);
18644
18645 variables = StringTemplate(variables).specialize(specs);
18646 decorations = StringTemplate(decorations).specialize(specs);
18647 funcVariables = StringTemplate(argFragments->funcVariables).specialize(specs);
18648 funcCall = StringTemplate(argFragments->bodies).specialize(specs);
18649
18650 specs["num_data_points"] = de::toString(iterations);
18651 specs["arg_vars"] = variables;
18652 specs["arg_decorations"] = decorations;
18653 specs["arg_infunc_vars"] = funcVariables;
18654 specs["arg_func_call"] = funcCall;
18655
18656 fragments["extension"] = "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18657 fragments["capability"] = "OpCapability Matrix\nOpCapability Float16\n";
18658 fragments["decoration"] = decoration.specialize(specs);
18659 fragments["pre_main"] = preMain.specialize(specs) + functions;
18660 fragments["testfun"] = testFun.specialize(specs);
18661
18662 for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18663 {
18664 const size_t numFloatsPerItem = (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18665 : (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18666 : (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18667 : -1;
18668 const vector<deFloat16> inputData = testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18669
18670 specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18671 }
18672
18673 specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18674 specResource.verifyIO = testFunc.verifyFunc;
18675
18676 extensions.push_back("VK_KHR_shader_float16_int8");
18677
18678 features.extFloat16Int8.shaderFloat16 = true;
18679
18680 finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18681 }
18682
18683 template<size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18684 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18685 {
18686 DE_STATIC_ASSERT(C >= 1 && C <= 4);
18687
18688 const std::string testGroupName (string("arithmetic_") + de::toString(C));
18689 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18690 const Math16TestFunc testFuncs[] =
18691 {
18692 { "OpFNegate", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16OpFNegate> },
18693 { "Round", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Round> },
18694 { "RoundEven", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16RoundEven> },
18695 { "Trunc", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Trunc> },
18696 { "FAbs", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FAbs> },
18697 { "FSign", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FSign> },
18698 { "Floor", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Floor> },
18699 { "Ceil", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Ceil> },
18700 { "Fract", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Fract> },
18701 { "Radians", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Radians> },
18702 { "Degrees", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Degrees> },
18703 { "Sin", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sin> },
18704 { "Cos", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cos> },
18705 { "Tan", "", 1, C, C, 0, 0, &getInputDataPI, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tan> },
18706 { "Asin", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asin> },
18707 { "Acos", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acos> },
18708 { "Atan", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atan> },
18709 { "Sinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sinh> },
18710 { "Cosh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Cosh> },
18711 { "Tanh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Tanh> },
18712 { "Asinh", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Asinh> },
18713 { "Acosh", "", 1, C, C, 0, 0, &getInputDataAC, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Acosh> },
18714 { "Atanh", "", 1, C, C, 0, 0, &getInputDataA, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Atanh> },
18715 { "Exp", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp> },
18716 { "Log", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log> },
18717 { "Exp2", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Exp2> },
18718 { "Log2", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Log2> },
18719 { "Sqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Sqrt> },
18720 { "InverseSqrt", "", 1, C, C, 0, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, 0, 0, fp16InverseSqrt> },
18721 { "Modf", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18722 { "Modf", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18723 { "ModfStruct", "Frac", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfFrac> },
18724 { "ModfStruct", "Int", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16ModfInt> },
18725 { "Frexp", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18726 { "Frexp", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18727 { "FrexpStruct", "S", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpS> },
18728 { "FrexpStruct", "E", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16FrexpE> },
18729 { "OpFAdd", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFAdd> },
18730 { "OpFSub", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFSub> },
18731 { "OpFMul", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFMul> },
18732 { "OpFDiv", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16OpFDiv> },
18733 { "Atan2", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Atan2> },
18734 { "Pow", "", 2, C, C, C, 0, &getInputDataP, compareFP16ArithmeticFunc< C, C, C, 0, fp16Pow> },
18735 { "FMin", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMin> },
18736 { "FMax", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16FMax> },
18737 { "Step", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Step> },
18738 { "Ldexp", "", 2, C, C, C, 0, &getInputData, compareFP16ArithmeticFunc< C, C, C, 0, fp16Ldexp> },
18739 { "FClamp", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16FClamp> },
18740 { "FMix", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FMix> },
18741 { "SmoothStep", "", 3, C, C, C, C, &getInputDataSS, compareFP16ArithmeticFunc< C, C, C, C, fp16SmoothStep> },
18742 { "Fma", "", 3, C, C, C, C, &getInputData, compareFP16ArithmeticFunc< C, C, C, C, fp16Fma> },
18743 { "Length", "", 1, 1, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, 0, 0, fp16Length> },
18744 { "Distance", "", 2, 1, C, C, 0, &getInputData, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Distance> },
18745 { "Cross", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Cross> },
18746 { "Normalize", "", 1, C, C, 0, 0, &getInputData, compareFP16ArithmeticFunc< C, C, 0, 0, fp16Normalize> },
18747 { "FaceForward", "", 3, C, C, C, C, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, C, fp16FaceForward> },
18748 { "Reflect", "", 2, C, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< C, C, C, 0, fp16Reflect> },
18749 { "Refract", "", 3, C, C, C, 1, &getInputDataN, compareFP16ArithmeticFunc< C, C, C, 1, fp16Refract> },
18750 { "OpDot", "", 2, 1, C, C, 0, &getInputDataD, compareFP16ArithmeticFunc< 1, C, C, 0, fp16Dot> },
18751 { "OpVectorTimesScalar", "", 2, C, C, 1, 0, &getInputDataV, compareFP16ArithmeticFunc< C, C, 1, 0, fp16VectorTimesScalar> },
18752 };
18753
18754 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18755 {
18756 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
18757 const string funcNameString = testFunc.funcName;
18758
18759 if ((C != 3) && funcNameString == "Cross")
18760 continue;
18761
18762 if ((C < 2) && funcNameString == "OpDot")
18763 continue;
18764
18765 if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18766 continue;
18767
18768 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18769 }
18770
18771 return testGroup.release();
18772 }
18773
18774 template<class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18775 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18776 {
18777 const std::string testGroupName ("arithmetic");
18778 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18779 const Math16TestFunc testFuncs[] =
18780 {
18781 { "OpTranspose", "2x2", 1, MAT2X2, MAT2X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Transpose<2,2> > },
18782 { "OpTranspose", "3x2", 1, MAT2X3, MAT3X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<3,2> > },
18783 { "OpTranspose", "4x2", 1, MAT2X4, MAT4X2, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<4,2> > },
18784 { "OpTranspose", "2x3", 1, MAT3X2, MAT2X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,3> > },
18785 { "OpTranspose", "3x3", 1, MAT3X3, MAT3X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,3> > },
18786 { "OpTranspose", "4x3", 1, MAT3X4, MAT4X3, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,3> > },
18787 { "OpTranspose", "2x4", 1, MAT4X2, MAT2X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 8, 8, 0, 0, fp16Transpose<2,4> > },
18788 { "OpTranspose", "3x4", 1, MAT4X3, MAT3X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<3,4> > },
18789 { "OpTranspose", "4x4", 1, MAT4X4, MAT4X4, 0, 0, &getInputDataM, compareFP16ArithmeticFunc< 16, 16, 0, 0, fp16Transpose<4,4> > },
18790 { "OpMatrixTimesScalar", "2x2", 2, MAT2X2, MAT2X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 1, 0, fp16MatrixTimesScalar<2,2> > },
18791 { "OpMatrixTimesScalar", "2x3", 2, MAT2X3, MAT2X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,3> > },
18792 { "OpMatrixTimesScalar", "2x4", 2, MAT2X4, MAT2X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<2,4> > },
18793 { "OpMatrixTimesScalar", "3x2", 2, MAT3X2, MAT3X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<3,2> > },
18794 { "OpMatrixTimesScalar", "3x3", 2, MAT3X3, MAT3X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,3> > },
18795 { "OpMatrixTimesScalar", "3x4", 2, MAT3X4, MAT3X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<3,4> > },
18796 { "OpMatrixTimesScalar", "4x2", 2, MAT4X2, MAT4X2, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 1, 0, fp16MatrixTimesScalar<4,2> > },
18797 { "OpMatrixTimesScalar", "4x3", 2, MAT4X3, MAT4X3, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,3> > },
18798 { "OpMatrixTimesScalar", "4x4", 2, MAT4X4, MAT4X4, 1, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 1, 0, fp16MatrixTimesScalar<4,4> > },
18799 { "OpVectorTimesMatrix", "2x2", 2, VEC2, VEC2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 2, 4, 0, fp16VectorTimesMatrix<2,2> > },
18800 { "OpVectorTimesMatrix", "2x3", 2, VEC2, VEC3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 3, 8, 0, fp16VectorTimesMatrix<2,3> > },
18801 { "OpVectorTimesMatrix", "2x4", 2, VEC2, VEC4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 8, 0, fp16VectorTimesMatrix<2,4> > },
18802 { "OpVectorTimesMatrix", "3x2", 2, VEC3, VEC2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 2, 8, 0, fp16VectorTimesMatrix<3,2> > },
18803 { "OpVectorTimesMatrix", "3x3", 2, VEC3, VEC3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 3, 16, 0, fp16VectorTimesMatrix<3,3> > },
18804 { "OpVectorTimesMatrix", "3x4", 2, VEC3, VEC4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 4, 16, 0, fp16VectorTimesMatrix<3,4> > },
18805 { "OpVectorTimesMatrix", "4x2", 2, VEC4, VEC2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 8, 0, fp16VectorTimesMatrix<4,2> > },
18806 { "OpVectorTimesMatrix", "4x3", 2, VEC4, VEC3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 3, 16, 0, fp16VectorTimesMatrix<4,3> > },
18807 { "OpVectorTimesMatrix", "4x4", 2, VEC4, VEC4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 16, 0, fp16VectorTimesMatrix<4,4> > },
18808 { "OpMatrixTimesVector", "2x2", 2, VEC2, MAT2X2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 4, 2, 0, fp16MatrixTimesVector<2,2> > },
18809 { "OpMatrixTimesVector", "2x3", 2, VEC3, MAT2X3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 8, 2, 0, fp16MatrixTimesVector<2,3> > },
18810 { "OpMatrixTimesVector", "2x4", 2, VEC4, MAT2X4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 2, 0, fp16MatrixTimesVector<2,4> > },
18811 { "OpMatrixTimesVector", "3x2", 2, VEC2, MAT3X2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 3, 0, fp16MatrixTimesVector<3,2> > },
18812 { "OpMatrixTimesVector", "3x3", 2, VEC3, MAT3X3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 3, 0, fp16MatrixTimesVector<3,3> > },
18813 { "OpMatrixTimesVector", "3x4", 2, VEC4, MAT3X4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 3, 0, fp16MatrixTimesVector<3,4> > },
18814 { "OpMatrixTimesVector", "4x2", 2, VEC2, MAT4X2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 2, 8, 4, 0, fp16MatrixTimesVector<4,2> > },
18815 { "OpMatrixTimesVector", "4x3", 2, VEC3, MAT4X3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 3, 16, 4, 0, fp16MatrixTimesVector<4,3> > },
18816 { "OpMatrixTimesVector", "4x4", 2, VEC4, MAT4X4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 16, 4, 0, fp16MatrixTimesVector<4,4> > },
18817 { "OpMatrixTimesMatrix", "2x2_2x2", 2, MAT2X2, MAT2X2, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 4, 4, 0, fp16MatrixTimesMatrix<2,2,2,2> > },
18818 { "OpMatrixTimesMatrix", "2x2_3x2", 2, MAT3X2, MAT2X2, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,3,2> > },
18819 { "OpMatrixTimesMatrix", "2x2_4x2", 2, MAT4X2, MAT2X2, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 8, 0, fp16MatrixTimesMatrix<2,2,4,2> > },
18820 { "OpMatrixTimesMatrix", "2x3_2x2", 2, MAT2X3, MAT2X3, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,3,2,2> > },
18821 { "OpMatrixTimesMatrix", "2x3_3x2", 2, MAT3X3, MAT2X3, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,3,2> > },
18822 { "OpMatrixTimesMatrix", "2x3_4x2", 2, MAT4X3, MAT2X3, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,3,4,2> > },
18823 { "OpMatrixTimesMatrix", "2x4_2x2", 2, MAT2X4, MAT2X4, MAT2X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 4, 0, fp16MatrixTimesMatrix<2,4,2,2> > },
18824 { "OpMatrixTimesMatrix", "2x4_3x2", 2, MAT3X4, MAT2X4, MAT3X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,3,2> > },
18825 { "OpMatrixTimesMatrix", "2x4_4x2", 2, MAT4X4, MAT2X4, MAT4X2, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 8, 8, 0, fp16MatrixTimesMatrix<2,4,4,2> > },
18826 { "OpMatrixTimesMatrix", "3x2_2x3", 2, MAT2X2, MAT3X2, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<3,2,2,3> > },
18827 { "OpMatrixTimesMatrix", "3x2_3x3", 2, MAT3X2, MAT3X2, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,3,3> > },
18828 { "OpMatrixTimesMatrix", "3x2_4x3", 2, MAT4X2, MAT3X2, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<3,2,4,3> > },
18829 { "OpMatrixTimesMatrix", "3x3_2x3", 2, MAT2X3, MAT3X3, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,3,2,3> > },
18830 { "OpMatrixTimesMatrix", "3x3_3x3", 2, MAT3X3, MAT3X3, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,3,3> > },
18831 { "OpMatrixTimesMatrix", "3x3_4x3", 2, MAT4X3, MAT3X3, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,3,4,3> > },
18832 { "OpMatrixTimesMatrix", "3x4_2x3", 2, MAT2X4, MAT3X4, MAT2X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<3,4,2,3> > },
18833 { "OpMatrixTimesMatrix", "3x4_3x3", 2, MAT3X4, MAT3X4, MAT3X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,3,3> > },
18834 { "OpMatrixTimesMatrix", "3x4_4x3", 2, MAT4X4, MAT3X4, MAT4X3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<3,4,4,3> > },
18835 { "OpMatrixTimesMatrix", "4x2_2x4", 2, MAT2X2, MAT4X2, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 8, 8, 0, fp16MatrixTimesMatrix<4,2,2,4> > },
18836 { "OpMatrixTimesMatrix", "4x2_3x4", 2, MAT3X2, MAT4X2, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,3,4> > },
18837 { "OpMatrixTimesMatrix", "4x2_4x4", 2, MAT4X2, MAT4X2, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 8, 16, 0, fp16MatrixTimesMatrix<4,2,4,4> > },
18838 { "OpMatrixTimesMatrix", "4x3_2x4", 2, MAT2X3, MAT4X3, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,3,2,4> > },
18839 { "OpMatrixTimesMatrix", "4x3_3x4", 2, MAT3X3, MAT4X3, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,3,4> > },
18840 { "OpMatrixTimesMatrix", "4x3_4x4", 2, MAT4X3, MAT4X3, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,3,4,4> > },
18841 { "OpMatrixTimesMatrix", "4x4_2x4", 2, MAT2X4, MAT4X4, MAT2X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 16, 8, 0, fp16MatrixTimesMatrix<4,4,2,4> > },
18842 { "OpMatrixTimesMatrix", "4x4_3x4", 2, MAT3X4, MAT4X4, MAT3X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,3,4> > },
18843 { "OpMatrixTimesMatrix", "4x4_4x4", 2, MAT4X4, MAT4X4, MAT4X4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 16, 16, 0, fp16MatrixTimesMatrix<4,4,4,4> > },
18844 { "OpOuterProduct", "2x2", 2, MAT2X2, VEC2, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 4, 2, 2, 0, fp16OuterProduct<2,2> > },
18845 { "OpOuterProduct", "2x3", 2, MAT2X3, VEC3, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 3, 2, 0, fp16OuterProduct<2,3> > },
18846 { "OpOuterProduct", "2x4", 2, MAT2X4, VEC4, VEC2, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 4, 2, 0, fp16OuterProduct<2,4> > },
18847 { "OpOuterProduct", "3x2", 2, MAT3X2, VEC2, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 3, 0, fp16OuterProduct<3,2> > },
18848 { "OpOuterProduct", "3x3", 2, MAT3X3, VEC3, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 3, 0, fp16OuterProduct<3,3> > },
18849 { "OpOuterProduct", "3x4", 2, MAT3X4, VEC4, VEC3, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 3, 0, fp16OuterProduct<3,4> > },
18850 { "OpOuterProduct", "4x2", 2, MAT4X2, VEC2, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 8, 2, 4, 0, fp16OuterProduct<4,2> > },
18851 { "OpOuterProduct", "4x3", 2, MAT4X3, VEC3, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 3, 4, 0, fp16OuterProduct<4,3> > },
18852 { "OpOuterProduct", "4x4", 2, MAT4X4, VEC4, VEC4, 0, &getInputDataD, compareFP16ArithmeticFunc< 16, 4, 4, 0, fp16OuterProduct<4,4> > },
18853 { "Determinant", "2x2", 1, SCALAR, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 4, 0, 0, fp16Determinant<2> > },
18854 { "Determinant", "3x3", 1, SCALAR, MAT3X3, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<3> > },
18855 { "Determinant", "4x4", 1, SCALAR, MAT4X4, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 1, 16, 0, 0, fp16Determinant<4> > },
18856 { "MatrixInverse", "2x2", 1, MAT2X2, MAT2X2, NONE, 0, &getInputDataC, compareFP16ArithmeticFunc< 4, 4, 0, 0, fp16Inverse<2> > },
18857 };
18858
18859 for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18860 {
18861 const Math16TestFunc& testFunc = testFuncs[testFuncIdx];
18862
18863 createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18864 }
18865
18866 return testGroup.release();
18867 }
18868
// One Amber-based comparison test: 'name' is used as the test case name and as
// the prefix of the .amber script file name, 'desc' is appended to the
// "Compare output of " case description.
struct ComparisonCase
{
	std::string	name;
	std::string	desc;
};
18874
18875 template<size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)18876 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18877 {
18878 const string testGroupName ("comparison_" + de::toString(C));
18879 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18880 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
18881
18882 const ComparisonCase amberTests[] =
18883 {
18884 { "modfstruct", "modf and modfStruct" },
18885 { "frexpstruct", "frexp and frexpStruct" }
18886 };
18887
18888 for (ComparisonCase test : amberTests)
18889 {
18890 const string caseDesc ("Compare output of " + test.desc);
18891 const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
18892
18893 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18894 test.name.c_str(),
18895 caseDesc.c_str(),
18896 dataDir,
18897 fileName));
18898 }
18899
18900 return testGroup.release();
18901 }
18902
// A shader stage an Amber comparison test is generated for: 'name' is the
// stage abbreviation used in case and file names (e.g. "vert"), 'requirement'
// is the list of requirement strings handed to the Amber test case
// (e.g. "Features.geometryShader"); it may be empty.
struct ShaderStage
{
	std::string					name;
	std::vector<std::string>	requirement;
};
18908
18909 template<size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)18910 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
18911 {
18912 const string testGroupName ("comparison_" + de::toString(C));
18913 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18914 const char* dataDir = "spirv_assembly/instruction/float32/comparison";
18915
18916 const ShaderStage stages[] =
18917 {
18918 { "vert", vector<string>(0) },
18919 { "tesc", vector<string>(1, "Features.tessellationShader") },
18920 { "tese", vector<string>(1, "Features.tessellationShader") },
18921 { "geom", vector<string>(1, "Features.geometryShader") },
18922 { "frag", vector<string>(0) }
18923 };
18924
18925 const ComparisonCase amberTests[] =
18926 {
18927 { "modfstruct", "modf and modfStruct" },
18928 { "frexpstruct", "frexp and frexpStruct" }
18929 };
18930
18931 for (ComparisonCase test : amberTests)
18932 for (ShaderStage stage : stages)
18933 {
18934 const string caseName (test.name + "_" + stage.name);
18935 const string caseDesc ("Compare output of " + test.desc);
18936 const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
18937
18938 testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18939 caseName.c_str(),
18940 caseDesc.c_str(),
18941 dataDir,
18942 fileName,
18943 stage.requirement));
18944 }
18945
18946 return testGroup.release();
18947 }
18948
getNumberTypeName(const NumberType type)18949 const string getNumberTypeName (const NumberType type)
18950 {
18951 if (type == NUMBERTYPE_INT32)
18952 {
18953 return "int";
18954 }
18955 else if (type == NUMBERTYPE_UINT32)
18956 {
18957 return "uint";
18958 }
18959 else if (type == NUMBERTYPE_FLOAT32)
18960 {
18961 return "float";
18962 }
18963 else
18964 {
18965 DE_ASSERT(false);
18966 return "";
18967 }
18968 }
18969
getInt(de::Random & rnd)18970 deInt32 getInt(de::Random& rnd)
18971 {
18972 return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
18973 }
18974
// Returns 'str' concatenated with itself 'times' times; the result is empty
// when 'times' is zero or negative.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
18984
getRandomConstantString(const NumberType type,de::Random & rnd)18985 const string getRandomConstantString (const NumberType type, de::Random& rnd)
18986 {
18987 if (type == NUMBERTYPE_INT32)
18988 {
18989 return numberToString<deInt32>(getInt(rnd));
18990 }
18991 else if (type == NUMBERTYPE_UINT32)
18992 {
18993 return numberToString<deUint32>(rnd.getUint32());
18994 }
18995 else if (type == NUMBERTYPE_FLOAT32)
18996 {
18997 return numberToString<float>(rnd.getFloat());
18998 }
18999 else
19000 {
19001 DE_ASSERT(false);
19002 return "";
19003 }
19004 }
19005
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19006 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19007 {
19008 map<string, string> params;
19009
19010 // Vec2 to Vec4
19011 for (int width = 2; width <= 4; ++width)
19012 {
19013 const string randomConst = numberToString(getInt(rnd));
19014 const string widthStr = numberToString(width);
19015 const string composite_type = "${customType}vec" + widthStr;
19016 const int index = rnd.getInt(0, width-1);
19017
19018 params["type"] = "vec";
19019 params["name"] = params["type"] + "_" + widthStr;
19020 params["compositeDecl"] = composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19021 params["compositeType"] = composite_type;
19022 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19023 params["compositeConstruct"] = "%instance = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19024 params["indexes"] = numberToString(index);
19025 testCases.push_back(params);
19026 }
19027 }
19028
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19029 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19030 {
19031 const int limit = 10;
19032 map<string, string> params;
19033
19034 for (int width = 2; width <= limit; ++width)
19035 {
19036 string randomConst = numberToString(getInt(rnd));
19037 string widthStr = numberToString(width);
19038 int index = rnd.getInt(0, width-1);
19039
19040 params["type"] = "array";
19041 params["name"] = params["type"] + "_" + widthStr;
19042 params["compositeDecl"] = string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19043 + "%composite = OpTypeArray ${customType} %arraywidth\n";
19044 params["compositeType"] = "%composite";
19045 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19046 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19047 params["indexes"] = numberToString(index);
19048 testCases.push_back(params);
19049 }
19050 }
19051
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19052 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19053 {
19054 const int limit = 10;
19055 map<string, string> params;
19056
19057 for (int width = 2; width <= limit; ++width)
19058 {
19059 string randomConst = numberToString(getInt(rnd));
19060 int index = rnd.getInt(0, width-1);
19061
19062 params["type"] = "struct";
19063 params["name"] = params["type"] + "_" + numberToString(width);
19064 params["compositeDecl"] = "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19065 params["compositeType"] = "%composite";
19066 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19067 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19068 params["indexes"] = numberToString(index);
19069 testCases.push_back(params);
19070 }
19071 }
19072
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19073 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19074 {
19075 map<string, string> params;
19076
19077 // Vec2 to Vec4
19078 for (int width = 2; width <= 4; ++width)
19079 {
19080 string widthStr = numberToString(width);
19081
19082 for (int column = 2 ; column <= 4; ++column)
19083 {
19084 int index_0 = rnd.getInt(0, column-1);
19085 int index_1 = rnd.getInt(0, width-1);
19086 string columnStr = numberToString(column);
19087
19088 params["type"] = "matrix";
19089 params["name"] = params["type"] + "_" + widthStr + "x" + columnStr;
19090 params["compositeDecl"] = string("%vectype = OpTypeVector ${customType} " + widthStr + "\n")
19091 + "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19092 params["compositeType"] = "%composite";
19093
19094 params["filler"] = string("%filler = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19095 + "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19096
19097 params["compositeConstruct"] = "%instance = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19098 params["indexes"] = numberToString(index_0) + " " + numberToString(index_1);
19099 testCases.push_back(params);
19100 }
19101 }
19102 }
19103
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19104 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19105 {
19106 createVectorCompositeCases(testCases, rnd, type);
19107 createArrayCompositeCases(testCases, rnd, type);
19108 createStructCompositeCases(testCases, rnd, type);
19109 // Matrix only supports float types
19110 if (type == NUMBERTYPE_FLOAT32)
19111 {
19112 createMatrixCompositeCases(testCases, rnd, type);
19113 }
19114 }
19115
getAssemblyTypeDeclaration(const NumberType type)19116 const string getAssemblyTypeDeclaration (const NumberType type)
19117 {
19118 switch (type)
19119 {
19120 case NUMBERTYPE_INT32: return "OpTypeInt 32 1";
19121 case NUMBERTYPE_UINT32: return "OpTypeInt 32 0";
19122 case NUMBERTYPE_FLOAT32: return "OpTypeFloat 32";
19123 default: DE_ASSERT(false); return "";
19124 }
19125 }
19126
getAssemblyTypeName(const NumberType type)19127 const string getAssemblyTypeName (const NumberType type)
19128 {
19129 switch (type)
19130 {
19131 case NUMBERTYPE_INT32: return "%i32";
19132 case NUMBERTYPE_UINT32: return "%u32";
19133 case NUMBERTYPE_FLOAT32: return "%f32";
19134 default: DE_ASSERT(false); return "";
19135 }
19136 }
19137
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19138 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
19139 {
19140 map<string, string> parameters(params);
19141
19142 const string customType = getAssemblyTypeName(type);
19143 map<string, string> substCustomType;
19144 substCustomType["customType"] = customType;
19145 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19146 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19147 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19148 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19149 parameters["customType"] = customType;
19150 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19151
19152 if (parameters.at("compositeType") != "%u32vec3")
19153 {
19154 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19155 }
19156
19157 return StringTemplate(
19158 "OpCapability Shader\n"
19159 "OpCapability Matrix\n"
19160 "OpMemoryModel Logical GLSL450\n"
19161 "OpEntryPoint GLCompute %main \"main\" %id\n"
19162 "OpExecutionMode %main LocalSize 1 1 1\n"
19163
19164 "OpSource GLSL 430\n"
19165 "OpName %main \"main\"\n"
19166 "OpName %id \"gl_GlobalInvocationID\"\n"
19167
19168 // Decorators
19169 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19170 "OpDecorate %buf BufferBlock\n"
19171 "OpDecorate %indata DescriptorSet 0\n"
19172 "OpDecorate %indata Binding 0\n"
19173 "OpDecorate %outdata DescriptorSet 0\n"
19174 "OpDecorate %outdata Binding 1\n"
19175 "OpDecorate %customarr ArrayStride 4\n"
19176 "${compositeDecorator}"
19177 "OpMemberDecorate %buf 0 Offset 0\n"
19178
19179 // General types
19180 "%void = OpTypeVoid\n"
19181 "%voidf = OpTypeFunction %void\n"
19182 "%u32 = OpTypeInt 32 0\n"
19183 "%i32 = OpTypeInt 32 1\n"
19184 "%f32 = OpTypeFloat 32\n"
19185
19186 // Composite declaration
19187 "${compositeDecl}"
19188
19189 // Constants
19190 "${filler}"
19191
19192 "${u32vec3Decl:opt}"
19193 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19194
19195 // Inherited from custom
19196 "%customptr = OpTypePointer Uniform ${customType}\n"
19197 "%customarr = OpTypeRuntimeArray ${customType}\n"
19198 "%buf = OpTypeStruct %customarr\n"
19199 "%bufptr = OpTypePointer Uniform %buf\n"
19200
19201 "%indata = OpVariable %bufptr Uniform\n"
19202 "%outdata = OpVariable %bufptr Uniform\n"
19203
19204 "%id = OpVariable %uvec3ptr Input\n"
19205 "%zero = OpConstant %i32 0\n"
19206
19207 "%main = OpFunction %void None %voidf\n"
19208 "%label = OpLabel\n"
19209 "%idval = OpLoad %u32vec3 %id\n"
19210 "%x = OpCompositeExtract %u32 %idval 0\n"
19211
19212 "%inloc = OpAccessChain %customptr %indata %zero %x\n"
19213 "%outloc = OpAccessChain %customptr %outdata %zero %x\n"
19214 // Read the input value
19215 "%inval = OpLoad ${customType} %inloc\n"
19216 // Create the composite and fill it
19217 "${compositeConstruct}"
19218 // Insert the input value to a place
19219 "%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19220 // Read back the value from the position
19221 "%out_val = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19222 // Store it in the output position
19223 " OpStore %outloc %out_val\n"
19224 " OpReturn\n"
19225 " OpFunctionEnd\n"
19226 ).specialize(parameters);
19227 }
19228
19229 template<typename T>
createCompositeBuffer(T number)19230 BufferSp createCompositeBuffer(T number)
19231 {
19232 return BufferSp(new Buffer<T>(vector<T>(1, number)));
19233 }
19234
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19235 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19236 {
19237 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
19238 de::Random rnd (deStringHash(group->getName()));
19239
19240 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19241 {
19242 NumberType numberType = NumberType(type);
19243 const string typeName = getNumberTypeName(numberType);
19244 const string description = "Test the OpCompositeInsert instruction with " + typeName + "s";
19245 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19246 vector<map<string, string> > testCases;
19247
19248 createCompositeCases(testCases, rnd, numberType);
19249
19250 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19251 {
19252 ComputeShaderSpec spec;
19253
19254 spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19255
19256 switch (numberType)
19257 {
19258 case NUMBERTYPE_INT32:
19259 {
19260 deInt32 number = getInt(rnd);
19261 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19262 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19263 break;
19264 }
19265 case NUMBERTYPE_UINT32:
19266 {
19267 deUint32 number = rnd.getUint32();
19268 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19269 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19270 break;
19271 }
19272 case NUMBERTYPE_FLOAT32:
19273 {
19274 float number = rnd.getFloat();
19275 spec.inputs.push_back(createCompositeBuffer<float>(number));
19276 spec.outputs.push_back(createCompositeBuffer<float>(number));
19277 break;
19278 }
19279 default:
19280 DE_ASSERT(false);
19281 }
19282
19283 spec.numWorkGroups = IVec3(1, 1, 1);
19284 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
19285 }
19286 group->addChild(subGroup.release());
19287 }
19288 return group.release();
19289 }
19290
19291 struct AssemblyStructInfo
19292 {
AssemblyStructInfovkt::SpirVAssembly::AssemblyStructInfo19293 AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19294 : components (comp)
19295 , index (idx)
19296 {}
19297
19298 deUint32 components;
19299 deUint32 index;
19300 };
19301
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19302 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
19303 {
19304 // Create the full index string
19305 string fullIndex = numberToString(structInfo.index) + " " + params.at("indexes");
19306 // Convert it to list of indexes
19307 vector<string> indexes = de::splitString(fullIndex, ' ');
19308
19309 map<string, string> parameters (params);
19310 parameters["structType"] = repeatString(" ${compositeType}", structInfo.components);
19311 parameters["structConstruct"] = repeatString(" %instance", structInfo.components);
19312 parameters["insertIndexes"] = fullIndex;
19313
19314 // In matrix cases the last two index is the CompositeExtract indexes
19315 const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19316
19317 // Construct the extractIndex
19318 for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19319 {
19320 parameters["extractIndexes"] += " " + *index;
19321 }
19322
19323 // Remove the last 1 or 2 element depends on matrix case or not
19324 indexes.erase(indexes.end() - extractIndexes, indexes.end());
19325
19326 deUint32 id = 0;
19327 // Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19328 for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19329 {
19330 string indexId = "%index_" + numberToString(id++);
19331 parameters["accessChainConstDeclaration"] += indexId + " = OpConstant %u32 " + *index + "\n";
19332 parameters["accessChainIndexes"] += " " + indexId;
19333 }
19334
19335 parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19336
19337 const string customType = getAssemblyTypeName(type);
19338 map<string, string> substCustomType;
19339 substCustomType["customType"] = customType;
19340 parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19341 parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19342 parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19343 parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19344 parameters["customType"] = customType;
19345
19346 const string compositeType = parameters.at("compositeType");
19347 map<string, string> substCompositeType;
19348 substCompositeType["compositeType"] = compositeType;
19349 parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19350 if (compositeType != "%u32vec3")
19351 {
19352 parameters["u32vec3Decl"] = "%u32vec3 = OpTypeVector %u32 3\n";
19353 }
19354
19355 return StringTemplate(
19356 "OpCapability Shader\n"
19357 "OpCapability Matrix\n"
19358 "OpMemoryModel Logical GLSL450\n"
19359 "OpEntryPoint GLCompute %main \"main\" %id\n"
19360 "OpExecutionMode %main LocalSize 1 1 1\n"
19361
19362 "OpSource GLSL 430\n"
19363 "OpName %main \"main\"\n"
19364 "OpName %id \"gl_GlobalInvocationID\"\n"
19365 // Decorators
19366 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19367 "OpDecorate %buf BufferBlock\n"
19368 "OpDecorate %indata DescriptorSet 0\n"
19369 "OpDecorate %indata Binding 0\n"
19370 "OpDecorate %outdata DescriptorSet 0\n"
19371 "OpDecorate %outdata Binding 1\n"
19372 "OpDecorate %customarr ArrayStride 4\n"
19373 "${compositeDecorator}"
19374 "OpMemberDecorate %buf 0 Offset 0\n"
19375 // General types
19376 "%void = OpTypeVoid\n"
19377 "%voidf = OpTypeFunction %void\n"
19378 "%i32 = OpTypeInt 32 1\n"
19379 "%u32 = OpTypeInt 32 0\n"
19380 "%f32 = OpTypeFloat 32\n"
19381 // Custom types
19382 "${compositeDecl}"
19383 // %u32vec3 if not already declared in ${compositeDecl}
19384 "${u32vec3Decl:opt}"
19385 "%uvec3ptr = OpTypePointer Input %u32vec3\n"
19386 // Inherited from composite
19387 "%composite_p = OpTypePointer Function ${compositeType}\n"
19388 "%struct_t = OpTypeStruct${structType}\n"
19389 "%struct_p = OpTypePointer Function %struct_t\n"
19390 // Constants
19391 "${filler}"
19392 "${accessChainConstDeclaration}"
19393 // Inherited from custom
19394 "%customptr = OpTypePointer Uniform ${customType}\n"
19395 "%customarr = OpTypeRuntimeArray ${customType}\n"
19396 "%buf = OpTypeStruct %customarr\n"
19397 "%bufptr = OpTypePointer Uniform %buf\n"
19398 "%indata = OpVariable %bufptr Uniform\n"
19399 "%outdata = OpVariable %bufptr Uniform\n"
19400
19401 "%id = OpVariable %uvec3ptr Input\n"
19402 "%zero = OpConstant %u32 0\n"
19403 "%main = OpFunction %void None %voidf\n"
19404 "%label = OpLabel\n"
19405 "%struct_v = OpVariable %struct_p Function\n"
19406 "%idval = OpLoad %u32vec3 %id\n"
19407 "%x = OpCompositeExtract %u32 %idval 0\n"
19408 // Create the input/output type
19409 "%inloc = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19410 "%outloc = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19411 // Read the input value
19412 "%inval = OpLoad ${customType} %inloc\n"
19413 // Create the composite and fill it
19414 "${compositeConstruct}"
19415 // Create the struct and fill it with the composite
19416 "%struct = OpCompositeConstruct %struct_t${structConstruct}\n"
19417 // Insert the value
19418 "%comp_obj = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19419 // Store the object
19420 " OpStore %struct_v %comp_obj\n"
19421 // Get deepest possible composite pointer
19422 "%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19423 "%read_obj = OpLoad ${compositeType} %inner_ptr\n"
19424 // Read back the stored value
19425 "%read_val = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19426 " OpStore %outloc %read_val\n"
19427 " OpReturn\n"
19428 " OpFunctionEnd\n"
19429 ).specialize(parameters);
19430 }
19431
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19432 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19433 {
19434 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
19435 de::Random rnd (deStringHash(group->getName()));
19436
19437 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19438 {
19439 NumberType numberType = NumberType(type);
19440 const string typeName = getNumberTypeName(numberType);
19441 const string description = "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
19442 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19443
19444 vector<map<string, string> > testCases;
19445 createCompositeCases(testCases, rnd, numberType);
19446
19447 for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19448 {
19449 ComputeShaderSpec spec;
19450
19451 // Number of components inside of a struct
19452 deUint32 structComponents = rnd.getInt(2, 8);
19453 // Component index value
19454 deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19455 AssemblyStructInfo structInfo(structComponents, structIndex);
19456
19457 spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19458
19459 switch (numberType)
19460 {
19461 case NUMBERTYPE_INT32:
19462 {
19463 deInt32 number = getInt(rnd);
19464 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19465 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19466 break;
19467 }
19468 case NUMBERTYPE_UINT32:
19469 {
19470 deUint32 number = rnd.getUint32();
19471 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19472 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19473 break;
19474 }
19475 case NUMBERTYPE_FLOAT32:
19476 {
19477 float number = rnd.getFloat();
19478 spec.inputs.push_back(createCompositeBuffer<float>(number));
19479 spec.outputs.push_back(createCompositeBuffer<float>(number));
19480 break;
19481 }
19482 default:
19483 DE_ASSERT(false);
19484 }
19485 spec.numWorkGroups = IVec3(1, 1, 1);
19486 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
19487 }
19488 group->addChild(subGroup.release());
19489 }
19490 return group.release();
19491 }
19492
19493 // If the params missing, uninitialized case
19494 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19495 {
19496 map<string, string> parameters(params);
19497
19498 parameters["customType"] = getAssemblyTypeName(type);
19499
19500 // Declare the const value, and use it in the initializer
19501 if (params.find("constValue") != params.end())
19502 {
19503 parameters["variableInitializer"] = " %const";
19504 }
19505 // Uninitialized case
19506 else
19507 {
19508 parameters["commentDecl"] = ";";
19509 }
19510
19511 return StringTemplate(
19512 "OpCapability Shader\n"
19513 "OpMemoryModel Logical GLSL450\n"
19514 "OpEntryPoint GLCompute %main \"main\" %id\n"
19515 "OpExecutionMode %main LocalSize 1 1 1\n"
19516 "OpSource GLSL 430\n"
19517 "OpName %main \"main\"\n"
19518 "OpName %id \"gl_GlobalInvocationID\"\n"
19519 // Decorators
19520 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19521 "OpDecorate %indata DescriptorSet 0\n"
19522 "OpDecorate %indata Binding 0\n"
19523 "OpDecorate %outdata DescriptorSet 0\n"
19524 "OpDecorate %outdata Binding 1\n"
19525 "OpDecorate %in_arr ArrayStride 4\n"
19526 "OpDecorate %in_buf BufferBlock\n"
19527 "OpMemberDecorate %in_buf 0 Offset 0\n"
19528 // Base types
19529 "%void = OpTypeVoid\n"
19530 "%voidf = OpTypeFunction %void\n"
19531 "%u32 = OpTypeInt 32 0\n"
19532 "%i32 = OpTypeInt 32 1\n"
19533 "%f32 = OpTypeFloat 32\n"
19534 "%uvec3 = OpTypeVector %u32 3\n"
19535 "%uvec3ptr = OpTypePointer Input %uvec3\n"
19536 "${commentDecl:opt}%const = OpConstant ${customType} ${constValue:opt}\n"
19537 // Derived types
19538 "%in_ptr = OpTypePointer Uniform ${customType}\n"
19539 "%in_arr = OpTypeRuntimeArray ${customType}\n"
19540 "%in_buf = OpTypeStruct %in_arr\n"
19541 "%in_bufptr = OpTypePointer Uniform %in_buf\n"
19542 "%indata = OpVariable %in_bufptr Uniform\n"
19543 "%outdata = OpVariable %in_bufptr Uniform\n"
19544 "%id = OpVariable %uvec3ptr Input\n"
19545 "%var_ptr = OpTypePointer Function ${customType}\n"
19546 // Constants
19547 "%zero = OpConstant %i32 0\n"
19548 // Main function
19549 "%main = OpFunction %void None %voidf\n"
19550 "%label = OpLabel\n"
19551 "%out_var = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19552 "%idval = OpLoad %uvec3 %id\n"
19553 "%x = OpCompositeExtract %u32 %idval 0\n"
19554 "%inloc = OpAccessChain %in_ptr %indata %zero %x\n"
19555 "%outloc = OpAccessChain %in_ptr %outdata %zero %x\n"
19556
19557 "%outval = OpLoad ${customType} %out_var\n"
19558 " OpStore %outloc %outval\n"
19559 " OpReturn\n"
19560 " OpFunctionEnd\n"
19561 ).specialize(parameters);
19562 }
19563
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)19564 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19565 {
19566 DE_ASSERT(outputAllocs.size() != 0);
19567 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19568
19569 // Use custom epsilon because of the float->string conversion
19570 const float epsilon = 0.00001f;
19571
19572 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19573 {
19574 vector<deUint8> expectedBytes;
19575 float expected;
19576 float actual;
19577
19578 expectedOutputs[outputNdx].getBytes(expectedBytes);
19579 memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19580 memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19581
19582 // Test with epsilon
19583 if (fabs(expected - actual) > epsilon)
19584 {
19585 log << TestLog::Message << "Error: The actual and expected values not matching."
19586 << " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19587 return false;
19588 }
19589 }
19590 return true;
19591 }
19592
19593 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)19594 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19595 {
19596 DE_ASSERT(outputAllocs.size() != 0);
19597 DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19598
19599 // Copy and discard the result.
19600 for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19601 {
19602 vector<deUint8> expectedBytes;
19603 expectedOutputs[outputNdx].getBytes(expectedBytes);
19604
19605 const size_t width = expectedBytes.size();
19606 vector<char> data (width);
19607
19608 memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
19609 }
19610 return true;
19611 }
19612
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)19613 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19614 {
19615 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19616 de::Random rnd (deStringHash(group->getName()));
19617
19618 for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19619 {
19620 NumberType numberType = NumberType(type);
19621 const string typeName = getNumberTypeName(numberType);
19622 const string description = "Test the OpVariable initializer with " + typeName + ".";
19623 de::MovePtr<tcu::TestCaseGroup> subGroup (new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19624
19625 // 2 similar subcases (initialized and uninitialized)
19626 for (int subCase = 0; subCase < 2; ++subCase)
19627 {
19628 ComputeShaderSpec spec;
19629 spec.numWorkGroups = IVec3(1, 1, 1);
19630
19631 map<string, string> params;
19632
19633 switch (numberType)
19634 {
19635 case NUMBERTYPE_INT32:
19636 {
19637 deInt32 number = getInt(rnd);
19638 spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19639 spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19640 params["constValue"] = numberToString(number);
19641 break;
19642 }
19643 case NUMBERTYPE_UINT32:
19644 {
19645 deUint32 number = rnd.getUint32();
19646 spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19647 spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19648 params["constValue"] = numberToString(number);
19649 break;
19650 }
19651 case NUMBERTYPE_FLOAT32:
19652 {
19653 float number = rnd.getFloat();
19654 spec.inputs.push_back(createCompositeBuffer<float>(number));
19655 spec.outputs.push_back(createCompositeBuffer<float>(number));
19656 spec.verifyIO = &compareFloats;
19657 params["constValue"] = numberToString(number);
19658 break;
19659 }
19660 default:
19661 DE_ASSERT(false);
19662 }
19663
19664 // Initialized subcase
19665 if (!subCase)
19666 {
19667 spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19668 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19669 }
19670 // Uninitialized subcase
19671 else
19672 {
19673 spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19674 spec.verifyIO = &passthruVerify;
19675 subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19676 }
19677 }
19678 group->addChild(subGroup.release());
19679 }
19680 return group.release();
19681 }
19682
createOpNopTests(tcu::TestContext & testCtx)19683 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19684 {
19685 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19686 RGBA defaultColors[4];
19687 map<string, string> opNopFragments;
19688
19689 getDefaultColors(defaultColors);
19690
19691 opNopFragments["testfun"] =
19692 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19693 "%param1 = OpFunctionParameter %v4f32\n"
19694 "%label_testfun = OpLabel\n"
19695 "OpNop\n"
19696 "OpNop\n"
19697 "OpNop\n"
19698 "OpNop\n"
19699 "OpNop\n"
19700 "OpNop\n"
19701 "OpNop\n"
19702 "OpNop\n"
19703 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19704 "%b = OpFAdd %f32 %a %a\n"
19705 "OpNop\n"
19706 "%c = OpFSub %f32 %b %a\n"
19707 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19708 "OpNop\n"
19709 "OpNop\n"
19710 "OpReturnValue %ret\n"
19711 "OpFunctionEnd\n";
19712
19713 createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19714
19715 return testGroup.release();
19716 }
19717
createOpNameTests(tcu::TestContext & testCtx)19718 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19719 {
19720 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19721 RGBA defaultColors[4];
19722 map<string, string> opNameFragments;
19723
19724 getDefaultColors(defaultColors);
19725
19726 opNameFragments["testfun"] =
19727 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19728 "%param1 = OpFunctionParameter %v4f32\n"
19729 "%label_func = OpLabel\n"
19730 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19731 "%b = OpFAdd %f32 %a %a\n"
19732 "%c = OpFSub %f32 %b %a\n"
19733 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19734 "OpReturnValue %ret\n"
19735 "OpFunctionEnd\n";
19736
19737 opNameFragments["debug"] =
19738 "OpName %BP_main \"not_main\"";
19739
19740 createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19741
19742 return testGroup.release();
19743 }
19744
createFloat16Tests(tcu::TestContext & testCtx)19745 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19746 {
19747 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19748
19749 testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19750 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19751 testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19752 testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19753 testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19754 testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19755 testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19756 testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19757 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19758 testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19759 testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19760 testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19761 testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19762 testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19763 testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19764
19765 return testGroup.release();
19766 }
19767
createFloat32Tests(tcu::TestContext & testCtx)19768 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19769 {
19770 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19771
19772 testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19773 testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19774 testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19775 testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19776
19777 return testGroup.release();
19778 }
19779
createFloat16Group(tcu::TestContext & testCtx)19780 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19781 {
19782 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19783
19784 testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19785 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19786 testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19787 testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19788 testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19789 testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19790 testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19791 testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19792 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19793 testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19794 testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19795 testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19796 testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19797 testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19798 testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19799
19800 return testGroup.release();
19801 }
19802
createFloat32Group(tcu::TestContext & testCtx)19803 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19804 {
19805 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19806
19807 testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19808 testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19809 testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19810 testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19811
19812 return testGroup.release();
19813 }
19814
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)19815 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19816 {
19817 de::MovePtr<tcu::TestCaseGroup> group (new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19818
19819 de::Random rnd (deStringHash(group->getName()));
19820 const int numElements = 100;
19821 vector<float> inputData (numElements, 0);
19822 vector<float> outputData (numElements, 0);
19823 fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19824
19825 const StringTemplate shaderTemplate (
19826 "${CAPS}\n"
19827 "OpMemoryModel Logical GLSL450\n"
19828 "OpEntryPoint GLCompute %main \"main\" %id\n"
19829 "OpExecutionMode %main LocalSize 1 1 1\n"
19830 "OpSource GLSL 430\n"
19831 "OpName %main \"main\"\n"
19832 "OpName %id \"gl_GlobalInvocationID\"\n"
19833
19834 "OpDecorate %id BuiltIn GlobalInvocationId\n"
19835
19836 + string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19837
19838 "%id = OpVariable %uvec3ptr Input\n"
19839 "${CONST}\n"
19840 "%main = OpFunction %void None %voidf\n"
19841 "%label = OpLabel\n"
19842 "%idval = OpLoad %uvec3 %id\n"
19843 "%x = OpCompositeExtract %u32 %idval 0\n"
19844 "%inloc = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19845
19846 "${TEST}\n"
19847
19848 "%outloc = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19849 " OpStore %outloc %res\n"
19850 " OpReturn\n"
19851 " OpFunctionEnd\n"
19852 );
19853
19854 // Each test case produces 4 boolean values, and we want each of these values
	// to come from a different combination of the available bit-sizes, so compute
19856 // all possible combinations here.
19857 vector<deUint32> widths;
19858 widths.push_back(32);
19859 widths.push_back(16);
19860 widths.push_back(8);
19861
19862 vector<IVec4> cases;
19863 for (size_t width0 = 0; width0 < widths.size(); width0++)
19864 {
19865 for (size_t width1 = 0; width1 < widths.size(); width1++)
19866 {
19867 for (size_t width2 = 0; width2 < widths.size(); width2++)
19868 {
19869 for (size_t width3 = 0; width3 < widths.size(); width3++)
19870 {
19871 cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19872 }
19873 }
19874 }
19875 }
19876
19877 for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19878 {
19879 /// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19880 if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19881 continue;
19882
19883 map<string, string> specializations;
19884 ComputeShaderSpec spec;
19885
19886 // Inject appropriate capabilities and reference constants depending
19887 // on the bit-sizes required by this test case
19888 bool hasFloat32 = cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19889 bool hasFloat16 = cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19890 bool hasInt8 = cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19891
19892 string capsStr = "OpCapability Shader\n";
19893 string constStr =
19894 "%c0i32 = OpConstant %i32 0\n"
19895 "%c1f32 = OpConstant %f32 1.0\n"
19896 "%c0f32 = OpConstant %f32 0.0\n";
19897
19898 if (hasFloat32)
19899 {
19900 constStr +=
19901 "%c10f32 = OpConstant %f32 10.0\n"
19902 "%c25f32 = OpConstant %f32 25.0\n"
19903 "%c50f32 = OpConstant %f32 50.0\n"
19904 "%c90f32 = OpConstant %f32 90.0\n";
19905 }
19906
19907 if (hasFloat16)
19908 {
19909 capsStr += "OpCapability Float16\n";
19910 constStr +=
19911 "%f16 = OpTypeFloat 16\n"
19912 "%c10f16 = OpConstant %f16 10.0\n"
19913 "%c25f16 = OpConstant %f16 25.0\n"
19914 "%c50f16 = OpConstant %f16 50.0\n"
19915 "%c90f16 = OpConstant %f16 90.0\n";
19916 }
19917
19918 if (hasInt8)
19919 {
19920 capsStr += "OpCapability Int8\n";
19921 constStr +=
19922 "%i8 = OpTypeInt 8 1\n"
19923 "%c10i8 = OpConstant %i8 10\n"
19924 "%c25i8 = OpConstant %i8 25\n"
19925 "%c50i8 = OpConstant %i8 50\n"
19926 "%c90i8 = OpConstant %i8 90\n";
19927 }
19928
19929 // Each invocation reads a different float32 value as input. Depending on
19930 // the bit-sizes required by the particular test case, we also produce
19931 // float16 and/or and int8 values by converting from the 32-bit float.
19932 string testStr = "";
19933 testStr += "%inval32 = OpLoad %f32 %inloc\n";
19934 if (hasFloat16)
19935 testStr += "%inval16 = OpFConvert %f16 %inval32\n";
19936 if (hasInt8)
19937 testStr += "%inval8 = OpConvertFToS %i8 %inval32\n";
19938
19939 // Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19940 // that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19941 // when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19942 // other way around, so in this case we want < instead of <=.
19943 if (cases[caseNdx][0] == 32)
19944 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19945 else if (cases[caseNdx][0] == 16)
19946 testStr += "%cmp1 = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19947 else
19948 testStr += "%cmp1 = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
19949
19950 if (cases[caseNdx][1] == 32)
19951 testStr += "%cmp2 = OpFOrdLessThan %bool %inval32 %c50f32\n";
19952 else if (cases[caseNdx][1] == 16)
19953 testStr += "%cmp2 = OpFOrdLessThan %bool %inval16 %c50f16\n";
19954 else
19955 testStr += "%cmp2 = OpSLessThan %bool %inval8 %c50i8\n";
19956
19957 if (cases[caseNdx][2] == 32)
19958 testStr += "%cmp3 = OpFOrdLessThan %bool %inval32 %c10f32\n";
19959 else if (cases[caseNdx][2] == 16)
19960 testStr += "%cmp3 = OpFOrdLessThan %bool %inval16 %c10f16\n";
19961 else
19962 testStr += "%cmp3 = OpSLessThan %bool %inval8 %c10i8\n";
19963
19964 if (cases[caseNdx][3] == 32)
19965 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19966 else if (cases[caseNdx][3] == 16)
19967 testStr += "%cmp4 = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19968 else
19969 testStr += "%cmp4 = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
19970
19971 testStr += "%and1 = OpLogicalAnd %bool %cmp1 %cmp2\n";
19972 testStr += "%or1 = OpLogicalOr %bool %cmp3 %cmp4\n";
19973 testStr += "%or2 = OpLogicalOr %bool %and1 %or1\n";
19974 testStr += "%not1 = OpLogicalNot %bool %or2\n";
19975 testStr += "%res = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19976
19977 specializations["CAPS"] = capsStr;
19978 specializations["CONST"] = constStr;
19979 specializations["TEST"] = testStr;
19980
19981 // Compute expected result by evaluating the boolean expression computed in the shader for each input value
19982 for (size_t ndx = 0; ndx < numElements; ++ndx)
19983 outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
19984
19985 spec.assembly = shaderTemplate.specialize(specializations);
19986 spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
19987 spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
19988 spec.numWorkGroups = IVec3(numElements, 1, 1);
19989 if (hasFloat16)
19990 spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
19991 if (hasInt8)
19992 spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
19993 spec.extensions.push_back("VK_KHR_shader_float16_int8");
19994
19995 string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
19996 group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
19997 }
19998
19999 return group.release();
20000 }
20001
createBoolGroup(tcu::TestContext & testCtx)20002 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20003 {
20004 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20005
20006 testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20007
20008 return testGroup.release();
20009 }
20010
createOpNameAbuseTests(tcu::TestContext & testCtx)20011 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20012 {
20013 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20014 vector<CaseParameter> abuseCases;
20015 RGBA defaultColors[4];
20016 map<string, string> opNameFragments;
20017
20018 getOpNameAbuseCases(abuseCases);
20019 getDefaultColors(defaultColors);
20020
20021 opNameFragments["testfun"] =
20022 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20023 "%param1 = OpFunctionParameter %v4f32\n"
20024 "%label_func = OpLabel\n"
20025 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20026 "%b = OpFAdd %f32 %a %a\n"
20027 "%c = OpFSub %f32 %b %a\n"
20028 "%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20029 "OpReturnValue %ret\n"
20030 "OpFunctionEnd\n";
20031
20032 for (unsigned int i = 0; i < abuseCases.size(); i++)
20033 {
20034 string casename;
20035 casename = string("main") + abuseCases[i].name;
20036
20037 opNameFragments["debug"] =
20038 "OpName %BP_main \"" + abuseCases[i].param + "\"";
20039
20040 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20041 }
20042
20043 for (unsigned int i = 0; i < abuseCases.size(); i++)
20044 {
20045 string casename;
20046 casename = string("b") + abuseCases[i].name;
20047
20048 opNameFragments["debug"] =
20049 "OpName %b \"" + abuseCases[i].param + "\"";
20050
20051 createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20052 }
20053
20054 {
20055 opNameFragments["debug"] =
20056 "OpName %test_code \"name1\"\n"
20057 "OpName %param1 \"name2\"\n"
20058 "OpName %a \"name3\"\n"
20059 "OpName %b \"name4\"\n"
20060 "OpName %c \"name5\"\n"
20061 "OpName %ret \"name6\"\n";
20062
20063 createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20064 }
20065
20066 {
20067 opNameFragments["debug"] =
20068 "OpName %test_code \"the_same\"\n"
20069 "OpName %param1 \"the_same\"\n"
20070 "OpName %a \"the_same\"\n"
20071 "OpName %b \"the_same\"\n"
20072 "OpName %c \"the_same\"\n"
20073 "OpName %ret \"the_same\"\n";
20074
20075 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20076 }
20077
20078 {
20079 opNameFragments["debug"] =
20080 "OpName %BP_main \"to_be\"\n"
20081 "OpName %BP_main \"or_not\"\n"
20082 "OpName %BP_main \"to_be\"\n";
20083
20084 createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20085 }
20086
20087 {
20088 opNameFragments["debug"] =
20089 "OpName %b \"to_be\"\n"
20090 "OpName %b \"or_not\"\n"
20091 "OpName %b \"to_be\"\n";
20092
20093 createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20094 }
20095
20096 return abuseGroup.release();
20097 }
20098
20099
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20100 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20101 {
20102 de::MovePtr<tcu::TestCaseGroup> abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20103 vector<CaseParameter> abuseCases;
20104 RGBA defaultColors[4];
20105 map<string, string> opMemberNameFragments;
20106
20107 getOpNameAbuseCases(abuseCases);
20108 getDefaultColors(defaultColors);
20109
20110 opMemberNameFragments["pre_main"] =
20111 "%f3str = OpTypeStruct %f32 %f32 %f32\n";
20112
20113 opMemberNameFragments["testfun"] =
20114 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20115 "%param1 = OpFunctionParameter %v4f32\n"
20116 "%label_func = OpLabel\n"
20117 "%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20118 "%b = OpFAdd %f32 %a %a\n"
20119 "%c = OpFSub %f32 %b %a\n"
20120 "%cstr = OpCompositeConstruct %f3str %c %c %c\n"
20121 "%d = OpCompositeExtract %f32 %cstr 0\n"
20122 "%ret = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20123 "OpReturnValue %ret\n"
20124 "OpFunctionEnd\n";
20125
20126 for (unsigned int i = 0; i < abuseCases.size(); i++)
20127 {
20128 string casename;
20129 casename = string("f3str_x") + abuseCases[i].name;
20130
20131 opMemberNameFragments["debug"] =
20132 "OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20133
20134 createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20135 }
20136
20137 {
20138 opMemberNameFragments["debug"] =
20139 "OpMemberName %f3str 0 \"name1\"\n"
20140 "OpMemberName %f3str 1 \"name2\"\n"
20141 "OpMemberName %f3str 2 \"name3\"\n";
20142
20143 createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20144 }
20145
20146 {
20147 opMemberNameFragments["debug"] =
20148 "OpMemberName %f3str 0 \"the_same\"\n"
20149 "OpMemberName %f3str 1 \"the_same\"\n"
20150 "OpMemberName %f3str 2 \"the_same\"\n";
20151
20152 createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20153 }
20154
20155 {
20156 opMemberNameFragments["debug"] =
20157 "OpMemberName %f3str 0 \"to_be\"\n"
20158 "OpMemberName %f3str 1 \"or_not\"\n"
20159 "OpMemberName %f3str 0 \"to_be\"\n"
20160 "OpMemberName %f3str 2 \"makes_no\"\n"
20161 "OpMemberName %f3str 0 \"difference\"\n"
20162 "OpMemberName %f3str 0 \"to_me\"\n";
20163
20164
20165 createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20166 }
20167
20168 return abuseGroup.release();
20169 }
20170
getSparseIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20171 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20172 {
20173 vector<deUint32> result;
20174 de::Random rnd (seed);
20175
20176 result.reserve(numDataPoints);
20177
20178 for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20179 result.push_back(rnd.getUint32());
20180
20181 return result;
20182 }
20183
// Returns the element-wise sum inData1[i] + inData2[i] (unsigned 32-bit
// arithmetic). The result has inData1.size() elements; inData2 is indexed over
// the same range, so it must be at least as long as inData1.
vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
{
	const size_t		numValues	= inData1.size();
	vector<deUint32>	sums		(numValues);

	for (size_t ndx = 0; ndx != numValues; ++ndx)
		sums[ndx] = inData1[ndx] + inData2[ndx];

	return sums;
}
20195
20196 template<class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20197 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20198 {
20199 const deUint32 numDataPoints = 16;
20200 const std::string testName ("sparse_ids");
20201 const deUint32 seed (deStringHash(testName.c_str()));
20202 const vector<deUint32> inData1 (getSparseIdsAbuseData(numDataPoints, seed + 1));
20203 const vector<deUint32> inData2 (getSparseIdsAbuseData(numDataPoints, seed + 2));
20204 const vector<deUint32> outData (getSparseIdsAbuseResults(inData1, inData2));
20205 const StringTemplate preMain
20206 (
20207 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20208 " %up_u32 = OpTypePointer Uniform %u32\n"
20209 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20210 " %SSBO32 = OpTypeStruct %ra_u32\n"
20211 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20212 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20213 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20214 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20215 );
20216 const StringTemplate decoration
20217 (
20218 "OpDecorate %ra_u32 ArrayStride 4\n"
20219 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20220 "OpDecorate %SSBO32 BufferBlock\n"
20221 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20222 "OpDecorate %ssbo_src0 Binding 0\n"
20223 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20224 "OpDecorate %ssbo_src1 Binding 1\n"
20225 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20226 "OpDecorate %ssbo_dst Binding 2\n"
20227 );
20228 const StringTemplate testFun
20229 (
20230 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20231 " %param = OpFunctionParameter %v4f32\n"
20232
20233 " %entry = OpLabel\n"
20234 " %i = OpVariable %fp_i32 Function\n"
20235 " OpStore %i %c_i32_0\n"
20236 " OpBranch %loop\n"
20237
20238 " %loop = OpLabel\n"
20239 " %i_cmp = OpLoad %i32 %i\n"
20240 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20241 " OpLoopMerge %merge %next None\n"
20242 " OpBranchConditional %lt %write %merge\n"
20243
20244 " %write = OpLabel\n"
20245 " %ndx = OpLoad %i32 %i\n"
20246
20247 " %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20248 " %128 = OpLoad %u32 %127\n"
20249
20250 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20251 " %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20252 " %4194001 = OpLoad %u32 %4194000\n"
20253
20254 " %2097151 = OpIAdd %u32 %128 %4194001\n"
20255 " %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20256 " OpStore %2097152 %2097151\n"
20257 " OpBranch %next\n"
20258
20259 " %next = OpLabel\n"
20260 " %i_cur = OpLoad %i32 %i\n"
20261 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20262 " OpStore %i %i_new\n"
20263 " OpBranch %loop\n"
20264
20265 " %merge = OpLabel\n"
20266 " OpReturnValue %param\n"
20267
20268 " OpFunctionEnd\n"
20269 );
20270 SpecResource specResource;
20271 map<string, string> specs;
20272 VulkanFeatures features;
20273 map<string, string> fragments;
20274 vector<string> extensions;
20275
20276 specs["num_data_points"] = de::toString(numDataPoints);
20277
20278 fragments["decoration"] = decoration.specialize(specs);
20279 fragments["pre_main"] = preMain.specialize(specs);
20280 fragments["testfun"] = testFun.specialize(specs);
20281
20282 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20283 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20284 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20285
20286 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20287 {
20288 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20289 features.coreFeatures.fragmentStoresAndAtomics = true;
20290 }
20291
20292 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20293 }
20294
getLotsIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20295 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20296 {
20297 vector<deUint32> result;
20298 de::Random rnd (seed);
20299
20300 result.reserve(numDataPoints);
20301
20302 // Fixed value
20303 result.push_back(1u);
20304
20305 // Random values
20306 for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20307 result.push_back(rnd.getUint8());
20308
20309 return result;
20310 }
20311
// Returns inData1[i] + count * inData2[i] for every index of inData1 (unsigned
// 32-bit arithmetic). inData2 is indexed over the same range, so it must be at
// least as long as inData1.
vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
{
	const size_t		numValues	= inData1.size();
	vector<deUint32>	accumulated	(numValues);

	for (size_t ndx = 0; ndx != numValues; ++ndx)
		accumulated[ndx] = inData1[ndx] + count * inData2[ndx];

	return accumulated;
}
20323
20324 template<class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20325 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20326 {
20327 const deUint32 numDataPoints = 16;
20328 const deUint32 firstNdx = 100u;
20329 const deUint32 sequenceCount = 10000u;
20330 const std::string testName ("lots_ids");
20331 const deUint32 seed (deStringHash(testName.c_str()));
20332 const vector<deUint32> inData1 (getLotsIdsAbuseData(numDataPoints, seed + 1));
20333 const vector<deUint32> inData2 (getLotsIdsAbuseData(numDataPoints, seed + 2));
20334 const vector<deUint32> outData (getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20335 const StringTemplate preMain
20336 (
20337 "%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20338 " %up_u32 = OpTypePointer Uniform %u32\n"
20339 " %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20340 " %SSBO32 = OpTypeStruct %ra_u32\n"
20341 "%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20342 "%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20343 "%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20344 " %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20345 );
20346 const StringTemplate decoration
20347 (
20348 "OpDecorate %ra_u32 ArrayStride 4\n"
20349 "OpMemberDecorate %SSBO32 0 Offset 0\n"
20350 "OpDecorate %SSBO32 BufferBlock\n"
20351 "OpDecorate %ssbo_src0 DescriptorSet 0\n"
20352 "OpDecorate %ssbo_src0 Binding 0\n"
20353 "OpDecorate %ssbo_src1 DescriptorSet 0\n"
20354 "OpDecorate %ssbo_src1 Binding 1\n"
20355 "OpDecorate %ssbo_dst DescriptorSet 0\n"
20356 "OpDecorate %ssbo_dst Binding 2\n"
20357 );
20358 const StringTemplate testFun
20359 (
20360 "%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20361 " %param = OpFunctionParameter %v4f32\n"
20362
20363 " %entry = OpLabel\n"
20364 " %i = OpVariable %fp_i32 Function\n"
20365 " OpStore %i %c_i32_0\n"
20366 " OpBranch %loop\n"
20367
20368 " %loop = OpLabel\n"
20369 " %i_cmp = OpLoad %i32 %i\n"
20370 " %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20371 " OpLoopMerge %merge %next None\n"
20372 " OpBranchConditional %lt %write %merge\n"
20373
20374 " %write = OpLabel\n"
20375 " %ndx = OpLoad %i32 %i\n"
20376
20377 " %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20378 " %91 = OpLoad %u32 %90\n"
20379
20380 " %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20381 " %${zeroth_id} = OpLoad %u32 %98\n"
20382
20383 "${seq}\n"
20384
20385 // The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20386 " %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20387 " OpStore %dst %${last_id}\n"
20388 " OpBranch %next\n"
20389
20390 " %next = OpLabel\n"
20391 " %i_cur = OpLoad %i32 %i\n"
20392 " %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20393 " OpStore %i %i_new\n"
20394 " OpBranch %loop\n"
20395
20396 " %merge = OpLabel\n"
20397 " OpReturnValue %param\n"
20398
20399 " OpFunctionEnd\n"
20400 );
20401 deUint32 lastId = firstNdx;
20402 SpecResource specResource;
20403 map<string, string> specs;
20404 VulkanFeatures features;
20405 map<string, string> fragments;
20406 vector<string> extensions;
20407 std::string sequence;
20408
20409 for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20410 {
20411 const deUint32 sequenceId = sequenceNdx + firstNdx;
20412 const std::string sequenceIdStr = de::toString(sequenceId);
20413
20414 sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20415 lastId = sequenceId;
20416
20417 if (sequenceNdx == 0)
20418 sequence.reserve((10 + sequence.length()) * sequenceCount);
20419 }
20420
20421 specs["num_data_points"] = de::toString(numDataPoints);
20422 specs["zeroth_id"] = de::toString(firstNdx - 1);
20423 specs["last_id"] = de::toString(lastId);
20424 specs["seq"] = sequence;
20425
20426 fragments["decoration"] = decoration.specialize(specs);
20427 fragments["pre_main"] = preMain.specialize(specs);
20428 fragments["testfun"] = testFun.specialize(specs);
20429
20430 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20431 specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20432 specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20433
20434 if (std::is_base_of<GraphicsResources, SpecResource>::value)
20435 {
20436 features.coreFeatures.vertexPipelineStoresAndAtomics = true;
20437 features.coreFeatures.fragmentStoresAndAtomics = true;
20438 }
20439
20440 finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20441 }
20442
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20443 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20444 {
20445 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20446
20447 createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20448 createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20449
20450 return testGroup.release();
20451 }
20452
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20453 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20454 {
20455 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20456
20457 createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20458 createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20459
20460 return testGroup.release();
20461 }
20462
createFunctionParamsGroup(tcu::TestContext & testCtx)20463 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20464 {
20465 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20466
20467 static const char data_dir[] = "spirv_assembly/instruction/function_params";
20468
20469 static const struct
20470 {
20471 const std::string name;
20472 const std::string desc;
20473 } cases[] =
20474 {
20475 { "sampler_param", "Test combined image sampler as function parameter" },
20476 };
20477
20478 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20479 {
20480 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20481 cases[i].name.c_str(),
20482 cases[i].desc.c_str(),
20483 data_dir,
20484 cases[i].name + ".amber");
20485 testGroup->addChild(testCase);
20486 }
20487
20488 return testGroup.release();
20489 }
20490
createEarlyFragmentTests(tcu::TestContext & testCtx)20491 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20492 {
20493 de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20494
20495 static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20496
20497 static const struct Case
20498 {
20499 const string name;
20500 const string desc;
20501 }
20502 cases[] =
20503 {
20504 // Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20505 { "depth_less", "gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." },
20506 { "depth_greater", "gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH." },
20507 { "depth_less_or_equal", "gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20508 { "depth_greater_or_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20509 { "depth_equal", "gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH." },
20510 { "depth_not_equal", "gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH." }
20511 };
20512
20513 for (const auto& tCase : cases)
20514 {
20515 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20516 tCase.name.c_str(),
20517 tCase.desc.c_str(),
20518 dataDir,
20519 tCase.name + ".amber");
20520
20521 earlyFragTests->addChild(testCase);
20522 }
20523
20524 return earlyFragTests.release();
20525 }
20526
createOpExecutionModeTests(tcu::TestContext & testCtx)20527 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20528 {
20529 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20530
20531 static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
20532
20533 static const struct Case
20534 {
20535 const string name;
20536 const string desc;
20537 } cases[] =
20538 {
20539 { "depthless_0", "FragDepth < Polygon depth: depth test should pass." },
20540 { "depthless_1", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20541 { "depthless_2", "FragDepth < Polygon depth: depth test should fail." },
20542 { "depthless_3", "FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20543 { "depthless_4", "FragDepth < Polygon depth: depth test should pass." },
20544 { "depthgreater_0", "FragDepth > Polygon depth: depth test should pass." },
20545 { "depthgreater_1", "FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20546 { "depthgreater_2", "FragDepth > Polygon depth: depth test should fail." },
20547 { "depthgreater_3", "FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20548 { "depthgreater_4", "FragDepth > Polygon depth: depth test should pass." },
20549 { "depthunchanged_0", "FragDepth == Polygon depth: depth test should pass." },
20550 { "depthunchanged_1", "FragDepth == Polygon depth: depth test should fail." },
20551 { "depthunchanged_2", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20552 { "depthunchanged_3", "FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
20553 };
20554
20555 for (const auto& case_ : cases)
20556 {
20557 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20558 case_.name.c_str(),
20559 case_.desc.c_str(),
20560 dataDir,
20561 case_.name + ".amber");
20562 testGroup->addChild(testCase);
20563 }
20564
20565 return testGroup.release();
20566 }
20567
createQueryGroup(tcu::TestContext & testCtx)20568 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20569 {
20570 de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20571
20572 static const char data_dir[] = "spirv_assembly/instruction/image_query";
20573
20574 static const struct
20575 {
20576 const std::string name;
20577 const std::string desc;
20578 } cases[] =
20579 {
20580 { "samples_storage", "Test samples query can be used on storage images" },
20581 };
20582
20583 vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20584
20585 for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20586 {
20587 cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20588 cases[i].name.c_str(),
20589 cases[i].desc.c_str(),
20590 data_dir,
20591 cases[i].name + ".amber",
20592 requirements);
20593 testGroup->addChild(testCase);
20594 }
20595
20596 return testGroup.release();
20597 }
20598
createInstructionTests(tcu::TestContext & testCtx)20599 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20600 {
20601 const bool testComputePipeline = true;
20602
20603 de::MovePtr<tcu::TestCaseGroup> instructionTests (new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20604 de::MovePtr<tcu::TestCaseGroup> computeTests (new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20605 de::MovePtr<tcu::TestCaseGroup> graphicsTests (new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20606
20607 computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20608 computeTests->addChild(createLocalSizeGroup(testCtx, false));
20609 computeTests->addChild(createLocalSizeGroup(testCtx, true));
20610 computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20611 computeTests->addChild(createOpNopGroup(testCtx));
20612 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20613 computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20614 computeTests->addChild(createOpAtomicGroup(testCtx, false));
20615 computeTests->addChild(createOpAtomicGroup(testCtx, true)); // Using new StorageBuffer decoration
20616 computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true)); // Return value validation
20617 computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true)); // volatile atomics
20618 computeTests->addChild(createOpLineGroup(testCtx));
20619 computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20620 computeTests->addChild(createOpNoLineGroup(testCtx));
20621 computeTests->addChild(createOpConstantNullGroup(testCtx));
20622 computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20623 computeTests->addChild(createOpConstantUsageGroup(testCtx));
20624 computeTests->addChild(createSpecConstantGroup(testCtx));
20625 computeTests->addChild(createOpSourceGroup(testCtx));
20626 computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20627 computeTests->addChild(createDecorationGroupGroup(testCtx));
20628 computeTests->addChild(createOpPhiGroup(testCtx));
20629 computeTests->addChild(createLoopControlGroup(testCtx));
20630 computeTests->addChild(createFunctionControlGroup(testCtx));
20631 computeTests->addChild(createSelectionControlGroup(testCtx));
20632 computeTests->addChild(createBlockOrderGroup(testCtx));
20633 computeTests->addChild(createMultipleShaderGroup(testCtx));
20634 computeTests->addChild(createMemoryAccessGroup(testCtx));
20635 computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20636 computeTests->addChild(createOpCopyObjectGroup(testCtx));
20637 computeTests->addChild(createNoContractionGroup(testCtx));
20638 computeTests->addChild(createOpUndefGroup(testCtx));
20639 computeTests->addChild(createOpUnreachableGroup(testCtx));
20640 computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20641 computeTests->addChild(createOpFRemGroup(testCtx));
20642 computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20643 computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20644 computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20645 computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20646 computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20647 computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20648 computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20649 computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20650 computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20651 computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20652 computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20653 computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20654 computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20655 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20656 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20657 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20658 computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20659 computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20660 computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20661 computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20662 computeTests->addChild(createOpNMinGroup(testCtx));
20663 computeTests->addChild(createOpNMaxGroup(testCtx));
20664 computeTests->addChild(createOpNClampGroup(testCtx));
20665 computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
20666 {
20667 de::MovePtr<tcu::TestCaseGroup> computeAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20668
20669 computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20670 computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20671
20672 computeTests->addChild(computeAndroidTests.release());
20673 }
20674
20675 computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20676 computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20677 computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20678 computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20679 computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20680 computeTests->addChild(createVariableInitComputeGroup(testCtx));
20681 computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20682 computeTests->addChild(createIndexingComputeGroup(testCtx));
20683 computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20684 computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20685 computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20686 computeTests->addChild(createOpNameGroup(testCtx));
20687 computeTests->addChild(createOpMemberNameGroup(testCtx));
20688 computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20689 computeTests->addChild(createFloat16Group(testCtx));
20690 computeTests->addChild(createFloat32Group(testCtx));
20691 computeTests->addChild(createBoolGroup(testCtx));
20692 computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20693 computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20694 computeTests->addChild(createSignedIntCompareGroup(testCtx));
20695 computeTests->addChild(createSignedOpTestsGroup(testCtx));
20696 computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20697 computeTests->addChild(createPtrAccessChainGroup(testCtx));
20698 computeTests->addChild(createVectorShuffleGroup(testCtx));
20699 computeTests->addChild(createHlslComputeGroup(testCtx));
20700 computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20701 computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20702 computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20703 computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20704
20705 graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20706 graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20707 graphicsTests->addChild(createOpNopTests(testCtx));
20708 graphicsTests->addChild(createOpSourceTests(testCtx));
20709 graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20710 graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20711 graphicsTests->addChild(createOpLineTests(testCtx));
20712 graphicsTests->addChild(createOpNoLineTests(testCtx));
20713 graphicsTests->addChild(createOpConstantNullTests(testCtx));
20714 graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20715 graphicsTests->addChild(createMemoryAccessTests(testCtx));
20716 graphicsTests->addChild(createOpUndefTests(testCtx));
20717 graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20718 graphicsTests->addChild(createModuleTests(testCtx));
20719 graphicsTests->addChild(createUnusedVariableTests(testCtx));
20720 graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20721 graphicsTests->addChild(createOpPhiTests(testCtx));
20722 graphicsTests->addChild(createNoContractionTests(testCtx));
20723 graphicsTests->addChild(createOpQuantizeTests(testCtx));
20724 graphicsTests->addChild(createLoopTests(testCtx));
20725 graphicsTests->addChild(createSpecConstantTests(testCtx));
20726 graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20727 graphicsTests->addChild(createBarrierTests(testCtx));
20728 graphicsTests->addChild(createDecorationGroupTests(testCtx));
20729 graphicsTests->addChild(createFRemTests(testCtx));
20730 graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20731 graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20732
20733 {
20734 de::MovePtr<tcu::TestCaseGroup> graphicsAndroidTests (new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20735
20736 graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20737 graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20738
20739 graphicsTests->addChild(graphicsAndroidTests.release());
20740 }
20741
20742 graphicsTests->addChild(createOpNameTests(testCtx));
20743 graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20744 graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20745
20746 graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20747 graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20748 graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20749 graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20750 graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20751 graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20752 graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20753 graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20754 graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20755 graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20756 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20757 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20758 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20759 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20760 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20761 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20762 graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20763 graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20764 graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20765 graphicsTests->addChild(createFloat16Tests(testCtx));
20766 graphicsTests->addChild(createFloat32Tests(testCtx));
20767 graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20768 graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20769 graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20770 graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20771
20772 instructionTests->addChild(computeTests.release());
20773 instructionTests->addChild(graphicsTests.release());
20774 instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20775 instructionTests->addChild(createFunctionParamsGroup(testCtx));
20776 instructionTests->addChild(createQueryGroup(testCtx));
20777 instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20778 instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20779
20780 return instructionTests.release();
20781 }
20782
20783 } // SpirVAssembly
20784 } // vkt
20785