• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  * Copyright (c) 2016 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37 
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54 
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #ifndef CTS_USES_VULKANSC
89 #include "vktSpvAsmIntegerDotProductTests.hpp"
90 #endif // CTS_USES_VULKANSC
91 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
92 
93 #include <cmath>
94 #include <limits>
95 #include <map>
96 #include <string>
97 #include <sstream>
98 #include <utility>
99 #include <stack>
100 
101 namespace vkt
102 {
103 namespace SpirVAssembly
104 {
105 
106 namespace
107 {
108 
// Shorthand names made available inside this file-local namespace. Each name
// is imported exactly once.
109 using namespace vk;
110 using std::map;
111 using std::string;
112 using std::vector;
113 using tcu::IVec3;
114 using tcu::IVec4;
115 using tcu::RGBA;
116 using tcu::TestLog;
117 using tcu::TestStatus;
118 using tcu::Vec4;
119 using de::UniquePtr;
120 using tcu::StringTemplate;
122 
// Named boolean constants. NOTE(review): presumably passed as a "include NaN
// inputs" flag so call sites read better than a bare true/false -- the users
// are not visible in this part of the file, confirm against them.
123 const bool TEST_WITH_NAN	= true;
124 const bool TEST_WITHOUT_NAN	= false;
125 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> f16.
// The index is divided by %c_u32_2 to address one u32 word of %${var}; that
// word is bitcast to v2f16 and the component selected by (index & %c_u32_1)
// is returned. ${var} is a placeholder -- presumably substituted through
// tcu::StringTemplate before assembly; confirm at the use site. The type and
// constant ids referenced here are declared outside this string.
126 const string loadScalarF16FromUint =
127 	"%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
128 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
129 	"%ld_arg_${var}_entry = OpLabel\n"
130 	"%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
131 	"%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
132 	"%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
133 	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
134 	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
135 	"%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
136 	"%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
137 	"OpReturnValue %ld_arg_${var}_ex\n"
138 	"OpFunctionEnd\n";
139 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> v2f16.
// Loads the single u32 word addressed by (%c_u32_0, index) in %${var} and
// bitcasts it to v2f16.
140 const string loadV2F16FromUint =
141 	"%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
142 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
143 	"%ld_arg_${var}_entry = OpLabel\n"
144 	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
145 	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
146 	"%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
147 	"OpReturnValue %ld_arg_${var}_cast\n"
148 	"OpFunctionEnd\n";
149 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> v3f16.
// Loads the two u32 words at sub-indices %c_u32_0 and %c_u32_1 of element
// `index`, bitcasts each to v2f16 and shuffles the pair into a v3f16.
150 const string loadV3F16FromUints =
151 	// Since we allocate a vec4 worth of values, this case is almost the
152 	// same as the vec4 case; the shuffle just keeps components 0-2 only.
153 	"%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
154 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
155 	"%ld_arg_${var}_entry = OpLabel\n"
156 	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
157 	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
158 	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
159 	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
160 	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
161 	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
162 	"%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
163 	"OpReturnValue %ld_arg_${var}_shuffle\n"
164 	"OpFunctionEnd\n";
165 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> v4f16.
// Loads the two u32 words at sub-indices %c_u32_0 and %c_u32_1 of element
// `index`, bitcasts each to v2f16 and concatenates them (shuffle 0 1 2 3).
166 const string loadV4F16FromUints =
167 	"%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
168 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
169 	"%ld_arg_${var}_entry = OpLabel\n"
170 	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
171 	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
172 	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
173 	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
174 	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
175 	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
176 	"%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
177 	"OpReturnValue %ld_arg_${var}_shuffle\n"
178 	"OpFunctionEnd\n";
179 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m2x2f16.
// Each of the two columns is one u32 word (sub-indices %c_u32_0, %c_u32_1)
// bitcast to v2f16; the matrix is constructed from those two columns.
180 const string loadM2x2F16FromUints =
181 	"%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
182 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
183 	"%ld_arg_${var}_entry = OpLabel\n"
184 	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
185 	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
186 	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
187 	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
188 	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
189 	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
190 	"%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
191 	"OpReturnValue %ld_arg_${var}_cons\n"
192 	"OpFunctionEnd\n";
193 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m2x3f16.
// Each of the two columns occupies two u32 words (sub-indices 0-1 and 2-3).
// Every word is bitcast to v2f16 and each pair is shuffled into a v3f16 that
// keeps components 0-2, so the fourth half of every pair is ignored.
194 const string loadM2x3F16FromUints =
195 	"%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
196 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
197 	"%ld_arg_${var}_entry = OpLabel\n"
198 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
199 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
200 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
201 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
202 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
203 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
204 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
205 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
206 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
207 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
208 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
209 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
210 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
211 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
212 	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
213 	"OpReturnValue %ld_arg_${var}_mat\n"
214 	"OpFunctionEnd\n";
215 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m2x4f16.
// Each of the two columns occupies two u32 words (sub-indices 0-1 and 2-3).
// Every word is bitcast to v2f16 and each pair is concatenated into a v4f16
// column (shuffle 0 1 2 3).
216 const string loadM2x4F16FromUints =
217 	"%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
218 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
219 	"%ld_arg_${var}_entry = OpLabel\n"
220 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
221 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
222 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
223 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
224 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
225 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
226 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
227 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
228 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
229 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
230 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
231 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
232 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
233 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
234 	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
235 	"OpReturnValue %ld_arg_${var}_mat\n"
236 	"OpFunctionEnd\n";
237 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m3x2f16.
// Each of the three columns is one u32 word (sub-indices 0-2) bitcast to
// v2f16; the matrix is constructed from those three columns.
238 const string loadM3x2F16FromUints =
239 	"%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
240 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
241 	"%ld_arg_${var}_entry = OpLabel\n"
242 	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
243 	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
244 	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
245 	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
246 	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
247 	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
248 	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
249 	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
250 	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
251 	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
252 	"OpReturnValue %ld_arg_${var}_mat\n"
253 	"OpFunctionEnd\n";
254 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m3x3f16.
// Each of the three columns occupies two u32 words (sub-indices 0-5 in
// total). Every word is bitcast to v2f16 and each pair is shuffled into a
// v3f16 keeping components 0-2, so the fourth half of every pair is ignored.
255 const string loadM3x3F16FromUints =
256 	"%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
257 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
258 	"%ld_arg_${var}_entry = OpLabel\n"
259 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
260 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
261 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
262 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
263 	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
264 	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
265 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
266 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
267 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
268 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
269 	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
270 	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
271 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
272 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
273 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
274 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
275 	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
276 	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
277 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
278 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
279 	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
280 	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
281 	"OpReturnValue %ld_arg_${var}_mat\n"
282 	"OpFunctionEnd\n";
283 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m3x4f16.
// Each of the three columns occupies two u32 words (sub-indices 0-5 in
// total). Every word is bitcast to v2f16 and each pair is concatenated into
// a v4f16 column (shuffle 0 1 2 3).
284 const string loadM3x4F16FromUints =
285 	"%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
286 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
287 	"%ld_arg_${var}_entry = OpLabel\n"
288 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
289 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
290 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
291 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
292 	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
293 	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
294 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
295 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
296 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
297 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
298 	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
299 	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
300 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
301 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
302 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
303 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
304 	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
305 	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
306 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
307 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
308 	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
309 	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
310 	"OpReturnValue %ld_arg_${var}_mat\n"
311 	"OpFunctionEnd\n";
312 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m4x2f16.
// Each of the four columns is one u32 word (sub-indices 0-3) bitcast to
// v2f16; the matrix is constructed from those four columns.
313 const string loadM4x2F16FromUints =
314 	"%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
315 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
316 	"%ld_arg_${var}_entry = OpLabel\n"
317 	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
318 	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
319 	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
320 	"%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
321 	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
322 	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
323 	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
324 	"%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
325 	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
326 	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
327 	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
328 	"%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
329 	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
330 	"OpReturnValue %ld_arg_${var}_mat\n"
331 	"OpFunctionEnd\n";
332 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m4x3f16.
// Each of the four columns occupies two u32 words (sub-indices 0-7 in
// total). Every word is bitcast to v2f16 and each pair is shuffled into a
// v3f16 keeping components 0-2, so the fourth half of every pair is ignored.
333 const string loadM4x3F16FromUints =
334 	"%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
335 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
336 	"%ld_arg_${var}_entry = OpLabel\n"
337 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
338 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
339 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
340 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
341 	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
342 	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
343 	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
344 	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
345 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
346 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
347 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
348 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
349 	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
350 	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
351 	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
352 	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
353 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
354 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
355 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
356 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
357 	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
358 	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
359 	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
360 	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
361 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
362 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
363 	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
364 	"%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
365 	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
366 	"OpReturnValue %ld_arg_${var}_mat\n"
367 	"OpFunctionEnd\n";
368 
// SPIR-V assembly template for a function %ld_arg_${var}(i32 index) -> m4x4f16.
// Each of the four columns occupies two u32 words (sub-indices 0-7 in
// total). Every word is bitcast to v2f16 and each pair is concatenated into
// a v4f16 column (shuffle 0 1 2 3).
369 const string loadM4x4F16FromUints =
370 	"%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
371 	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
372 	"%ld_arg_${var}_entry = OpLabel\n"
373 	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
374 	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
375 	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
376 	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
377 	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
378 	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
379 	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
380 	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
381 	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
382 	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
383 	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
384 	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
385 	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
386 	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
387 	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
388 	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
389 	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
390 	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
391 	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
392 	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
393 	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
394 	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
395 	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
396 	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
397 	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
398 	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
399 	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
400 	"%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
401 	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
402 	"OpReturnValue %ld_arg_${var}_mat\n"
403 	"OpFunctionEnd\n";
404 
// SPIR-V assembly template for a function %st_fn_${var}(f16 value, i32 index)
// returning void. The value is inserted into a zero-initialised v2f16 at
// component (index & %c_u32_1), the other 16 bits are filled with ones, and
// the resulting u32 is combined with OpAtomicAnd into the word addressed by
// index divided by %c_u32_2. ${var} is a placeholder -- presumably
// substituted through tcu::StringTemplate before assembly; confirm at the
// use site.
405 const string storeScalarF16AsUint =
406 	// This version is sensitive to the initial value in the output buffer.
407 	// The infrastructure sets all output buffer bits to one before invoking
408 	// the shader so this version uses an atomic AND to generate the correct
409 	// zeroes.
410 	"%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
411 	"%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
412 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
413 	"%st_fn_${var}_entry = OpLabel\n"
414 	"%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
415 	"%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
416 	"%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
417 	"%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
418 	// Or 16 bits of ones into the half that was not populated with the result.
419 	"%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
420 	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
421 	"%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
422 	"%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
423 	"%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
424 	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
425 	"%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
426 	"OpReturn\n"
427 	"OpFunctionEnd\n";
428 
// SPIR-V assembly template for a function %st_fn_${var}(v2f16 value, i32 index)
// returning void. The vector is bitcast to a single u32 and stored to the
// word addressed by (%c_u32_0, index) in %${var}.
429 const string storeV2F16AsUint =
430 	"%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
431 	"%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
432 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
433 	"%st_fn_${var}_entry = OpLabel\n"
434 	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
435 	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
436 	"OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
437 	"OpReturn\n"
438 	"OpFunctionEnd\n";
439 
// SPIR-V assembly template for a function %st_fn_${var}(v3f16 value, i32 index)
// returning void. The vector is shuffled with itself into two v2f16 halves
// (components 0 1 and 2 3), each half is bitcast to u32 and stored to
// sub-indices %c_u32_0 and %c_u32_1 of element `index`. Shuffle component 3
// addresses the first component of the second operand, i.e. filler data.
440 const string storeV3F16AsUints =
441 	// Since we allocate a vec4 worth of values, this case can be treated
442 	// almost the same as a vec4 case. We will store some extra data that
443 	// should not be compared.
444 	"%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
445 	"%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
446 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
447 	"%st_fn_${var}_entry = OpLabel\n"
448 	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
449 	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
450 	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
451 	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
452 	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
453 	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
454 	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
455 	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
456 	"OpReturn\n"
457 	"OpFunctionEnd\n";
458 
// SPIR-V assembly template for a function %st_fn_${var}(v4f16 value, i32 index)
// returning void. The vector is split into two v2f16 halves (components 0 1
// and 2 3), each half is bitcast to u32 and stored to sub-indices %c_u32_0
// and %c_u32_1 of element `index`.
459 const string storeV4F16AsUints =
460 	"%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
461 	"%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
462 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
463 	"%st_fn_${var}_entry = OpLabel\n"
464 	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
465 	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
466 	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
467 	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
468 	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
469 	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
470 	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
471 	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
472 	"OpReturn\n"
473 	"OpFunctionEnd\n";
474 
// SPIR-V assembly template for a function %st_fn_${var}(m2x2f16 value, i32 index)
// returning void. Each of the two v2f16 columns is extracted, bitcast to u32
// and stored to sub-indices %c_u32_0 and %c_u32_1 of element `index`.
475 const string storeM2x2F16AsUints =
476 	"%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
477 	"%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
478 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
479 	"%st_fn_${var}_entry = OpLabel\n"
480 	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
481 	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
482 	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
483 	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
484 	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
485 	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
486 	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
487 	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
488 	"OpReturn\n"
489 	"OpFunctionEnd\n";
490 
// SPIR-V assembly template for a function %st_fn_${var}(m2x3f16 value, i32 index)
// returning void. Each of the two v3f16 columns is shuffled with itself into
// two v2f16 halves (components 0 1 and 2 3); the four halves are bitcast to
// u32 and stored to sub-indices 0-3 of element `index`.
491 const string storeM2x3F16AsUints =
492 	// In the extracted elements for 01 and 11 the second element doesn't
493 	// matter.
494 	"%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
495 	"%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
496 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
497 	"%st_fn_${var}_entry = OpLabel\n"
498 	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
499 	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
500 	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
501 	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
502 	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
503 	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
504 	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
505 	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
506 	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
507 	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
508 	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
509 	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
510 	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
511 	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
512 	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
513 	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
514 	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
515 	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
516 	"OpReturn\n"
517 	"OpFunctionEnd\n";
518 
// SPIR-V assembly template for a function %st_fn_${var}(m2x4f16 value, i32 index)
// returning void. Each of the two v4f16 columns is split into two v2f16
// halves (components 0 1 and 2 3); the four halves are bitcast to u32 and
// stored to sub-indices 0-3 of element `index`.
519 const string storeM2x4F16AsUints =
520 	"%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
521 	"%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
522 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
523 	"%st_fn_${var}_entry = OpLabel\n"
524 	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
525 	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
526 	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
527 	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
528 	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
529 	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
530 	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
531 	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
532 	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
533 	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
534 	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
535 	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
536 	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
537 	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
538 	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
539 	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
540 	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
541 	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
542 	"OpReturn\n"
543 	"OpFunctionEnd\n";
544 
// SPIR-V assembly template for a function %st_fn_${var}(m3x2f16 value, i32 index)
// returning void. Each of the three v2f16 columns is extracted, bitcast to
// u32 and stored to sub-indices 0-2 of element `index`.
545 const string storeM3x2F16AsUints =
546 	"%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
547 	"%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
548 	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
549 	"%st_fn_${var}_entry = OpLabel\n"
550 	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
551 	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
552 	"%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
553 	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
554 	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
555 	"%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
556 	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
557 	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
558 	"%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
559 	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
560 	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
561 	"OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
562 	"OpReturn\n"
563 	"OpFunctionEnd\n";
564 
// SPIR-V assembly template for the function %st_fn_${var}(%m3x3f16 matrix, %i32 index).
// Every %v3f16 column is split with OpVectorShuffle into two %v2f16 halves, each half is
// bitcast to %u32, and the resulting six words are stored into %${var} through the
// access chain [0][index][0..5].
// ${var} is a placeholder that has to be substituted before the text is assembled
// (presumably via tcu::StringTemplate - confirm at the use site).
const string storeM3x3F16AsUints =
	// A vec3 has no fourth component: in the "2 3" shuffles, index 3 selects component 0
	// of the second operand (the same vector), so the upper half of every second word is
	// only filler and its value does not matter.
	"%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
600 
// SPIR-V assembly template for the function %st_fn_${var}(%m3x4f16 matrix, %i32 index).
// Every %v4f16 column is split with OpVectorShuffle into its lower (components 0,1) and
// upper (components 2,3) %v2f16 halves, each half is bitcast to %u32, and the resulting
// six words are stored into %${var} through the access chain [0][index][0..5].
// ${var} is a placeholder that has to be substituted before the text is assembled
// (presumably via tcu::StringTemplate - confirm at the use site).
const string storeM3x4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
635 
// SPIR-V assembly template for the function %st_fn_${var}(%m4x2f16 matrix, %i32 index).
// Each of the four %v2f16 columns extracted from the matrix is bitcast to a single
// %u32 and stored into %${var} through the access chain [0][index][column].
// ${var} is a placeholder that has to be substituted before the text is assembled
// (presumably via tcu::StringTemplate - confirm at the use site).
const string storeM4x2F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
	"%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
	"OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
659 
// SPIR-V assembly template for the function %st_fn_${var}(%m4x3f16 matrix, %i32 index).
// Every %v3f16 column is split with OpVectorShuffle into two %v2f16 halves, each half is
// bitcast to %u32, and the resulting eight words are stored into %${var} through the
// access chain [0][index][0..7].
// ${var} is a placeholder that has to be substituted before the text is assembled
// (presumably via tcu::StringTemplate - confirm at the use site).
const string storeM4x3F16AsUints =
	// A vec3 has no fourth component: in the "2 3" shuffles, index 3 selects component 0
	// of the second operand (the same vector), so the last 16-bit element of each
	// decomposed vec3 is only filler and its value does not matter.
	"%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
	"%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
	"%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
	"%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
	"OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
704 
// SPIR-V assembly template for the function %st_fn_${var}(%m4x4f16 matrix, %i32 index).
// Every %v4f16 column is split with OpVectorShuffle into its lower (components 0,1) and
// upper (components 2,3) %v2f16 halves, each half is bitcast to %u32, and the resulting
// eight words are stored into %${var} through the access chain [0][index][0..7].
// ${var} is a placeholder that has to be substituted before the text is assembled
// (presumably via tcu::StringTemplate - confirm at the use site).
const string storeM4x4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3\n"
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1\n"
	"%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3\n"
	"%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1\n"
	"%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20\n"
	"%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21\n"
	"%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30\n"
	"%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4\n"
	"%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5\n"
	"%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6\n"
	"%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20\n"
	"OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21\n"
	"OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30\n"
	"OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
748 
749 template<typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)750 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
751 {
752 	T* const typedPtr = (T*)dst;
753 	for (int ndx = 0; ndx < numValues; ndx++)
754 		typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
755 }
756 
757 // Filter is a function that returns true if a value should pass, false otherwise.
758 template<typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)759 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
760 {
761 	T* const typedPtr = (T*)dst;
762 	T value;
763 	for (int ndx = 0; ndx < numValues; ndx++)
764 	{
765 		do
766 			value = de::randomScalar<T>(rnd, minValue, maxValue);
767 		while (!filter(value));
768 
769 		typedPtr[offset + ndx] = value;
770 	}
771 }
772 
773 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)774 deInt64 randomInt64LogDistributed (de::Random& rnd)
775 {
776 	deInt64 val = rnd.getUint64();
777 	val &= (1ull << rnd.getInt(1, 63)) - 1;
778 	if (rnd.getBool())
779 		val = -val;
780 	return val;
781 }
782 
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues)783 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
784 {
785 	for (int ndx = 0; ndx < numValues; ndx++)
786 		dst[ndx] = randomInt64LogDistributed(rnd);
787 }
788 
789 template<typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues,FilterT filter)790 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
791 {
792 	for (int ndx = 0; ndx < numValues; ndx++)
793 	{
794 		deInt64 value;
795 		do {
796 			value = randomInt64LogDistributed(rnd);
797 		} while (!filter(value));
798 		dst[ndx] = value;
799 	}
800 }
801 
// Filter predicate: accepts zero and every positive value.
inline bool filterNonNegative (const deInt64 value)
{
	return !(value < 0);
}
806 
// Filter predicate: accepts strictly positive values only.
inline bool filterPositive (const deInt64 value)
{
	return 0 < value;
}
811 
// Filter predicate: accepts every value except zero.
inline bool filterNotZero (const deInt64 value)
{
	return !(value == 0);
}
816 
floorAll(vector<float> & values)817 static void floorAll (vector<float>& values)
818 {
819 	for (size_t i = 0; i < values.size(); i++)
820 		values[i] = deFloatFloor(values[i]);
821 }
822 
floorAll(vector<Vec4> & values)823 static void floorAll (vector<Vec4>& values)
824 {
825 	for (size_t i = 0; i < values.size(); i++)
826 		values[i] = floor(values[i]);
827 }
828 
829 struct CaseParameter
830 {
831 	const char*		name;
832 	string			param;
833 
CaseParametervkt::SpirVAssembly::__anon6f921be60111::CaseParameter834 	CaseParameter	(const char* case_, const string& param_) : name(case_), param(param_) {}
835 };
836 
837 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
838 //
839 // #version 430
840 //
841 // layout(std140, set = 0, binding = 0) readonly buffer Input {
842 //   float elements[];
843 // } input_data;
844 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
845 //   float elements[];
846 // } output_data;
847 //
848 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
849 //
850 // void main() {
851 //   uint x = gl_GlobalInvocationID.x;
852 //   output_data.elements[x] = -input_data.elements[x];
853 // }
854 
getAsmForLocalSizeTest(bool useLiteralLocalSize,bool useLiteralLocalSizeId,bool useSpecConstantWorkgroupSize,IVec3 workGroupSize,deUint32 ndx)855 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
856 {
857 	std::ostringstream out;
858 	out << "OpCapability Shader\n"
859 		   "OpMemoryModel Logical GLSL450\n";
860 
861 	if (useLiteralLocalSizeId)
862 	{
863 		out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
864 			   "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
865 	}
866 	else
867 	{
868 		out << "OpEntryPoint GLCompute %main \"main\" %id\n";
869 
870 		if (useLiteralLocalSize)
871 		{
872 			out << "OpExecutionMode %main LocalSize "
873 				<< workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
874 		}
875 	}
876 
877 	out << "OpSource GLSL 430\n"
878 		   "OpName %main           \"main\"\n"
879 		   "OpName %id             \"gl_GlobalInvocationID\"\n"
880 		   "OpDecorate %id BuiltIn GlobalInvocationId\n";
881 
882 	if (useSpecConstantWorkgroupSize)
883 	{
884 		out << "OpDecorate %spec_0 SpecId 100\n"
885 			   "OpDecorate %spec_1 SpecId 101\n"
886 			   "OpDecorate %spec_2 SpecId 102\n"
887 			   "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
888 	}
889 
890 	if (useLiteralLocalSizeId)
891 	{
892 		out << getComputeAsmInputOutputBufferTraits("Block")
893 			<< getComputeAsmCommonTypes("StorageBuffer")
894 			<< getComputeAsmInputOutputBuffer("StorageBuffer")
895 			<< "%const_0  = OpConstant %u32 " << workGroupSize.x() << "\n"
896 			   "%const_1  = OpConstant %u32 " << workGroupSize.y() << "\n"
897 			   "%const_2  = OpConstant %u32 " << workGroupSize.z() << "\n";
898 	}
899 	else
900 	{
901 		out << getComputeAsmInputOutputBufferTraits()
902 			<< getComputeAsmCommonTypes()
903 			<< getComputeAsmInputOutputBuffer();
904 	}
905 
906 	out << "%id        = OpVariable %uvec3ptr Input\n"
907 		   "%zero      = OpConstant %i32 0 \n";
908 
909 	if (useSpecConstantWorkgroupSize)
910 	{
911 		out << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
912 			   "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
913 			   "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
914 			   "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
915 	}
916 
917 	out << "%main      = OpFunction %void None %voidf\n"
918 		   "%label     = OpLabel\n"
919 		   "%idval     = OpLoad %uvec3 %id\n"
920 		   "%ndx       = OpCompositeExtract %u32 %idval " << ndx << "\n"
921 
922 		   "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
923 		   "%inval     = OpLoad %f32 %inloc\n"
924 		   "%neg       = OpFNegate %f32 %inval\n"
925 		   "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
926 		   "             OpStore %outloc %neg\n"
927 		   "             OpReturn\n"
928 		   "             OpFunctionEnd\n";
929 
930 	return out.str();
931 }
932 
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)933 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
934 {
935 	const char*		groupName[]{ "localsize", "localsize_id" };
936 
937 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
938 	ComputeShaderSpec				spec;
939 	de::Random						rnd				(deStringHash(group->getName()));
940 	const deUint32					numElements		= 64u;
941 	vector<float>					positiveFloats	(numElements, 0);
942 	vector<float>					negativeFloats	(numElements, 0);
943 
944 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
945 
946 	for (size_t ndx = 0; ndx < numElements; ++ndx)
947 		negativeFloats[ndx] = -positiveFloats[ndx];
948 
949 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
950 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
951 
952 	if (useLocalSizeId)
953 	{
954 		spec.spirvVersion = SPIRV_VERSION_1_5;
955 		spec.extensions.push_back("VK_KHR_maintenance4");
956 	}
957 
958 	spec.numWorkGroups = IVec3(numElements, 1, 1);
959 
960 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
961 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
962 
963 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
964 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
965 
966 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
967 	{
968 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
969 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
970 	}
971 
972 	spec.numWorkGroups = IVec3(1, 1, 1);
973 
974 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
975 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
976 
977 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
978 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
979 
980 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
981 	{
982 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
983 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
984 	}
985 
986 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
987 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
988 
989 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
990 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
991 
992 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
993 	{
994 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
995 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
996 	}
997 
998 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
999 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
1000 
1001 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1002 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
1003 
1004 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
1005 	{
1006 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1007 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
1008 	}
1009 
1010 	return group.release();
1011 }
1012 
createOpNopGroup(tcu::TestContext & testCtx)1013 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1014 {
1015 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
1016 	ComputeShaderSpec				spec;
1017 	de::Random						rnd				(deStringHash(group->getName()));
1018 	const int						numElements		= 100;
1019 	vector<float>					positiveFloats	(numElements, 0);
1020 	vector<float>					negativeFloats	(numElements, 0);
1021 
1022 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1023 
1024 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1025 		negativeFloats[ndx] = -positiveFloats[ndx];
1026 
1027 	spec.assembly =
1028 		string(getComputeAsmShaderPreamble()) +
1029 
1030 		"OpSource GLSL 430\n"
1031 		"OpName %main           \"main\"\n"
1032 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1033 
1034 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1035 
1036 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1037 
1038 		+ string(getComputeAsmInputOutputBuffer()) +
1039 
1040 		"%id        = OpVariable %uvec3ptr Input\n"
1041 		"%zero      = OpConstant %i32 0\n"
1042 
1043 		"%main      = OpFunction %void None %voidf\n"
1044 		"%label     = OpLabel\n"
1045 		"%idval     = OpLoad %uvec3 %id\n"
1046 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1047 
1048 		"             OpNop\n" // Inside a function body
1049 
1050 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1051 		"%inval     = OpLoad %f32 %inloc\n"
1052 		"%neg       = OpFNegate %f32 %inval\n"
1053 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1054 		"             OpStore %outloc %neg\n"
1055 		"             OpReturn\n"
1056 		"             OpFunctionEnd\n";
1057 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1058 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1059 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1060 
1061 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1062 
1063 	return group.release();
1064 }
1065 
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1066 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1067 {
1068 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1069 	de::Random						rnd				(deStringHash(group->getName()));
1070 	const int						numElements		= 100;
1071 	vector<float>					positiveFloats	(numElements, 0);
1072 	vector<float>					negativeFloats	(numElements, 0);
1073 
1074 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1075 
1076 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1077 		negativeFloats[ndx] = -positiveFloats[ndx];
1078 
1079 	const VariableLocation			testLocations[] =
1080 	{
1081 		// Set		Binding
1082 		{ 0,		5			},
1083 		{ 5,		5			},
1084 	};
1085 
1086 	for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1087 	{
1088 		const VariableLocation& location = testLocations[locationNdx];
1089 
1090 		// Unused variable.
1091 		{
1092 			ComputeShaderSpec				spec;
1093 
1094 			spec.assembly =
1095 				string(getComputeAsmShaderPreamble()) +
1096 
1097 				"OpDecorate %id BuiltIn GlobalInvocationId\n"
1098 
1099 				+ getUnusedDecorations(location)
1100 
1101 				+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1102 
1103 				+ getUnusedTypesAndConstants()
1104 
1105 				+ string(getComputeAsmInputOutputBuffer())
1106 
1107 				+ getUnusedBuffer() +
1108 
1109 				"%id        = OpVariable %uvec3ptr Input\n"
1110 				"%zero      = OpConstant %i32 0\n"
1111 
1112 				"%main      = OpFunction %void None %voidf\n"
1113 				"%label     = OpLabel\n"
1114 				"%idval     = OpLoad %uvec3 %id\n"
1115 				"%x         = OpCompositeExtract %u32 %idval 0\n"
1116 
1117 				"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1118 				"%inval     = OpLoad %f32 %inloc\n"
1119 				"%neg       = OpFNegate %f32 %inval\n"
1120 				"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1121 				"             OpStore %outloc %neg\n"
1122 				"             OpReturn\n"
1123 				"             OpFunctionEnd\n";
1124 			spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1125 			spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1126 			spec.numWorkGroups = IVec3(numElements, 1, 1);
1127 
1128 			std::string testName		= "variable_" + location.toString();
1129 			std::string testDescription	= "Unused variable test with " + location.toDescription();
1130 
1131 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1132 		}
1133 
1134 		// Unused function.
1135 		{
1136 			ComputeShaderSpec				spec;
1137 
1138 			spec.assembly =
1139 				string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1140 
1141 				"OpDecorate %id BuiltIn GlobalInvocationId\n"
1142 
1143 				+ getUnusedDecorations(location)
1144 
1145 				+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1146 
1147 				+ getUnusedTypesAndConstants() +
1148 
1149 				"%c_i32_0 = OpConstant %i32 0\n"
1150 				"%c_i32_1 = OpConstant %i32 1\n"
1151 
1152 				+ string(getComputeAsmInputOutputBuffer())
1153 
1154 				+ getUnusedBuffer() +
1155 
1156 				"%id        = OpVariable %uvec3ptr Input\n"
1157 				"%zero      = OpConstant %i32 0\n"
1158 
1159 				"%main      = OpFunction %void None %voidf\n"
1160 				"%label     = OpLabel\n"
1161 				"%idval     = OpLoad %uvec3 %id\n"
1162 				"%x         = OpCompositeExtract %u32 %idval 0\n"
1163 
1164 				"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1165 				"%inval     = OpLoad %f32 %inloc\n"
1166 				"%neg       = OpFNegate %f32 %inval\n"
1167 				"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1168 				"             OpStore %outloc %neg\n"
1169 				"             OpReturn\n"
1170 				"             OpFunctionEnd\n"
1171 
1172 				+ getUnusedFunctionBody();
1173 
1174 			spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1175 			spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1176 			spec.numWorkGroups = IVec3(numElements, 1, 1);
1177 
1178 			std::string testName		= "function_" + location.toString();
1179 			std::string testDescription	= "Unused function test with " + location.toDescription();
1180 
1181 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1182 		}
1183 	}
1184 
1185 	return group.release();
1186 }
1187 
1188 template<bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1189 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1190 {
1191 	if (outputAllocs.size() != 1)
1192 		return false;
1193 
1194 	vector<deUint8>	input1Bytes;
1195 	vector<deUint8>	input2Bytes;
1196 	vector<deUint8>	expectedBytes;
1197 
1198 	inputs[0].getBytes(input1Bytes);
1199 	inputs[1].getBytes(input2Bytes);
1200 	expectedOutputs[0].getBytes(expectedBytes);
1201 
1202 	const deInt32* const	expectedOutputAsInt		= reinterpret_cast<const deInt32*>(&expectedBytes.front());
1203 	const deInt32* const	outputAsInt				= static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1204 	const float* const		input1AsFloat			= reinterpret_cast<const float*>(&input1Bytes.front());
1205 	const float* const		input2AsFloat			= reinterpret_cast<const float*>(&input2Bytes.front());
1206 	bool returnValue								= true;
1207 
1208 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1209 	{
1210 		if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1211 			continue;
1212 
1213 		if (outputAsInt[idx] != expectedOutputAsInt[idx])
1214 		{
1215 			log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1216 			returnValue = false;
1217 		}
1218 	}
1219 	return returnValue;
1220 }
1221 
1222 typedef VkBool32 (*compareFuncType) (float, float);
1223 
1224 struct OpFUnordCase
1225 {
1226 	const char*		name;
1227 	const char*		opCode;
1228 	compareFuncType	compareFunc;
1229 
OpFUnordCasevkt::SpirVAssembly::__anon6f921be60111::OpFUnordCase1230 					OpFUnordCase			(const char* _name, const char* _opCode, compareFuncType _compareFunc)
1231 						: name				(_name)
1232 						, opCode			(_opCode)
1233 						, compareFunc		(_compareFunc) {}
1234 };
1235 
// Appends an OpFUnordCase called NAME to the local vector `cases`. The reference
// comparison is a generated static function that applies OPERATOR to its two
// float arguments and maps the outcome to VK_TRUE / VK_FALSE.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
do { \
	struct compare_##NAME \
	{ \
		static VkBool32 compare (float lhs, float rhs) \
		{ \
			if (lhs OPERATOR rhs) \
				return VK_TRUE; \
			return VK_FALSE; \
		} \
	}; \
	cases.push_back(OpFUnordCase(#NAME, OPCODE, compare_##NAME::compare)); \
} while (deGetFalse())
1241 
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1242 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1243 {
1244 	const string					nan				= testWithNan ? "_nan" : "";
1245 	const string					groupName		= "opfunord" + nan;
1246 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1247 	de::Random						rnd				(deStringHash(group->getName()));
1248 	const int						numElements		= 100;
1249 	vector<OpFUnordCase>			cases;
1250 	string							extensions		= testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1251 	string							capabilities	= testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1252 	string							exeModes		= testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1253 	const StringTemplate			shaderTemplate	(
1254 		string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1255 		"OpSource GLSL 430\n"
1256 		"OpName %main           \"main\"\n"
1257 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1258 
1259 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1260 
1261 		"OpDecorate %buf BufferBlock\n"
1262 		"OpDecorate %buf2 BufferBlock\n"
1263 		"OpDecorate %indata1 DescriptorSet 0\n"
1264 		"OpDecorate %indata1 Binding 0\n"
1265 		"OpDecorate %indata2 DescriptorSet 0\n"
1266 		"OpDecorate %indata2 Binding 1\n"
1267 		"OpDecorate %outdata DescriptorSet 0\n"
1268 		"OpDecorate %outdata Binding 2\n"
1269 		"OpDecorate %f32arr ArrayStride 4\n"
1270 		"OpDecorate %i32arr ArrayStride 4\n"
1271 		"OpMemberDecorate %buf 0 Offset 0\n"
1272 		"OpMemberDecorate %buf2 0 Offset 0\n"
1273 
1274 		+ string(getComputeAsmCommonTypes()) +
1275 
1276 		"%buf        = OpTypeStruct %f32arr\n"
1277 		"%bufptr     = OpTypePointer Uniform %buf\n"
1278 		"%indata1    = OpVariable %bufptr Uniform\n"
1279 		"%indata2    = OpVariable %bufptr Uniform\n"
1280 
1281 		"%buf2       = OpTypeStruct %i32arr\n"
1282 		"%buf2ptr    = OpTypePointer Uniform %buf2\n"
1283 		"%outdata    = OpVariable %buf2ptr Uniform\n"
1284 
1285 		"%id        = OpVariable %uvec3ptr Input\n"
1286 		"%zero      = OpConstant %i32 0\n"
1287 		"%consti1   = OpConstant %i32 1\n"
1288 		"%constf1   = OpConstant %f32 1.0\n"
1289 
1290 		"%main      = OpFunction %void None %voidf\n"
1291 		"%label     = OpLabel\n"
1292 		"%idval     = OpLoad %uvec3 %id\n"
1293 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1294 
1295 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1296 		"%inval1    = OpLoad %f32 %inloc1\n"
1297 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1298 		"%inval2    = OpLoad %f32 %inloc2\n"
1299 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1300 
1301 		"%result    = ${OPCODE} %bool %inval1 %inval2\n"
1302 		"%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1303 		"             OpStore %outloc %int_res\n"
1304 
1305 		"             OpReturn\n"
1306 		"             OpFunctionEnd\n");
1307 
1308 	ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1309 	ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1310 	ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1311 	ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1312 	ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1313 	ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1314 
1315 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1316 	{
1317 		map<string, string>			specializations;
1318 		ComputeShaderSpec			spec;
1319 		const float					NaN				= std::numeric_limits<float>::quiet_NaN();
1320 		vector<float>				inputFloats1	(numElements, 0);
1321 		vector<float>				inputFloats2	(numElements, 0);
1322 		vector<deInt32>				expectedInts	(numElements, 0);
1323 
1324 		specializations["OPCODE"]	= cases[caseNdx].opCode;
1325 		spec.assembly				= shaderTemplate.specialize(specializations);
1326 
1327 		fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1328 		for (size_t ndx = 0; ndx < numElements; ++ndx)
1329 		{
1330 			switch (ndx % 6)
1331 			{
1332 				case 0:		inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1333 				case 1:		inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1334 				case 2:		inputFloats2[ndx] = inputFloats1[ndx]; break;
1335 				case 3:		inputFloats2[ndx] = NaN; break;
1336 				case 4:		inputFloats2[ndx] = inputFloats1[ndx];	inputFloats1[ndx] = NaN; break;
1337 				case 5:		inputFloats2[ndx] = NaN;				inputFloats1[ndx] = NaN; break;
1338 			}
1339 			expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1340 		}
1341 
1342 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1343 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1344 		spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1345 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
1346 		spec.verifyIO		= testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1347 
1348 		if (testWithNan)
1349 		{
1350 			spec.extensions.push_back("VK_KHR_shader_float_controls");
1351 			spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1352 		}
1353 
1354 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1355 	}
1356 
1357 	return group.release();
1358 }
1359 
1360 struct OpAtomicCase
1361 {
1362 	const char*		name;
1363 	const char*		assembly;
1364 	const char*		retValAssembly;
1365 	OpAtomicType	opAtomic;
1366 	deInt32			numOutputElements;
1367 
OpAtomicCasevkt::SpirVAssembly::__anon6f921be60111::OpAtomicCase1368 					OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1369 						: name				(_name)
1370 						, assembly			(_assembly)
1371 						, retValAssembly	(_retValAssembly)
1372 						, opAtomic			(_opAtomic)
1373 						, numOutputElements	(_numOutputElements) {}
1374 };
1375 
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1376 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1377 {
1378 	std::string						groupName			("opatomic");
1379 	if (useStorageBuffer)
1380 		groupName += "_storage_buffer";
1381 	if (verifyReturnValues)
1382 		groupName += "_return_values";
1383 	if (volatileAtomic)
1384 		groupName += "_volatile";
1385 	de::MovePtr<tcu::TestCaseGroup>	group				(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1386 	vector<OpAtomicCase>			cases;
1387 
1388 	const StringTemplate			shaderTemplate	(
1389 
1390 		string("OpCapability Shader\n") +
1391 		(volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1392 		(useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1393 		(volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1394 		(volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1395 		"OpEntryPoint GLCompute %main \"main\" %id\n"
1396 		"OpExecutionMode %main LocalSize 1 1 1\n" +
1397 
1398 		"OpSource GLSL 430\n"
1399 		"OpName %main           \"main\"\n"
1400 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1401 
1402 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1403 
1404 		"OpDecorate %buf ${BLOCK_DECORATION}\n"
1405 		"OpDecorate %indata DescriptorSet 0\n"
1406 		"OpDecorate %indata Binding 0\n"
1407 		"OpDecorate %i32arr ArrayStride 4\n"
1408 		"OpMemberDecorate %buf 0 Offset 0\n"
1409 
1410 		"OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1411 		"OpDecorate %sum DescriptorSet 0\n"
1412 		"OpDecorate %sum Binding 1\n"
1413 		"OpMemberDecorate %sumbuf 0 Offset 0\n"
1414 
1415 		"${RETVAL_BUF_DECORATE}"
1416 
1417 		+ getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1418 
1419 		"%buf       = OpTypeStruct %i32arr\n"
1420 		"%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1421 		"%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1422 
1423 		"%sumbuf    = OpTypeStruct %i32arr\n"
1424 		"%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1425 		"%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1426 
1427 		"${RETVAL_BUF_DECL}"
1428 
1429 		"%id        = OpVariable %uvec3ptr Input\n"
1430 		"%minusone  = OpConstant %i32 -1\n"
1431 		"%zero      = OpConstant %i32 0\n"
1432 		"%one       = OpConstant %u32 1\n"
1433 		"%two       = OpConstant %i32 2\n"
1434 		"%five      = OpConstant %i32 5\n"
1435 		"%volbit    = OpConstant %i32 32768\n"
1436 
1437 		"%main      = OpFunction %void None %voidf\n"
1438 		"%label     = OpLabel\n"
1439 		"%idval     = OpLoad %uvec3 %id\n"
1440 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1441 
1442 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1443 		"%inval     = OpLoad %i32 %inloc\n"
1444 
1445 		"%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1446 		"${INSTRUCTION}"
1447 		"${RETVAL_ASSEMBLY}"
1448 
1449 		"             OpReturn\n"
1450 		"             OpFunctionEnd\n");
1451 
1452 	#define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1453 	do { \
1454 		cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1455 	} while (deGetFalse())
1456 	#define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1457 	#define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1458 
1459 	ADD_OPATOMIC_CASE_1(iadd,	"%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1460 								"             OpStore %retloc %retv\n", OPATOMIC_IADD );
1461 	ADD_OPATOMIC_CASE_1(isub,	"%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1462 								"             OpStore %retloc %retv\n", OPATOMIC_ISUB );
1463 	ADD_OPATOMIC_CASE_1(iinc,	"%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1464 								"             OpStore %retloc %retv\n", OPATOMIC_IINC );
1465 	ADD_OPATOMIC_CASE_1(idec,	"%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1466 								"             OpStore %retloc %retv\n", OPATOMIC_IDEC );
1467 	if (!verifyReturnValues)
1468 	{
1469 		ADD_OPATOMIC_CASE_N(load,	"%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1470 									"             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1471 		ADD_OPATOMIC_CASE_N(store,	"             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1472 	}
1473 
1474 	ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
1475 								"             OpStore %outloc %even\n"
1476 								"%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1477 								"			  OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1478 
1479 
1480 	#undef ADD_OPATOMIC_CASE
1481 	#undef ADD_OPATOMIC_CASE_1
1482 	#undef ADD_OPATOMIC_CASE_N
1483 
1484 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1485 	{
1486 		map<string, string>			specializations;
1487 		ComputeShaderSpec			spec;
1488 		vector<deInt32>				inputInts		(numElements, 0);
1489 		vector<deInt32>				expected		(cases[caseNdx].numOutputElements, -1);
1490 
1491 		if (volatileAtomic)
1492 		{
1493 			spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1494 			spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1495 
1496 			// volatile, queuefamily scope
1497 			specializations["SEMANTICS"] = "%volbit";
1498 			specializations["SCOPE"] = "%five";
1499 		}
1500 		else
1501 		{
1502 			// non-volatile, device scope
1503 			specializations["SEMANTICS"] = "%zero";
1504 			specializations["SCOPE"] = "%one";
1505 		}
1506 		specializations["INDEX"]				= (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1507 		specializations["INSTRUCTION"]			= cases[caseNdx].assembly;
1508 		specializations["BLOCK_DECORATION"]		= useStorageBuffer ? "Block" : "BufferBlock";
1509 		specializations["BLOCK_POINTER_TYPE"]	= useStorageBuffer ? "StorageBuffer" : "Uniform";
1510 
1511 		if (verifyReturnValues)
1512 		{
1513 			const StringTemplate blockDecoration	(
1514 				"\n"
1515 				"OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1516 				"OpDecorate %ret DescriptorSet 0\n"
1517 				"OpDecorate %ret Binding 2\n"
1518 				"OpMemberDecorate %retbuf 0 Offset 0\n\n");
1519 
1520 			const StringTemplate blockDeclaration	(
1521 				"\n"
1522 				"%retbuf    = OpTypeStruct %i32arr\n"
1523 				"%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1524 				"%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1525 
1526 			specializations["RETVAL_ASSEMBLY"] =
1527 				"%retloc    = OpAccessChain %i32ptr %ret %zero %x\n"
1528 				+ std::string(cases[caseNdx].retValAssembly);
1529 
1530 			specializations["RETVAL_BUF_DECORATE"]	= blockDecoration.specialize(specializations);
1531 			specializations["RETVAL_BUF_DECL"]		= blockDeclaration.specialize(specializations);
1532 		}
1533 		else
1534 		{
1535 			specializations["RETVAL_ASSEMBLY"]		= "";
1536 			specializations["RETVAL_BUF_DECORATE"]	= "";
1537 			specializations["RETVAL_BUF_DECL"]		= "";
1538 		}
1539 
1540 		spec.assembly							= shaderTemplate.specialize(specializations);
1541 
1542 		// Specialize one more time, to catch things that were in a template parameter
1543 		const StringTemplate					assemblyTemplate(spec.assembly);
1544 		spec.assembly							= assemblyTemplate.specialize(specializations);
1545 
1546 		if (useStorageBuffer)
1547 			spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1548 
1549 		spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1550 		spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1551 		if (verifyReturnValues)
1552 			spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1553 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1554 
1555 		if (verifyReturnValues)
1556 		{
1557 			switch (cases[caseNdx].opAtomic)
1558 			{
1559 				case OPATOMIC_IADD:
1560 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1561 					break;
1562 				case OPATOMIC_ISUB:
1563 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1564 					break;
1565 				case OPATOMIC_IINC:
1566 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1567 					break;
1568 				case OPATOMIC_IDEC:
1569 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1570 					break;
1571 				case OPATOMIC_COMPEX:
1572 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1573 					break;
1574 				default:
1575 					DE_FATAL("Unsupported OpAtomic type for return value verification");
1576 			}
1577 		}
1578 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1579 	}
1580 
1581 	return group.release();
1582 }
1583 
createOpLineGroup(tcu::TestContext & testCtx)1584 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1585 {
1586 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1587 	ComputeShaderSpec				spec;
1588 	de::Random						rnd				(deStringHash(group->getName()));
1589 	const int						numElements		= 100;
1590 	vector<float>					positiveFloats	(numElements, 0);
1591 	vector<float>					negativeFloats	(numElements, 0);
1592 
1593 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1594 
1595 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1596 		negativeFloats[ndx] = -positiveFloats[ndx];
1597 
1598 	spec.assembly =
1599 		string(getComputeAsmShaderPreamble()) +
1600 
1601 		"%fname1 = OpString \"negateInputs.comp\"\n"
1602 		"%fname2 = OpString \"negateInputs\"\n"
1603 
1604 		"OpSource GLSL 430\n"
1605 		"OpName %main           \"main\"\n"
1606 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1607 
1608 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1609 
1610 		+ string(getComputeAsmInputOutputBufferTraits()) +
1611 
1612 		"OpLine %fname1 0 0\n" // At the earliest possible position
1613 
1614 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1615 
1616 		"OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1617 		"OpLine %fname2 1 0\n" // Different filenames
1618 		"OpLine %fname1 1000 100000\n"
1619 
1620 		"%id        = OpVariable %uvec3ptr Input\n"
1621 		"%zero      = OpConstant %i32 0\n"
1622 
1623 		"OpLine %fname1 1 1\n" // Before a function
1624 
1625 		"%main      = OpFunction %void None %voidf\n"
1626 		"%label     = OpLabel\n"
1627 
1628 		"OpLine %fname1 1 1\n" // In a function
1629 
1630 		"%idval     = OpLoad %uvec3 %id\n"
1631 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1632 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1633 		"%inval     = OpLoad %f32 %inloc\n"
1634 		"%neg       = OpFNegate %f32 %inval\n"
1635 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1636 		"             OpStore %outloc %neg\n"
1637 		"             OpReturn\n"
1638 		"             OpFunctionEnd\n";
1639 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1640 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1641 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1642 
1643 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1644 
1645 	return group.release();
1646 }
1647 
veryfiBinaryShader(const ProgramBinary & binary)1648 bool veryfiBinaryShader (const ProgramBinary& binary)
1649 {
1650 	const size_t	paternCount			= 3u;
1651 	bool paternsCheck[paternCount]		=
1652 	{
1653 		false, false, false
1654 	};
1655 	const string patersns[paternCount]	=
1656 	{
1657 		"VULKAN CTS",
1658 		"Negative values",
1659 		"Date: 2017/09/21"
1660 	};
1661 	size_t			paternNdx		= 0u;
1662 
1663 	for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1664 	{
1665 		if (false == paternsCheck[paternNdx] &&
1666 			patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1667 			deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1668 		{
1669 			paternsCheck[paternNdx]= true;
1670 			paternNdx++;
1671 			if (paternNdx == paternCount)
1672 				break;
1673 		}
1674 	}
1675 
1676 	for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1677 	{
1678 		if (!paternsCheck[ndx])
1679 			return false;
1680 	}
1681 
1682 	return true;
1683 }
1684 
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1685 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1686 {
1687 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1688 	ComputeShaderSpec				spec;
1689 	de::Random						rnd				(deStringHash(group->getName()));
1690 	const int						numElements		= 10;
1691 	vector<float>					positiveFloats	(numElements, 0);
1692 	vector<float>					negativeFloats	(numElements, 0);
1693 
1694 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1695 
1696 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1697 		negativeFloats[ndx] = -positiveFloats[ndx];
1698 
1699 	spec.assembly =
1700 		string(getComputeAsmShaderPreamble()) +
1701 		"%fname = OpString \"negateInputs.comp\"\n"
1702 
1703 		"OpSource GLSL 430\n"
1704 		"OpName %main           \"main\"\n"
1705 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1706 		"OpModuleProcessed \"VULKAN CTS\"\n"					//OpModuleProcessed;
1707 		"OpModuleProcessed \"Negative values\"\n"
1708 		"OpModuleProcessed \"Date: 2017/09/21\"\n"
1709 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1710 
1711 		+ string(getComputeAsmInputOutputBufferTraits())
1712 
1713 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1714 
1715 		"OpLine %fname 0 1\n"
1716 
1717 		"OpLine %fname 1000 1\n"
1718 
1719 		"%id        = OpVariable %uvec3ptr Input\n"
1720 		"%zero      = OpConstant %i32 0\n"
1721 		"%main      = OpFunction %void None %voidf\n"
1722 
1723 		"%label     = OpLabel\n"
1724 		"%idval     = OpLoad %uvec3 %id\n"
1725 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1726 
1727 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1728 		"%inval     = OpLoad %f32 %inloc\n"
1729 		"%neg       = OpFNegate %f32 %inval\n"
1730 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1731 		"             OpStore %outloc %neg\n"
1732 		"             OpReturn\n"
1733 		"             OpFunctionEnd\n";
1734 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1735 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1736 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1737 	spec.verifyBinary = veryfiBinaryShader;
1738 	spec.spirvVersion = SPIRV_VERSION_1_3;
1739 
1740 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1741 
1742 	return group.release();
1743 }
1744 
createOpNoLineGroup(tcu::TestContext & testCtx)1745 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1746 {
1747 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1748 	ComputeShaderSpec				spec;
1749 	de::Random						rnd				(deStringHash(group->getName()));
1750 	const int						numElements		= 100;
1751 	vector<float>					positiveFloats	(numElements, 0);
1752 	vector<float>					negativeFloats	(numElements, 0);
1753 
1754 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1755 
1756 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1757 		negativeFloats[ndx] = -positiveFloats[ndx];
1758 
1759 	spec.assembly =
1760 		string(getComputeAsmShaderPreamble()) +
1761 
1762 		"%fname = OpString \"negateInputs.comp\"\n"
1763 
1764 		"OpSource GLSL 430\n"
1765 		"OpName %main           \"main\"\n"
1766 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1767 
1768 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1769 
1770 		+ string(getComputeAsmInputOutputBufferTraits()) +
1771 
1772 		"OpNoLine\n" // At the earliest possible position, without preceding OpLine
1773 
1774 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1775 
1776 		"OpLine %fname 0 1\n"
1777 		"OpNoLine\n" // Immediately following a preceding OpLine
1778 
1779 		"OpLine %fname 1000 1\n"
1780 
1781 		"%id        = OpVariable %uvec3ptr Input\n"
1782 		"%zero      = OpConstant %i32 0\n"
1783 
1784 		"OpNoLine\n" // Contents after the previous OpLine
1785 
1786 		"%main      = OpFunction %void None %voidf\n"
1787 		"%label     = OpLabel\n"
1788 		"%idval     = OpLoad %uvec3 %id\n"
1789 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1790 
1791 		"OpNoLine\n" // Multiple OpNoLine
1792 		"OpNoLine\n"
1793 		"OpNoLine\n"
1794 
1795 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1796 		"%inval     = OpLoad %f32 %inloc\n"
1797 		"%neg       = OpFNegate %f32 %inval\n"
1798 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1799 		"             OpStore %outloc %neg\n"
1800 		"             OpReturn\n"
1801 		"             OpFunctionEnd\n";
1802 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1803 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1804 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1805 
1806 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1807 
1808 	return group.release();
1809 }
1810 
1811 // Compare instruction for the contraction compute case.
1812 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1813 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1814 {
1815 	if (outputAllocs.size() != 1)
1816 		return false;
1817 
1818 	// Only size is needed because we are not comparing the exact values.
1819 	size_t byteSize = expectedOutputs[0].getByteSize();
1820 
1821 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
1822 
1823 	for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1824 		if (outputAsFloat[i] != 0.f &&
1825 			outputAsFloat[i] != -ldexp(1, -24)) {
1826 			return false;
1827 		}
1828 	}
1829 
1830 	return true;
1831 }
1832 
createNoContractionGroup(tcu::TestContext & testCtx)1833 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1834 {
1835 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1836 	vector<CaseParameter>			cases;
1837 	const int						numElements		= 100;
1838 	vector<float>					inputFloats1	(numElements, 0);
1839 	vector<float>					inputFloats2	(numElements, 0);
1840 	vector<float>					outputFloats	(numElements, 0);
1841 	const StringTemplate			shaderTemplate	(
1842 		string(getComputeAsmShaderPreamble()) +
1843 
1844 		"OpName %main           \"main\"\n"
1845 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1846 
1847 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1848 
1849 		"${DECORATION}\n"
1850 
1851 		"OpDecorate %buf BufferBlock\n"
1852 		"OpDecorate %indata1 DescriptorSet 0\n"
1853 		"OpDecorate %indata1 Binding 0\n"
1854 		"OpDecorate %indata2 DescriptorSet 0\n"
1855 		"OpDecorate %indata2 Binding 1\n"
1856 		"OpDecorate %outdata DescriptorSet 0\n"
1857 		"OpDecorate %outdata Binding 2\n"
1858 		"OpDecorate %f32arr ArrayStride 4\n"
1859 		"OpMemberDecorate %buf 0 Offset 0\n"
1860 
1861 		+ string(getComputeAsmCommonTypes()) +
1862 
1863 		"%buf        = OpTypeStruct %f32arr\n"
1864 		"%bufptr     = OpTypePointer Uniform %buf\n"
1865 		"%indata1    = OpVariable %bufptr Uniform\n"
1866 		"%indata2    = OpVariable %bufptr Uniform\n"
1867 		"%outdata    = OpVariable %bufptr Uniform\n"
1868 
1869 		"%id         = OpVariable %uvec3ptr Input\n"
1870 		"%zero       = OpConstant %i32 0\n"
1871 		"%c_f_m1     = OpConstant %f32 -1.\n"
1872 
1873 		"%main       = OpFunction %void None %voidf\n"
1874 		"%label      = OpLabel\n"
1875 		"%idval      = OpLoad %uvec3 %id\n"
1876 		"%x          = OpCompositeExtract %u32 %idval 0\n"
1877 		"%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1878 		"%inval1     = OpLoad %f32 %inloc1\n"
1879 		"%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1880 		"%inval2     = OpLoad %f32 %inloc2\n"
1881 		"%mul        = OpFMul %f32 %inval1 %inval2\n"
1882 		"%add        = OpFAdd %f32 %mul %c_f_m1\n"
1883 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1884 		"              OpStore %outloc %add\n"
1885 		"              OpReturn\n"
1886 		"              OpFunctionEnd\n");
1887 
1888 	cases.push_back(CaseParameter("multiplication",	"OpDecorate %mul NoContraction"));
1889 	cases.push_back(CaseParameter("addition",		"OpDecorate %add NoContraction"));
1890 	cases.push_back(CaseParameter("both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1891 
1892 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1893 	{
1894 		inputFloats1[ndx]	= 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1895 		inputFloats2[ndx]	= 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1896 		// Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1897 		// conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1898 		// So the final result will be 0.f or 0x1p-24.
1899 		// If the operation is combined into a precise fused multiply-add, then the result would be
1900 		// 2^-46 (0xa8800000).
1901 		outputFloats[ndx]	= 0.f;
1902 	}
1903 
1904 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1905 	{
1906 		map<string, string>		specializations;
1907 		ComputeShaderSpec		spec;
1908 
1909 		specializations["DECORATION"] = cases[caseNdx].param;
1910 		spec.assembly = shaderTemplate.specialize(specializations);
1911 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1912 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1913 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1914 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1915 		// Check against the two possible answers based on rounding mode.
1916 		spec.verifyIO = &compareNoContractCase;
1917 
1918 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1919 	}
1920 	return group.release();
1921 }
1922 
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1923 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1924 {
1925 	if (outputAllocs.size() != 1)
1926 		return false;
1927 
1928 	vector<deUint8>	expectedBytes;
1929 	expectedOutputs[0].getBytes(expectedBytes);
1930 
1931 	const float*	expectedOutputAsFloat	= reinterpret_cast<const float*>(&expectedBytes.front());
1932 	const float*	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
1933 
1934 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1935 	{
1936 		const float f0 = expectedOutputAsFloat[idx];
1937 		const float f1 = outputAsFloat[idx];
1938 		// \todo relative error needs to be fairly high because FRem may be implemented as
1939 		// (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1940 		if (deFloatAbs((f1 - f0) / f0) > 0.02)
1941 			return false;
1942 	}
1943 
1944 	return true;
1945 }
1946 
createOpFRemGroup(tcu::TestContext & testCtx)1947 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1948 {
1949 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1950 	ComputeShaderSpec				spec;
1951 	de::Random						rnd				(deStringHash(group->getName()));
1952 	const int						numElements		= 200;
1953 	vector<float>					inputFloats1	(numElements, 0);
1954 	vector<float>					inputFloats2	(numElements, 0);
1955 	vector<float>					outputFloats	(numElements, 0);
1956 
1957 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1958 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1959 
1960 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1961 	{
1962 		// Guard against divisors near zero.
1963 		if (std::fabs(inputFloats2[ndx]) < 1e-3)
1964 			inputFloats2[ndx] = 8.f;
1965 
1966 		// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1967 		outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1968 	}
1969 
1970 	spec.assembly =
1971 		string(getComputeAsmShaderPreamble()) +
1972 
1973 		"OpName %main           \"main\"\n"
1974 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1975 
1976 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1977 
1978 		"OpDecorate %buf BufferBlock\n"
1979 		"OpDecorate %indata1 DescriptorSet 0\n"
1980 		"OpDecorate %indata1 Binding 0\n"
1981 		"OpDecorate %indata2 DescriptorSet 0\n"
1982 		"OpDecorate %indata2 Binding 1\n"
1983 		"OpDecorate %outdata DescriptorSet 0\n"
1984 		"OpDecorate %outdata Binding 2\n"
1985 		"OpDecorate %f32arr ArrayStride 4\n"
1986 		"OpMemberDecorate %buf 0 Offset 0\n"
1987 
1988 		+ string(getComputeAsmCommonTypes()) +
1989 
1990 		"%buf        = OpTypeStruct %f32arr\n"
1991 		"%bufptr     = OpTypePointer Uniform %buf\n"
1992 		"%indata1    = OpVariable %bufptr Uniform\n"
1993 		"%indata2    = OpVariable %bufptr Uniform\n"
1994 		"%outdata    = OpVariable %bufptr Uniform\n"
1995 
1996 		"%id        = OpVariable %uvec3ptr Input\n"
1997 		"%zero      = OpConstant %i32 0\n"
1998 
1999 		"%main      = OpFunction %void None %voidf\n"
2000 		"%label     = OpLabel\n"
2001 		"%idval     = OpLoad %uvec3 %id\n"
2002 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2003 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2004 		"%inval1    = OpLoad %f32 %inloc1\n"
2005 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2006 		"%inval2    = OpLoad %f32 %inloc2\n"
2007 		"%rem       = OpFRem %f32 %inval1 %inval2\n"
2008 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2009 		"             OpStore %outloc %rem\n"
2010 		"             OpReturn\n"
2011 		"             OpFunctionEnd\n";
2012 
2013 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2014 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2015 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2016 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2017 	spec.verifyIO = &compareFRem;
2018 
2019 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2020 
2021 	return group.release();
2022 }
2023 
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2024 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2025 {
2026 	if (outputAllocs.size() != 1)
2027 		return false;
2028 
2029 	const BufferSp&			expectedOutput			(expectedOutputs[0].getBuffer());
2030 	std::vector<deUint8>	data;
2031 	expectedOutput->getBytes(data);
2032 
2033 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2034 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2035 
2036 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2037 	{
2038 		const float f0 = expectedOutputAsFloat[idx];
2039 		const float f1 = outputAsFloat[idx];
2040 
2041 		// For NMin, we accept NaN as output if both inputs were NaN.
2042 		// Otherwise the NaN is the wrong choise, as on architectures that
2043 		// do not handle NaN, those are huge values.
2044 		if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2045 			return false;
2046 	}
2047 
2048 	return true;
2049 }
2050 
createOpNMinGroup(tcu::TestContext & testCtx)2051 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2052 {
2053 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2054 	ComputeShaderSpec				spec;
2055 	de::Random						rnd				(deStringHash(group->getName()));
2056 	const int						numElements		= 200;
2057 	vector<float>					inputFloats1	(numElements, 0);
2058 	vector<float>					inputFloats2	(numElements, 0);
2059 	vector<float>					outputFloats	(numElements, 0);
2060 
2061 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2062 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2063 
2064 	// Make the first case a full-NAN case.
2065 	inputFloats1[0] = TCU_NAN;
2066 	inputFloats2[0] = TCU_NAN;
2067 
2068 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2069 	{
2070 		// By default, pick the smallest
2071 		outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2072 
2073 		// Make half of the cases NaN cases
2074 		if ((ndx & 1) == 0)
2075 		{
2076 			// Alternate between the NaN operand
2077 			if ((ndx & 2) == 0)
2078 			{
2079 				outputFloats[ndx] = inputFloats2[ndx];
2080 				inputFloats1[ndx] = TCU_NAN;
2081 			}
2082 			else
2083 			{
2084 				outputFloats[ndx] = inputFloats1[ndx];
2085 				inputFloats2[ndx] = TCU_NAN;
2086 			}
2087 		}
2088 	}
2089 
2090 	spec.assembly =
2091 		"OpCapability Shader\n"
2092 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2093 		"OpMemoryModel Logical GLSL450\n"
2094 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2095 		"OpExecutionMode %main LocalSize 1 1 1\n"
2096 
2097 		"OpName %main           \"main\"\n"
2098 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2099 
2100 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2101 
2102 		"OpDecorate %buf BufferBlock\n"
2103 		"OpDecorate %indata1 DescriptorSet 0\n"
2104 		"OpDecorate %indata1 Binding 0\n"
2105 		"OpDecorate %indata2 DescriptorSet 0\n"
2106 		"OpDecorate %indata2 Binding 1\n"
2107 		"OpDecorate %outdata DescriptorSet 0\n"
2108 		"OpDecorate %outdata Binding 2\n"
2109 		"OpDecorate %f32arr ArrayStride 4\n"
2110 		"OpMemberDecorate %buf 0 Offset 0\n"
2111 
2112 		+ string(getComputeAsmCommonTypes()) +
2113 
2114 		"%buf        = OpTypeStruct %f32arr\n"
2115 		"%bufptr     = OpTypePointer Uniform %buf\n"
2116 		"%indata1    = OpVariable %bufptr Uniform\n"
2117 		"%indata2    = OpVariable %bufptr Uniform\n"
2118 		"%outdata    = OpVariable %bufptr Uniform\n"
2119 
2120 		"%id        = OpVariable %uvec3ptr Input\n"
2121 		"%zero      = OpConstant %i32 0\n"
2122 
2123 		"%main      = OpFunction %void None %voidf\n"
2124 		"%label     = OpLabel\n"
2125 		"%idval     = OpLoad %uvec3 %id\n"
2126 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2127 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2128 		"%inval1    = OpLoad %f32 %inloc1\n"
2129 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2130 		"%inval2    = OpLoad %f32 %inloc2\n"
2131 		"%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2132 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2133 		"             OpStore %outloc %rem\n"
2134 		"             OpReturn\n"
2135 		"             OpFunctionEnd\n";
2136 
2137 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2138 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2139 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2140 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2141 	spec.verifyIO = &compareNMin;
2142 
2143 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2144 
2145 	return group.release();
2146 }
2147 
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2148 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2149 {
2150 	if (outputAllocs.size() != 1)
2151 		return false;
2152 
2153 	const BufferSp&			expectedOutput			= expectedOutputs[0].getBuffer();
2154 	std::vector<deUint8>	data;
2155 	expectedOutput->getBytes(data);
2156 
2157 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2158 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2159 
2160 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2161 	{
2162 		const float f0 = expectedOutputAsFloat[idx];
2163 		const float f1 = outputAsFloat[idx];
2164 
2165 		// For NMax, NaN is considered acceptable result, since in
2166 		// architectures that do not handle NaNs, those are huge values.
2167 		if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2168 			return false;
2169 	}
2170 
2171 	return true;
2172 }
2173 
createOpNMaxGroup(tcu::TestContext & testCtx)2174 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2175 {
2176 	de::MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2177 	ComputeShaderSpec				spec;
2178 	de::Random						rnd				(deStringHash(group->getName()));
2179 	const int						numElements		= 200;
2180 	vector<float>					inputFloats1	(numElements, 0);
2181 	vector<float>					inputFloats2	(numElements, 0);
2182 	vector<float>					outputFloats	(numElements, 0);
2183 
2184 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2185 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2186 
2187 	// Make the first case a full-NAN case.
2188 	inputFloats1[0] = TCU_NAN;
2189 	inputFloats2[0] = TCU_NAN;
2190 
2191 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2192 	{
2193 		// By default, pick the biggest
2194 		outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2195 
2196 		// Make half of the cases NaN cases
2197 		if ((ndx & 1) == 0)
2198 		{
2199 			// Alternate between the NaN operand
2200 			if ((ndx & 2) == 0)
2201 			{
2202 				outputFloats[ndx] = inputFloats2[ndx];
2203 				inputFloats1[ndx] = TCU_NAN;
2204 			}
2205 			else
2206 			{
2207 				outputFloats[ndx] = inputFloats1[ndx];
2208 				inputFloats2[ndx] = TCU_NAN;
2209 			}
2210 		}
2211 	}
2212 
2213 	spec.assembly =
2214 		"OpCapability Shader\n"
2215 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2216 		"OpMemoryModel Logical GLSL450\n"
2217 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2218 		"OpExecutionMode %main LocalSize 1 1 1\n"
2219 
2220 		"OpName %main           \"main\"\n"
2221 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2222 
2223 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2224 
2225 		"OpDecorate %buf BufferBlock\n"
2226 		"OpDecorate %indata1 DescriptorSet 0\n"
2227 		"OpDecorate %indata1 Binding 0\n"
2228 		"OpDecorate %indata2 DescriptorSet 0\n"
2229 		"OpDecorate %indata2 Binding 1\n"
2230 		"OpDecorate %outdata DescriptorSet 0\n"
2231 		"OpDecorate %outdata Binding 2\n"
2232 		"OpDecorate %f32arr ArrayStride 4\n"
2233 		"OpMemberDecorate %buf 0 Offset 0\n"
2234 
2235 		+ string(getComputeAsmCommonTypes()) +
2236 
2237 		"%buf        = OpTypeStruct %f32arr\n"
2238 		"%bufptr     = OpTypePointer Uniform %buf\n"
2239 		"%indata1    = OpVariable %bufptr Uniform\n"
2240 		"%indata2    = OpVariable %bufptr Uniform\n"
2241 		"%outdata    = OpVariable %bufptr Uniform\n"
2242 
2243 		"%id        = OpVariable %uvec3ptr Input\n"
2244 		"%zero      = OpConstant %i32 0\n"
2245 
2246 		"%main      = OpFunction %void None %voidf\n"
2247 		"%label     = OpLabel\n"
2248 		"%idval     = OpLoad %uvec3 %id\n"
2249 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2250 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2251 		"%inval1    = OpLoad %f32 %inloc1\n"
2252 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2253 		"%inval2    = OpLoad %f32 %inloc2\n"
2254 		"%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2255 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2256 		"             OpStore %outloc %rem\n"
2257 		"             OpReturn\n"
2258 		"             OpFunctionEnd\n";
2259 
2260 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2261 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2262 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2263 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2264 	spec.verifyIO = &compareNMax;
2265 
2266 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2267 
2268 	return group.release();
2269 }
2270 
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2271 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2272 {
2273 	if (outputAllocs.size() != 1)
2274 		return false;
2275 
2276 	const BufferSp&			expectedOutput			= expectedOutputs[0].getBuffer();
2277 	std::vector<deUint8>	data;
2278 	expectedOutput->getBytes(data);
2279 
2280 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2281 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2282 
2283 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2284 	{
2285 		const float e0 = expectedOutputAsFloat[idx * 2];
2286 		const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2287 		const float res = outputAsFloat[idx];
2288 
2289 		// For NClamp, we have two possible outcomes based on
2290 		// whether NaNs are handled or not.
2291 		// If either min or max value is NaN, the result is undefined,
2292 		// so this test doesn't stress those. If the clamped value is
2293 		// NaN, and NaNs are handled, the result is min; if NaNs are not
2294 		// handled, they are big values that result in max.
2295 		// If all three parameters are NaN, the result should be NaN.
2296 		if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2297 			 (deFloatAbs(e0 - res) < 0.00001f) ||
2298 			 (deFloatAbs(e1 - res) < 0.00001f)))
2299 			return false;
2300 	}
2301 
2302 	return true;
2303 }
2304 
createOpNClampGroup(tcu::TestContext & testCtx)2305 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2306 {
2307 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2308 	ComputeShaderSpec				spec;
2309 	de::Random						rnd				(deStringHash(group->getName()));
2310 	const int						numElements		= 200;
2311 	vector<float>					inputFloats1	(numElements, 0);
2312 	vector<float>					inputFloats2	(numElements, 0);
2313 	vector<float>					inputFloats3	(numElements, 0);
2314 	vector<float>					outputFloats	(numElements * 2, 0);
2315 
2316 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2317 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2318 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2319 
2320 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2321 	{
2322 		// Results are only defined if max value is bigger than min value.
2323 		if (inputFloats2[ndx] > inputFloats3[ndx])
2324 		{
2325 			float t = inputFloats2[ndx];
2326 			inputFloats2[ndx] = inputFloats3[ndx];
2327 			inputFloats3[ndx] = t;
2328 		}
2329 
2330 		// By default, do the clamp, setting both possible answers
2331 		float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2332 
2333 		float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2334 		float maxResB = maxResA;
2335 
2336 		// Alternate between the NaN cases
2337 		if (ndx & 1)
2338 		{
2339 			inputFloats1[ndx] = TCU_NAN;
2340 			// If NaN is handled, the result should be same as the clamp minimum.
2341 			// If NaN is not handled, the result should clamp to the clamp maximum.
2342 			maxResA = inputFloats2[ndx];
2343 			maxResB = inputFloats3[ndx];
2344 		}
2345 		else
2346 		{
2347 			// Not a NaN case - only one legal result.
2348 			maxResA = defaultRes;
2349 			maxResB = defaultRes;
2350 		}
2351 
2352 		outputFloats[ndx * 2] = maxResA;
2353 		outputFloats[ndx * 2 + 1] = maxResB;
2354 	}
2355 
2356 	// Make the first case a full-NAN case.
2357 	inputFloats1[0] = TCU_NAN;
2358 	inputFloats2[0] = TCU_NAN;
2359 	inputFloats3[0] = TCU_NAN;
2360 	outputFloats[0] = TCU_NAN;
2361 	outputFloats[1] = TCU_NAN;
2362 
2363 	spec.assembly =
2364 		"OpCapability Shader\n"
2365 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2366 		"OpMemoryModel Logical GLSL450\n"
2367 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2368 		"OpExecutionMode %main LocalSize 1 1 1\n"
2369 
2370 		"OpName %main           \"main\"\n"
2371 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2372 
2373 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2374 
2375 		"OpDecorate %buf BufferBlock\n"
2376 		"OpDecorate %indata1 DescriptorSet 0\n"
2377 		"OpDecorate %indata1 Binding 0\n"
2378 		"OpDecorate %indata2 DescriptorSet 0\n"
2379 		"OpDecorate %indata2 Binding 1\n"
2380 		"OpDecorate %indata3 DescriptorSet 0\n"
2381 		"OpDecorate %indata3 Binding 2\n"
2382 		"OpDecorate %outdata DescriptorSet 0\n"
2383 		"OpDecorate %outdata Binding 3\n"
2384 		"OpDecorate %f32arr ArrayStride 4\n"
2385 		"OpMemberDecorate %buf 0 Offset 0\n"
2386 
2387 		+ string(getComputeAsmCommonTypes()) +
2388 
2389 		"%buf        = OpTypeStruct %f32arr\n"
2390 		"%bufptr     = OpTypePointer Uniform %buf\n"
2391 		"%indata1    = OpVariable %bufptr Uniform\n"
2392 		"%indata2    = OpVariable %bufptr Uniform\n"
2393 		"%indata3    = OpVariable %bufptr Uniform\n"
2394 		"%outdata    = OpVariable %bufptr Uniform\n"
2395 
2396 		"%id        = OpVariable %uvec3ptr Input\n"
2397 		"%zero      = OpConstant %i32 0\n"
2398 
2399 		"%main      = OpFunction %void None %voidf\n"
2400 		"%label     = OpLabel\n"
2401 		"%idval     = OpLoad %uvec3 %id\n"
2402 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2403 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2404 		"%inval1    = OpLoad %f32 %inloc1\n"
2405 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2406 		"%inval2    = OpLoad %f32 %inloc2\n"
2407 		"%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2408 		"%inval3    = OpLoad %f32 %inloc3\n"
2409 		"%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2410 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2411 		"             OpStore %outloc %rem\n"
2412 		"             OpReturn\n"
2413 		"             OpFunctionEnd\n";
2414 
2415 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2416 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2417 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2418 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2419 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2420 	spec.verifyIO = &compareNClamp;
2421 
2422 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2423 
2424 	return group.release();
2425 }
2426 
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2427 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2428 {
2429 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2430 	de::Random						rnd				(deStringHash(group->getName()));
2431 	const int						numElements		= 200;
2432 
2433 	const struct CaseParams
2434 	{
2435 		const char*		name;
2436 		const char*		failMessage;		// customized status message
2437 		qpTestResult	failResult;			// override status on failure
2438 		int				op1Min, op1Max;		// operand ranges
2439 		int				op2Min, op2Max;
2440 	} cases[] =
2441 	{
2442 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
2443 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
2444 	};
2445 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2446 
2447 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2448 	{
2449 		const CaseParams&	params		= cases[caseNdx];
2450 		ComputeShaderSpec	spec;
2451 		vector<deInt32>		inputInts1	(numElements, 0);
2452 		vector<deInt32>		inputInts2	(numElements, 0);
2453 		vector<deInt32>		outputInts	(numElements, 0);
2454 
2455 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2456 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2457 
2458 		for (int ndx = 0; ndx < numElements; ++ndx)
2459 		{
2460 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2461 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2462 		}
2463 
2464 		spec.assembly =
2465 			string(getComputeAsmShaderPreamble()) +
2466 
2467 			"OpName %main           \"main\"\n"
2468 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2469 
2470 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2471 
2472 			"OpDecorate %buf BufferBlock\n"
2473 			"OpDecorate %indata1 DescriptorSet 0\n"
2474 			"OpDecorate %indata1 Binding 0\n"
2475 			"OpDecorate %indata2 DescriptorSet 0\n"
2476 			"OpDecorate %indata2 Binding 1\n"
2477 			"OpDecorate %outdata DescriptorSet 0\n"
2478 			"OpDecorate %outdata Binding 2\n"
2479 			"OpDecorate %i32arr ArrayStride 4\n"
2480 			"OpMemberDecorate %buf 0 Offset 0\n"
2481 
2482 			+ string(getComputeAsmCommonTypes()) +
2483 
2484 			"%buf        = OpTypeStruct %i32arr\n"
2485 			"%bufptr     = OpTypePointer Uniform %buf\n"
2486 			"%indata1    = OpVariable %bufptr Uniform\n"
2487 			"%indata2    = OpVariable %bufptr Uniform\n"
2488 			"%outdata    = OpVariable %bufptr Uniform\n"
2489 
2490 			"%id        = OpVariable %uvec3ptr Input\n"
2491 			"%zero      = OpConstant %i32 0\n"
2492 
2493 			"%main      = OpFunction %void None %voidf\n"
2494 			"%label     = OpLabel\n"
2495 			"%idval     = OpLoad %uvec3 %id\n"
2496 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2497 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2498 			"%inval1    = OpLoad %i32 %inloc1\n"
2499 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2500 			"%inval2    = OpLoad %i32 %inloc2\n"
2501 			"%rem       = OpSRem %i32 %inval1 %inval2\n"
2502 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2503 			"             OpStore %outloc %rem\n"
2504 			"             OpReturn\n"
2505 			"             OpFunctionEnd\n";
2506 
2507 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
2508 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
2509 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
2510 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2511 		spec.failResult			= params.failResult;
2512 		spec.failMessage		= params.failMessage;
2513 
2514 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2515 	}
2516 
2517 	return group.release();
2518 }
2519 
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2520 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2521 {
2522 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2523 	de::Random						rnd				(deStringHash(group->getName()));
2524 	const int						numElements		= 200;
2525 
2526 	const struct CaseParams
2527 	{
2528 		const char*		name;
2529 		const char*		failMessage;		// customized status message
2530 		qpTestResult	failResult;			// override status on failure
2531 		bool			positive;
2532 	} cases[] =
2533 	{
2534 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
2535 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
2536 	};
2537 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2538 
2539 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2540 	{
2541 		const CaseParams&	params		= cases[caseNdx];
2542 		ComputeShaderSpec	spec;
2543 		vector<deInt64>		inputInts1	(numElements, 0);
2544 		vector<deInt64>		inputInts2	(numElements, 0);
2545 		vector<deInt64>		outputInts	(numElements, 0);
2546 
2547 		if (params.positive)
2548 		{
2549 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2550 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2551 		}
2552 		else
2553 		{
2554 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2555 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2556 		}
2557 
2558 		for (int ndx = 0; ndx < numElements; ++ndx)
2559 		{
2560 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2561 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2562 		}
2563 
2564 		spec.assembly =
2565 			"OpCapability Int64\n"
2566 
2567 			+ string(getComputeAsmShaderPreamble()) +
2568 
2569 			"OpName %main           \"main\"\n"
2570 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2571 
2572 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2573 
2574 			"OpDecorate %buf BufferBlock\n"
2575 			"OpDecorate %indata1 DescriptorSet 0\n"
2576 			"OpDecorate %indata1 Binding 0\n"
2577 			"OpDecorate %indata2 DescriptorSet 0\n"
2578 			"OpDecorate %indata2 Binding 1\n"
2579 			"OpDecorate %outdata DescriptorSet 0\n"
2580 			"OpDecorate %outdata Binding 2\n"
2581 			"OpDecorate %i64arr ArrayStride 8\n"
2582 			"OpMemberDecorate %buf 0 Offset 0\n"
2583 
2584 			+ string(getComputeAsmCommonTypes())
2585 			+ string(getComputeAsmCommonInt64Types()) +
2586 
2587 			"%buf        = OpTypeStruct %i64arr\n"
2588 			"%bufptr     = OpTypePointer Uniform %buf\n"
2589 			"%indata1    = OpVariable %bufptr Uniform\n"
2590 			"%indata2    = OpVariable %bufptr Uniform\n"
2591 			"%outdata    = OpVariable %bufptr Uniform\n"
2592 
2593 			"%id        = OpVariable %uvec3ptr Input\n"
2594 			"%zero      = OpConstant %i64 0\n"
2595 
2596 			"%main      = OpFunction %void None %voidf\n"
2597 			"%label     = OpLabel\n"
2598 			"%idval     = OpLoad %uvec3 %id\n"
2599 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2600 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2601 			"%inval1    = OpLoad %i64 %inloc1\n"
2602 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2603 			"%inval2    = OpLoad %i64 %inloc2\n"
2604 			"%rem       = OpSRem %i64 %inval1 %inval2\n"
2605 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2606 			"             OpStore %outloc %rem\n"
2607 			"             OpReturn\n"
2608 			"             OpFunctionEnd\n";
2609 
2610 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
2611 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
2612 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
2613 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2614 		spec.failResult			= params.failResult;
2615 		spec.failMessage		= params.failMessage;
2616 
2617 		spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2618 
2619 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2620 	}
2621 
2622 	return group.release();
2623 }
2624 
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2625 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2626 {
2627 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2628 	de::Random						rnd				(deStringHash(group->getName()));
2629 	const int						numElements		= 200;
2630 
2631 	const struct CaseParams
2632 	{
2633 		const char*		name;
2634 		const char*		failMessage;		// customized status message
2635 		qpTestResult	failResult;			// override status on failure
2636 		int				op1Min, op1Max;		// operand ranges
2637 		int				op2Min, op2Max;
2638 	} cases[] =
2639 	{
2640 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
2641 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
2642 	};
2643 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2644 
2645 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2646 	{
2647 		const CaseParams&	params		= cases[caseNdx];
2648 
2649 		ComputeShaderSpec	spec;
2650 		vector<deInt32>		inputInts1	(numElements, 0);
2651 		vector<deInt32>		inputInts2	(numElements, 0);
2652 		vector<deInt32>		outputInts	(numElements, 0);
2653 
2654 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2655 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2656 
2657 		for (int ndx = 0; ndx < numElements; ++ndx)
2658 		{
2659 			deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2660 			if (rem == 0)
2661 			{
2662 				outputInts[ndx] = 0;
2663 			}
2664 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2665 			{
2666 				// They have the same sign
2667 				outputInts[ndx] = rem;
2668 			}
2669 			else
2670 			{
2671 				// They have opposite sign.  The remainder operation takes the
2672 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2673 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2674 				// the result has the correct sign and that it is still
2675 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
2676 				//
2677 				// See also http://mathforum.org/library/drmath/view/52343.html
2678 				outputInts[ndx] = rem + inputInts2[ndx];
2679 			}
2680 		}
2681 
2682 		spec.assembly =
2683 			string(getComputeAsmShaderPreamble()) +
2684 
2685 			"OpName %main           \"main\"\n"
2686 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2687 
2688 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2689 
2690 			"OpDecorate %buf BufferBlock\n"
2691 			"OpDecorate %indata1 DescriptorSet 0\n"
2692 			"OpDecorate %indata1 Binding 0\n"
2693 			"OpDecorate %indata2 DescriptorSet 0\n"
2694 			"OpDecorate %indata2 Binding 1\n"
2695 			"OpDecorate %outdata DescriptorSet 0\n"
2696 			"OpDecorate %outdata Binding 2\n"
2697 			"OpDecorate %i32arr ArrayStride 4\n"
2698 			"OpMemberDecorate %buf 0 Offset 0\n"
2699 
2700 			+ string(getComputeAsmCommonTypes()) +
2701 
2702 			"%buf        = OpTypeStruct %i32arr\n"
2703 			"%bufptr     = OpTypePointer Uniform %buf\n"
2704 			"%indata1    = OpVariable %bufptr Uniform\n"
2705 			"%indata2    = OpVariable %bufptr Uniform\n"
2706 			"%outdata    = OpVariable %bufptr Uniform\n"
2707 
2708 			"%id        = OpVariable %uvec3ptr Input\n"
2709 			"%zero      = OpConstant %i32 0\n"
2710 
2711 			"%main      = OpFunction %void None %voidf\n"
2712 			"%label     = OpLabel\n"
2713 			"%idval     = OpLoad %uvec3 %id\n"
2714 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2715 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2716 			"%inval1    = OpLoad %i32 %inloc1\n"
2717 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2718 			"%inval2    = OpLoad %i32 %inloc2\n"
2719 			"%rem       = OpSMod %i32 %inval1 %inval2\n"
2720 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2721 			"             OpStore %outloc %rem\n"
2722 			"             OpReturn\n"
2723 			"             OpFunctionEnd\n";
2724 
2725 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
2726 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
2727 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
2728 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2729 		spec.failResult			= params.failResult;
2730 		spec.failMessage		= params.failMessage;
2731 
2732 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2733 	}
2734 
2735 	return group.release();
2736 }
2737 
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2738 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2739 {
2740 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2741 	de::Random						rnd				(deStringHash(group->getName()));
2742 	const int						numElements		= 200;
2743 
2744 	const struct CaseParams
2745 	{
2746 		const char*		name;
2747 		const char*		failMessage;		// customized status message
2748 		qpTestResult	failResult;			// override status on failure
2749 		bool			positive;
2750 	} cases[] =
2751 	{
2752 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
2753 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
2754 	};
2755 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2756 
2757 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2758 	{
2759 		const CaseParams&	params		= cases[caseNdx];
2760 
2761 		ComputeShaderSpec	spec;
2762 		vector<deInt64>		inputInts1	(numElements, 0);
2763 		vector<deInt64>		inputInts2	(numElements, 0);
2764 		vector<deInt64>		outputInts	(numElements, 0);
2765 
2766 
2767 		if (params.positive)
2768 		{
2769 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2770 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2771 		}
2772 		else
2773 		{
2774 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2775 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2776 		}
2777 
2778 		for (int ndx = 0; ndx < numElements; ++ndx)
2779 		{
2780 			deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2781 			if (rem == 0)
2782 			{
2783 				outputInts[ndx] = 0;
2784 			}
2785 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2786 			{
2787 				// They have the same sign
2788 				outputInts[ndx] = rem;
2789 			}
2790 			else
2791 			{
2792 				// They have opposite sign.  The remainder operation takes the
2793 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2794 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2795 				// the result has the correct sign and that it is still
2796 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
2797 				//
2798 				// See also http://mathforum.org/library/drmath/view/52343.html
2799 				outputInts[ndx] = rem + inputInts2[ndx];
2800 			}
2801 		}
2802 
2803 		spec.assembly =
2804 			"OpCapability Int64\n"
2805 
2806 			+ string(getComputeAsmShaderPreamble()) +
2807 
2808 			"OpName %main           \"main\"\n"
2809 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2810 
2811 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2812 
2813 			"OpDecorate %buf BufferBlock\n"
2814 			"OpDecorate %indata1 DescriptorSet 0\n"
2815 			"OpDecorate %indata1 Binding 0\n"
2816 			"OpDecorate %indata2 DescriptorSet 0\n"
2817 			"OpDecorate %indata2 Binding 1\n"
2818 			"OpDecorate %outdata DescriptorSet 0\n"
2819 			"OpDecorate %outdata Binding 2\n"
2820 			"OpDecorate %i64arr ArrayStride 8\n"
2821 			"OpMemberDecorate %buf 0 Offset 0\n"
2822 
2823 			+ string(getComputeAsmCommonTypes())
2824 			+ string(getComputeAsmCommonInt64Types()) +
2825 
2826 			"%buf        = OpTypeStruct %i64arr\n"
2827 			"%bufptr     = OpTypePointer Uniform %buf\n"
2828 			"%indata1    = OpVariable %bufptr Uniform\n"
2829 			"%indata2    = OpVariable %bufptr Uniform\n"
2830 			"%outdata    = OpVariable %bufptr Uniform\n"
2831 
2832 			"%id        = OpVariable %uvec3ptr Input\n"
2833 			"%zero      = OpConstant %i64 0\n"
2834 
2835 			"%main      = OpFunction %void None %voidf\n"
2836 			"%label     = OpLabel\n"
2837 			"%idval     = OpLoad %uvec3 %id\n"
2838 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2839 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2840 			"%inval1    = OpLoad %i64 %inloc1\n"
2841 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2842 			"%inval2    = OpLoad %i64 %inloc2\n"
2843 			"%rem       = OpSMod %i64 %inval1 %inval2\n"
2844 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2845 			"             OpStore %outloc %rem\n"
2846 			"             OpReturn\n"
2847 			"             OpFunctionEnd\n";
2848 
2849 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
2850 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
2851 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
2852 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2853 		spec.failResult			= params.failResult;
2854 		spec.failMessage		= params.failMessage;
2855 
2856 		spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2857 
2858 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2859 	}
2860 
2861 	return group.release();
2862 }
2863 
2864 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2865 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2866 {
2867 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2868 	de::Random						rnd				(deStringHash(group->getName()));
2869 	const int						numElements		= 100;
2870 
2871 	// The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2872 	ComputeShaderSpec				spec1;
2873 	vector<Vec4>					inputFloats1	(numElements);
2874 	vector<Vec4>					outputFloats1	(numElements);
2875 
2876 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2877 
2878 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2879 	floorAll(inputFloats1);
2880 
2881 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2882 		outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2883 
2884 	spec1.assembly =
2885 		string(getComputeAsmShaderPreamble()) +
2886 
2887 		"OpName %main           \"main\"\n"
2888 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2889 
2890 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2891 		"OpDecorate %vec4arr ArrayStride 16\n"
2892 
2893 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2894 
2895 		"%vec4       = OpTypeVector %f32 4\n"
2896 		"%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2897 		"%vec4ptr_f  = OpTypePointer Function %vec4\n"
2898 		"%vec4arr    = OpTypeRuntimeArray %vec4\n"
2899 		"%buf        = OpTypeStruct %vec4arr\n"
2900 		"%bufptr     = OpTypePointer Uniform %buf\n"
2901 		"%indata     = OpVariable %bufptr Uniform\n"
2902 		"%outdata    = OpVariable %bufptr Uniform\n"
2903 
2904 		"%id         = OpVariable %uvec3ptr Input\n"
2905 		"%zero       = OpConstant %i32 0\n"
2906 		"%c_f_0      = OpConstant %f32 0.\n"
2907 		"%c_f_0_5    = OpConstant %f32 0.5\n"
2908 		"%c_f_1_5    = OpConstant %f32 1.5\n"
2909 		"%c_f_2_5    = OpConstant %f32 2.5\n"
2910 		"%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2911 
2912 		"%main       = OpFunction %void None %voidf\n"
2913 		"%label      = OpLabel\n"
2914 		"%v_vec4     = OpVariable %vec4ptr_f Function\n"
2915 		"%idval      = OpLoad %uvec3 %id\n"
2916 		"%x          = OpCompositeExtract %u32 %idval 0\n"
2917 		"%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2918 		"%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2919 		"              OpCopyMemory %v_vec4 %inloc\n"
2920 		"%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2921 		"%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2922 		"              OpStore %outloc %add\n"
2923 		"              OpReturn\n"
2924 		"              OpFunctionEnd\n";
2925 
2926 	spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2927 	spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2928 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
2929 
2930 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2931 
2932 	// The following case copies a float[100] variable from the input buffer to the output buffer.
2933 	ComputeShaderSpec				spec2;
2934 	vector<float>					inputFloats2	(numElements);
2935 	vector<float>					outputFloats2	(numElements);
2936 
2937 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2938 
2939 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2940 		outputFloats2[ndx] = inputFloats2[ndx];
2941 
2942 	spec2.assembly =
2943 		string(getComputeAsmShaderPreamble()) +
2944 
2945 		"OpName %main           \"main\"\n"
2946 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2947 
2948 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2949 		"OpDecorate %f32arr100 ArrayStride 4\n"
2950 
2951 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2952 
2953 		"%hundred        = OpConstant %u32 100\n"
2954 		"%f32arr100      = OpTypeArray %f32 %hundred\n"
2955 		"%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2956 		"%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2957 		"%buf            = OpTypeStruct %f32arr100\n"
2958 		"%bufptr         = OpTypePointer Uniform %buf\n"
2959 		"%indata         = OpVariable %bufptr Uniform\n"
2960 		"%outdata        = OpVariable %bufptr Uniform\n"
2961 
2962 		"%id             = OpVariable %uvec3ptr Input\n"
2963 		"%zero           = OpConstant %i32 0\n"
2964 
2965 		"%main           = OpFunction %void None %voidf\n"
2966 		"%label          = OpLabel\n"
2967 		"%var            = OpVariable %f32arr100ptr_f Function\n"
2968 		"%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2969 		"%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2970 		"                  OpCopyMemory %var %inarr\n"
2971 		"                  OpCopyMemory %outarr %var\n"
2972 		"                  OpReturn\n"
2973 		"                  OpFunctionEnd\n";
2974 
2975 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2976 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2977 	spec2.numWorkGroups = IVec3(1, 1, 1);
2978 
2979 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2980 
2981 	// The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2982 	ComputeShaderSpec				spec3;
2983 	vector<float>					inputFloats3	(16);
2984 	vector<float>					outputFloats3	(16);
2985 
2986 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2987 
2988 	for (size_t ndx = 0; ndx < 16; ++ndx)
2989 		outputFloats3[ndx] = inputFloats3[ndx];
2990 
2991 	spec3.assembly =
2992 		string(getComputeAsmShaderPreamble()) +
2993 
2994 		"OpName %main           \"main\"\n"
2995 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2996 
2997 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2998 		//"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
2999 		"OpMemberDecorate %buf 1 Offset 16\n"
3000 		"OpMemberDecorate %buf 2 Offset 32\n"
3001 		"OpMemberDecorate %buf 3 Offset 48\n"
3002 
3003 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3004 
3005 		"%vec4      = OpTypeVector %f32 4\n"
3006 		"%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3007 		"%bufptr    = OpTypePointer Uniform %buf\n"
3008 		"%indata    = OpVariable %bufptr Uniform\n"
3009 		"%outdata   = OpVariable %bufptr Uniform\n"
3010 		"%vec4stptr = OpTypePointer Function %buf\n"
3011 
3012 		"%id        = OpVariable %uvec3ptr Input\n"
3013 		"%zero      = OpConstant %i32 0\n"
3014 
3015 		"%main      = OpFunction %void None %voidf\n"
3016 		"%label     = OpLabel\n"
3017 		"%var       = OpVariable %vec4stptr Function\n"
3018 		"             OpCopyMemory %var %indata\n"
3019 		"             OpCopyMemory %outdata %var\n"
3020 		"             OpReturn\n"
3021 		"             OpFunctionEnd\n";
3022 
3023 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3024 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3025 	spec3.numWorkGroups = IVec3(1, 1, 1);
3026 
3027 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
3028 
3029 	// The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3030 	ComputeShaderSpec				spec4;
3031 	vector<float>					inputFloats4	(numElements);
3032 	vector<float>					outputFloats4	(numElements);
3033 
3034 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3035 
3036 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3037 		outputFloats4[ndx] = -inputFloats4[ndx];
3038 
3039 	spec4.assembly =
3040 		string(getComputeAsmShaderPreamble()) +
3041 
3042 		"OpName %main           \"main\"\n"
3043 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3044 
3045 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3046 
3047 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3048 
3049 		"%f32ptr_f  = OpTypePointer Function %f32\n"
3050 		"%id        = OpVariable %uvec3ptr Input\n"
3051 		"%zero      = OpConstant %i32 0\n"
3052 
3053 		"%main      = OpFunction %void None %voidf\n"
3054 		"%label     = OpLabel\n"
3055 		"%var       = OpVariable %f32ptr_f Function\n"
3056 		"%idval     = OpLoad %uvec3 %id\n"
3057 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3058 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3059 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3060 		"             OpCopyMemory %var %inloc\n"
3061 		"%val       = OpLoad %f32 %var\n"
3062 		"%neg       = OpFNegate %f32 %val\n"
3063 		"             OpStore %outloc %neg\n"
3064 		"             OpReturn\n"
3065 		"             OpFunctionEnd\n";
3066 
3067 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3068 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3069 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
3070 
3071 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3072 
3073 	return group.release();
3074 }
3075 
createOpCopyObjectGroup(tcu::TestContext & testCtx)3076 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3077 {
3078 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3079 	ComputeShaderSpec				spec;
3080 	de::Random						rnd				(deStringHash(group->getName()));
3081 	const int						numElements		= 100;
3082 	vector<float>					inputFloats		(numElements, 0);
3083 	vector<float>					outputFloats	(numElements, 0);
3084 
3085 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3086 
3087 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3088 	floorAll(inputFloats);
3089 
3090 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3091 		outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3092 
3093 	spec.assembly =
3094 		string(getComputeAsmShaderPreamble()) +
3095 
3096 		"OpName %main           \"main\"\n"
3097 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3098 
3099 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3100 
3101 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3102 
3103 		"%fmat     = OpTypeMatrix %fvec3 3\n"
3104 		"%three    = OpConstant %u32 3\n"
3105 		"%farr     = OpTypeArray %f32 %three\n"
3106 		"%fst      = OpTypeStruct %f32 %f32\n"
3107 
3108 		+ string(getComputeAsmInputOutputBuffer()) +
3109 
3110 		"%id            = OpVariable %uvec3ptr Input\n"
3111 		"%zero          = OpConstant %i32 0\n"
3112 		"%c_f           = OpConstant %f32 1.5\n"
3113 		"%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3114 		"%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3115 		"%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3116 		"%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3117 
3118 		"%main          = OpFunction %void None %voidf\n"
3119 		"%label         = OpLabel\n"
3120 		"%c_f_copy      = OpCopyObject %f32   %c_f\n"
3121 		"%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3122 		"%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3123 		"%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3124 		"%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3125 		"%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3126 		"%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3127 		"%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3128 		"%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3129 		// Add up. 1.5 * 5 = 7.5.
3130 		"%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3131 		"%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3132 		"%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3133 		"%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3134 
3135 		"%idval         = OpLoad %uvec3 %id\n"
3136 		"%x             = OpCompositeExtract %u32 %idval 0\n"
3137 		"%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3138 		"%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3139 		"%inval         = OpLoad %f32 %inloc\n"
3140 		"%add           = OpFAdd %f32 %add4 %inval\n"
3141 		"                 OpStore %outloc %add\n"
3142 		"                 OpReturn\n"
3143 		"                 OpFunctionEnd\n";
3144 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3145 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3146 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3147 
3148 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3149 
3150 	return group.release();
3151 }
3152 // Assembly code used for testing OpUnreachable is based on GLSL source code:
3153 //
3154 // #version 430
3155 //
3156 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3157 //   float elements[];
3158 // } input_data;
3159 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3160 //   float elements[];
3161 // } output_data;
3162 //
3163 // void not_called_func() {
3164 //   // place OpUnreachable here
3165 // }
3166 //
3167 // uint modulo4(uint val) {
3168 //   switch (val % uint(4)) {
3169 //     case 0:  return 3;
3170 //     case 1:  return 2;
3171 //     case 2:  return 1;
3172 //     case 3:  return 0;
3173 //     default: return 100; // place OpUnreachable here
3174 //   }
3175 // }
3176 //
3177 // uint const5() {
3178 //   return 5;
3179 //   // place OpUnreachable here
3180 // }
3181 //
3182 // void main() {
3183 //   uint x = gl_GlobalInvocationID.x;
3184 //   if (const5() > modulo4(1000)) {
3185 //     output_data.elements[x] = -input_data.elements[x];
3186 //   } else {
3187 //     // place OpUnreachable here
3188 //     output_data.elements[x] = input_data.elements[x];
3189 //   }
3190 // }
3191 
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3192 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3193 {
3194 #ifndef CTS_USES_VULKANSC
3195 	static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3196 
3197 	struct Case
3198 	{
3199 		string	name;
3200 		string	desc;
3201 	};
3202 
3203 	static const Case cases[] =
3204 	{
3205 		{ "unreachable-switch-merge-in-loop",	"Test containing an unreachable switch merge block inside an infinite loop"	},
3206 	};
3207 
3208 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3209 	{
3210 		const string fileName = cases[i].name + ".amber";
3211 		group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3212 	}
3213 #else
3214 	DE_UNREF(group);
3215 	DE_UNREF(testCtx);
3216 #endif
3217 }
3218 
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3219 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3220 {
3221 #ifndef CTS_USES_VULKANSC
3222 	static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3223 
3224 	struct Case
3225 	{
3226 		string	name;
3227 		string	desc;
3228 	};
3229 
3230 	static const Case cases[] =
3231 	{
3232 		{ "switch-case-to-merge-block",	"Test switch containing a case that jumps directly to the merge block"	},
3233 	};
3234 
3235 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3236 	{
3237 		const string fileName = cases[i].name + ".amber";
3238 		group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3239 	}
3240 #else
3241 	DE_UNREF(group);
3242 	DE_UNREF(testCtx);
3243 #endif
3244 }
3245 
3246 #ifndef CTS_USES_VULKANSC
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3247 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3248 {
3249 	de::MovePtr<tcu::TestCaseGroup>	group		(new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
3250 	static const char				dataDir[]	= "spirv_assembly/instruction/compute/arraylength";
3251 
3252 	struct Case
3253 	{
3254 		string	name;
3255 		string	desc;
3256 	};
3257 
3258 	static const Case cases[] =
3259 	{
3260 		{ "array-stride-larger-than-element-size",	"Test using an unsized array with stride larger than the element size"	}
3261 	};
3262 
3263 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3264 	{
3265 		const string fileName = cases[i].name + ".amber";
3266 		group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3267 	}
3268 
3269 	return group.release();
3270 }
3271 #endif
3272 
createOpUnreachableGroup(tcu::TestContext & testCtx)3273 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3274 {
3275 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3276 	ComputeShaderSpec				spec;
3277 	de::Random						rnd				(deStringHash(group->getName()));
3278 	const int						numElements		= 100;
3279 	vector<float>					positiveFloats	(numElements, 0);
3280 	vector<float>					negativeFloats	(numElements, 0);
3281 
3282 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3283 
3284 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3285 		negativeFloats[ndx] = -positiveFloats[ndx];
3286 
3287 	spec.assembly =
3288 		string(getComputeAsmShaderPreamble()) +
3289 
3290 		"OpSource GLSL 430\n"
3291 		"OpName %main            \"main\"\n"
3292 		"OpName %func_not_called_func \"not_called_func(\"\n"
3293 		"OpName %func_modulo4         \"modulo4(u1;\"\n"
3294 		"OpName %func_const5          \"const5(\"\n"
3295 		"OpName %id                   \"gl_GlobalInvocationID\"\n"
3296 
3297 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3298 
3299 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3300 
3301 		"%u32ptr    = OpTypePointer Function %u32\n"
3302 		"%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3303 		"%unitf     = OpTypeFunction %u32\n"
3304 
3305 		"%id        = OpVariable %uvec3ptr Input\n"
3306 		"%zero      = OpConstant %u32 0\n"
3307 		"%one       = OpConstant %u32 1\n"
3308 		"%two       = OpConstant %u32 2\n"
3309 		"%three     = OpConstant %u32 3\n"
3310 		"%four      = OpConstant %u32 4\n"
3311 		"%five      = OpConstant %u32 5\n"
3312 		"%hundred   = OpConstant %u32 100\n"
3313 		"%thousand  = OpConstant %u32 1000\n"
3314 
3315 		+ string(getComputeAsmInputOutputBuffer()) +
3316 
3317 		// Main()
3318 		"%main   = OpFunction %void None %voidf\n"
3319 		"%main_entry  = OpLabel\n"
3320 		"%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3321 		"%idval       = OpLoad %uvec3 %id\n"
3322 		"%x           = OpCompositeExtract %u32 %idval 0\n"
3323 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3324 		"%inval       = OpLoad %f32 %inloc\n"
3325 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3326 		"%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3327 		"%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3328 		"%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3329 		"               OpSelectionMerge %if_end None\n"
3330 		"               OpBranchConditional %cmp_gt %if_true %if_false\n"
3331 		"%if_true     = OpLabel\n"
3332 		"%negate      = OpFNegate %f32 %inval\n"
3333 		"               OpStore %outloc %negate\n"
3334 		"               OpBranch %if_end\n"
3335 		"%if_false    = OpLabel\n"
3336 		"               OpUnreachable\n" // Unreachable else branch for if statement
3337 		"%if_end      = OpLabel\n"
3338 		"               OpReturn\n"
3339 		"               OpFunctionEnd\n"
3340 
3341 		// not_called_function()
3342 		"%func_not_called_func  = OpFunction %void None %voidf\n"
3343 		"%not_called_func_entry = OpLabel\n"
3344 		"                         OpUnreachable\n" // Unreachable entry block in not called static function
3345 		"                         OpFunctionEnd\n"
3346 
3347 		// modulo4()
3348 		"%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3349 		"%valptr        = OpFunctionParameter %u32ptr\n"
3350 		"%modulo4_entry = OpLabel\n"
3351 		"%val           = OpLoad %u32 %valptr\n"
3352 		"%modulo        = OpUMod %u32 %val %four\n"
3353 		"                 OpSelectionMerge %switch_merge None\n"
3354 		"                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3355 		"%case0         = OpLabel\n"
3356 		"                 OpReturnValue %three\n"
3357 		"%case1         = OpLabel\n"
3358 		"                 OpReturnValue %two\n"
3359 		"%case2         = OpLabel\n"
3360 		"                 OpReturnValue %one\n"
3361 		"%case3         = OpLabel\n"
3362 		"                 OpReturnValue %zero\n"
3363 		"%default       = OpLabel\n"
3364 		"                 OpUnreachable\n" // Unreachable default case for switch statement
3365 		"%switch_merge  = OpLabel\n"
3366 		"                 OpUnreachable\n" // Unreachable merge block for switch statement
3367 		"                 OpFunctionEnd\n"
3368 
3369 		// const5()
3370 		"%func_const5  = OpFunction %u32 None %unitf\n"
3371 		"%const5_entry = OpLabel\n"
3372 		"                OpReturnValue %five\n"
3373 		"%unreachable  = OpLabel\n"
3374 		"                OpUnreachable\n" // Unreachable block in function
3375 		"                OpFunctionEnd\n";
3376 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3377 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3378 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3379 
3380 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3381 
3382 	addOpUnreachableAmberTests(*group, testCtx);
3383 
3384 	return group.release();
3385 }
3386 
3387 // Assembly code used for testing decoration group is based on GLSL source code:
3388 //
3389 // #version 430
3390 //
3391 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3392 //   float elements[];
3393 // } input_data0;
3394 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3395 //   float elements[];
3396 // } input_data1;
3397 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3398 //   float elements[];
3399 // } input_data2;
3400 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3401 //   float elements[];
3402 // } input_data3;
3403 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3404 //   float elements[];
3405 // } input_data4;
3406 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3407 //   float elements[];
3408 // } output_data;
3409 //
3410 // void main() {
3411 //   uint x = gl_GlobalInvocationID.x;
3412 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3413 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3414 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3415 {
3416 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3417 	ComputeShaderSpec				spec;
3418 	de::Random						rnd				(deStringHash(group->getName()));
3419 	const int						numElements		= 100;
3420 	vector<float>					inputFloats0	(numElements, 0);
3421 	vector<float>					inputFloats1	(numElements, 0);
3422 	vector<float>					inputFloats2	(numElements, 0);
3423 	vector<float>					inputFloats3	(numElements, 0);
3424 	vector<float>					inputFloats4	(numElements, 0);
3425 	vector<float>					outputFloats	(numElements, 0);
3426 
3427 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3428 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3429 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3430 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3431 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3432 
3433 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3434 	floorAll(inputFloats0);
3435 	floorAll(inputFloats1);
3436 	floorAll(inputFloats2);
3437 	floorAll(inputFloats3);
3438 	floorAll(inputFloats4);
3439 
3440 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3441 		outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3442 
3443 	spec.assembly =
3444 		string(getComputeAsmShaderPreamble()) +
3445 
3446 		"OpSource GLSL 430\n"
3447 		"OpName %main \"main\"\n"
3448 		"OpName %id \"gl_GlobalInvocationID\"\n"
3449 
3450 		// Not using group decoration on variable.
3451 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3452 		// Not using group decoration on type.
3453 		"OpDecorate %f32arr ArrayStride 4\n"
3454 
3455 		"OpDecorate %groups BufferBlock\n"
3456 		"OpDecorate %groupm Offset 0\n"
3457 		"%groups = OpDecorationGroup\n"
3458 		"%groupm = OpDecorationGroup\n"
3459 
3460 		// Group decoration on multiple structs.
3461 		"OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3462 		// Group decoration on multiple struct members.
3463 		"OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3464 
3465 		"OpDecorate %group1 DescriptorSet 0\n"
3466 		"OpDecorate %group3 DescriptorSet 0\n"
3467 		"OpDecorate %group3 NonWritable\n"
3468 		"OpDecorate %group3 Restrict\n"
3469 		"%group0 = OpDecorationGroup\n"
3470 		"%group1 = OpDecorationGroup\n"
3471 		"%group3 = OpDecorationGroup\n"
3472 
3473 		// Applying the same decoration group multiple times.
3474 		"OpGroupDecorate %group1 %outdata\n"
3475 		"OpGroupDecorate %group1 %outdata\n"
3476 		"OpGroupDecorate %group1 %outdata\n"
3477 		"OpDecorate %outdata DescriptorSet 0\n"
3478 		"OpDecorate %outdata Binding 5\n"
3479 		// Applying decoration group containing nothing.
3480 		"OpGroupDecorate %group0 %indata0\n"
3481 		"OpDecorate %indata0 DescriptorSet 0\n"
3482 		"OpDecorate %indata0 Binding 0\n"
3483 		// Applying decoration group containing one decoration.
3484 		"OpGroupDecorate %group1 %indata1\n"
3485 		"OpDecorate %indata1 Binding 1\n"
3486 		// Applying decoration group containing multiple decorations.
3487 		"OpGroupDecorate %group3 %indata2 %indata3\n"
3488 		"OpDecorate %indata2 Binding 2\n"
3489 		"OpDecorate %indata3 Binding 3\n"
3490 		// Applying multiple decoration groups (with overlapping).
3491 		"OpGroupDecorate %group0 %indata4\n"
3492 		"OpGroupDecorate %group1 %indata4\n"
3493 		"OpGroupDecorate %group3 %indata4\n"
3494 		"OpDecorate %indata4 Binding 4\n"
3495 
3496 		+ string(getComputeAsmCommonTypes()) +
3497 
3498 		"%id   = OpVariable %uvec3ptr Input\n"
3499 		"%zero = OpConstant %i32 0\n"
3500 
3501 		"%outbuf    = OpTypeStruct %f32arr\n"
3502 		"%outbufptr = OpTypePointer Uniform %outbuf\n"
3503 		"%outdata   = OpVariable %outbufptr Uniform\n"
3504 		"%inbuf0    = OpTypeStruct %f32arr\n"
3505 		"%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3506 		"%indata0   = OpVariable %inbuf0ptr Uniform\n"
3507 		"%inbuf1    = OpTypeStruct %f32arr\n"
3508 		"%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3509 		"%indata1   = OpVariable %inbuf1ptr Uniform\n"
3510 		"%inbuf2    = OpTypeStruct %f32arr\n"
3511 		"%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3512 		"%indata2   = OpVariable %inbuf2ptr Uniform\n"
3513 		"%inbuf3    = OpTypeStruct %f32arr\n"
3514 		"%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3515 		"%indata3   = OpVariable %inbuf3ptr Uniform\n"
3516 		"%inbuf4    = OpTypeStruct %f32arr\n"
3517 		"%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3518 		"%indata4   = OpVariable %inbufptr Uniform\n"
3519 
3520 		"%main   = OpFunction %void None %voidf\n"
3521 		"%label  = OpLabel\n"
3522 		"%idval  = OpLoad %uvec3 %id\n"
3523 		"%x      = OpCompositeExtract %u32 %idval 0\n"
3524 		"%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3525 		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3526 		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3527 		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3528 		"%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3529 		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3530 		"%inval0 = OpLoad %f32 %inloc0\n"
3531 		"%inval1 = OpLoad %f32 %inloc1\n"
3532 		"%inval2 = OpLoad %f32 %inloc2\n"
3533 		"%inval3 = OpLoad %f32 %inloc3\n"
3534 		"%inval4 = OpLoad %f32 %inloc4\n"
3535 		"%add0   = OpFAdd %f32 %inval0 %inval1\n"
3536 		"%add1   = OpFAdd %f32 %add0 %inval2\n"
3537 		"%add2   = OpFAdd %f32 %add1 %inval3\n"
3538 		"%add    = OpFAdd %f32 %add2 %inval4\n"
3539 		"          OpStore %outloc %add\n"
3540 		"          OpReturn\n"
3541 		"          OpFunctionEnd\n";
3542 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3543 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3544 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3545 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3546 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3547 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3548 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3549 
3550 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3551 
3552 	return group.release();
3553 }
3554 
// Tag describing which scalar type a SpecConstantValue carries. Each enumerator
// corresponds to one member of SpecConstantValue::ValueUnion.
enum SpecConstantType
{
	SC_INT8		= 0,	// deInt8		(value.i8)
	SC_UINT8	= 1,	// deUint8		(value.u8)
	SC_INT16	= 2,	// deInt16		(value.i16)
	SC_UINT16	= 3,	// deUint16		(value.u16)
	SC_INT32	= 4,	// deInt32		(value.i32)
	SC_UINT32	= 5,	// deUint32		(value.u32)
	SC_INT64	= 6,	// deInt64		(value.i64)
	SC_UINT64	= 7,	// deUint64		(value.u64)
	SC_FLOAT16	= 8,	// tcu::Float16	(value.f16)
	SC_FLOAT32	= 9,	// tcu::Float32	(value.f32)
	SC_FLOAT64	= 10,	// tcu::Float64	(value.f64)
};
3569 
3570 struct SpecConstantValue
3571 {
3572 	SpecConstantType type;
3573 	union ValueUnion {
3574 		deInt8			i8;
3575 		deUint8			u8;
3576 		deInt16			i16;
3577 		deUint16		u16;
3578 		deInt32			i32;
3579 		deUint32		u32;
3580 		deInt64			i64;
3581 		deUint64		u64;
3582 		tcu::Float16	f16;
3583 		tcu::Float32	f32;
3584 		tcu::Float64	f64;
3585 
ValueUnion(deInt8 v)3586 		ValueUnion (deInt8			v) : i8(v)	{}
ValueUnion(deUint8 v)3587 		ValueUnion (deUint8			v) : u8(v)	{}
ValueUnion(deInt16 v)3588 		ValueUnion (deInt16			v) : i16(v)	{}
ValueUnion(deUint16 v)3589 		ValueUnion (deUint16		v) : u16(v)	{}
ValueUnion(deInt32 v)3590 		ValueUnion (deInt32			v) : i32(v)	{}
ValueUnion(deUint32 v)3591 		ValueUnion (deUint32		v) : u32(v)	{}
ValueUnion(deInt64 v)3592 		ValueUnion (deInt64			v) : i64(v)	{}
ValueUnion(deUint64 v)3593 		ValueUnion (deUint64		v) : u64(v)	{}
ValueUnion(tcu::Float16 v)3594 		ValueUnion (tcu::Float16	v) : f16(v)	{}
ValueUnion(tcu::Float32 v)3595 		ValueUnion (tcu::Float32	v) : f32(v)	{}
ValueUnion(tcu::Float64 v)3596 		ValueUnion (tcu::Float64	v) : f64(v)	{}
3597 	} value;
3598 
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3599 	SpecConstantValue (deInt8			v) : type(SC_INT8)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3600 	SpecConstantValue (deUint8			v) : type(SC_UINT8)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3601 	SpecConstantValue (deInt16			v) : type(SC_INT16)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3602 	SpecConstantValue (deUint16			v) : type(SC_UINT16)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3603 	SpecConstantValue (deInt32			v) : type(SC_INT32)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3604 	SpecConstantValue (deUint32			v) : type(SC_UINT32)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3605 	SpecConstantValue (deInt64			v) : type(SC_INT64)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3606 	SpecConstantValue (deUint64			v) : type(SC_UINT64)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3607 	SpecConstantValue (tcu::Float16		v) : type(SC_FLOAT16)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3608 	SpecConstantValue (tcu::Float32		v) : type(SC_FLOAT32)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3609 	SpecConstantValue (tcu::Float64		v) : type(SC_FLOAT64)	, value(v) {}
3610 
appendTovkt::SpirVAssembly::__anon6f921be60111::SpecConstantValue3611 	void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3612 	{
3613 		switch (type)
3614 		{
3615 		case SC_INT8:		specConstants.append(value.i8);		break;
3616 		case SC_UINT8:		specConstants.append(value.u8);		break;
3617 		case SC_INT16:		specConstants.append(value.i16);	break;
3618 		case SC_UINT16:		specConstants.append(value.u16);	break;
3619 		case SC_INT32:		specConstants.append(value.i32);	break;
3620 		case SC_UINT32:		specConstants.append(value.u32);	break;
3621 		case SC_INT64:		specConstants.append(value.i64);	break;
3622 		case SC_UINT64:		specConstants.append(value.u64);	break;
3623 		case SC_FLOAT16:	specConstants.append(value.f16);	break;
3624 		case SC_FLOAT32:	specConstants.append(value.f32);	break;
3625 		case SC_FLOAT64:	specConstants.append(value.f64);	break;
3626 		default:
3627 			DE_ASSERT(false);
3628 		}
3629 	}
3630 };
3631 
3632 enum CaseFlagBits
3633 {
3634 	FLAG_NONE		= 0,
3635 	FLAG_CONVERT	= 1,
3636 	FLAG_I8			= (1<<1),
3637 	FLAG_I16		= (1<<2),
3638 	FLAG_I64		= (1<<3),
3639 	FLAG_F16		= (1<<4),
3640 	FLAG_F64		= (1<<5),
3641 };
3642 using CaseFlags = deUint32;
3643 
3644 struct SpecConstantTwoValCase
3645 {
3646 	const std::string	caseName;
3647 	const std::string	scDefinition0;
3648 	const std::string	scDefinition1;
3649 	const std::string	scResultType;
3650 	const std::string	scOperation;
3651 	SpecConstantValue	scActualValue0;
3652 	SpecConstantValue	scActualValue1;
3653 	const std::string	resultOperation;
3654 	vector<deInt32>		expectedOutput;
3655 	CaseFlags			caseFlags;
3656 
SpecConstantTwoValCasevkt::SpirVAssembly::__anon6f921be60111::SpecConstantTwoValCase3657 						SpecConstantTwoValCase (const std::string& name,
3658 												const std::string& definition0,
3659 												const std::string& definition1,
3660 												const std::string& resultType,
3661 												const std::string& operation,
3662 												SpecConstantValue value0,
3663 												SpecConstantValue value1,
3664 												const std::string& resultOp,
3665 												const vector<deInt32>& output,
3666 												CaseFlags flags = FLAG_NONE)
3667 							: caseName				(name)
3668 							, scDefinition0			(definition0)
3669 							, scDefinition1			(definition1)
3670 							, scResultType			(resultType)
3671 							, scOperation			(operation)
3672 							, scActualValue0		(value0)
3673 							, scActualValue1		(value1)
3674 							, resultOperation		(resultOp)
3675 							, expectedOutput		(output)
3676 							, caseFlags				(flags)
3677 							{}
3678 };
3679 
// Returns SPIR-V assembly declaring the i32 constants %zero, %one, %two and
// %three, the array type %iarr3 (3 x %i32), the array-of-arrays type %imat3
// (3 x %iarr3) and the struct type %struct wrapping %imat3. The text references
// %i32, which it does not declare itself.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	static const char* const declarationLines[] =
	{
		"%zero        = OpConstant %i32 0\n",
		"%one         = OpConstant %i32 1\n",
		"%two         = OpConstant %i32 2\n",
		"%three       = OpConstant %i32 3\n",
		"%iarr3       = OpTypeArray %i32 %three\n",
		"%imat3       = OpTypeArray %iarr3 %three\n",
		"%struct      = OpTypeStruct %imat3\n",
	};

	std::string assembly;

	for (const char* line : declarationLines)
		assembly += line;

	return assembly;
}
3692 
// Returns SPIR-V assembly defining all-zero constant composites for the
// %iarr3, %imat3 and %struct types: %iarr3_0, %imat3_0 and %struct_0.
std::string getSpecConstantOpStructComposites ()
{
	static const char* const compositeLines[] =
	{
		"%iarr3_0     = OpConstantComposite %iarr3 %zero %zero %zero\n",
		"%imat3_0     = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n",
		"%struct_0    = OpConstantComposite %struct %imat3_0\n",
	};

	std::string assembly;

	for (const char* line : compositeLines)
		assembly += line;

	return assembly;
}
3701 
// Returns SPIR-V assembly that uses OpSpecConstantOp to build three rotated
// arrays out of %sc_0, %sc_1 and %sc_2, stores them as the rows of an %imat3
// inside a %struct, extracts selected elements again, compares them and turns
// the comparison results into the constants %mustbe_0, %mustbe_1 and %mustbe_2.
std::string getSpecConstantOpStructConstBlock ()
{
	static const char* const constBlockLines[] =
	{
		// Row a..c: (sc_0, sc_1, sc_2)
		"%iarr3_a     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_0     0\n",
		"%iarr3_b     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_a     1\n",
		"%iarr3_c     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_b     2\n",

		// Row d..f: (sc_1, sc_2, sc_0)
		"%iarr3_d     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_0     0\n",
		"%iarr3_e     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_d     1\n",
		"%iarr3_f     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_e     2\n",

		// Row g..i: (sc_2, sc_0, sc_1)
		"%iarr3_g     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_0     0\n",
		"%iarr3_h     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_g     1\n",
		"%iarr3_i     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_h     2\n",

		// Assemble the three finished rows into a matrix.
		"%imat3_a     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_c     %imat3_0     0\n",
		"%imat3_b     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_f     %imat3_a     1\n",
		"%imat3_c     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_i     %imat3_b     2\n",

		// Place the matrix into the struct.
		"%struct_a    = OpSpecConstantOp %struct CompositeInsert  %imat3_c     %struct_0    0\n",

		// Pull out pairs of components that are compared below.
		"%comp_0_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 0\n",
		"%comp_1_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 0\n",

		"%comp_0_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 1\n",
		"%comp_2_2    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 2\n",

		"%comp_2_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 0\n",
		"%comp_1_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 1\n",

		"%cmpres_0    = OpSpecConstantOp %bool   IEqual %comp_0_0 %comp_1_0\n",	// sc_0 vs sc_1: expected false.
		"%cmpres_1    = OpSpecConstantOp %bool   IEqual %comp_0_1 %comp_2_2\n",	// sc_1 vs sc_1: expected true.
		"%cmpres_2    = OpSpecConstantOp %bool   IEqual %comp_2_0 %comp_1_1\n",	// sc_2 vs sc_2: expected true.

		"%mustbe_0    = OpSpecConstantOp %i32    Select %cmpres_0 %one %zero\n",	// Expected to select 0.
		"%mustbe_1    = OpSpecConstantOp %i32    Select %cmpres_1 %one %zero\n",	// Expected to select 1.
		"%mustbe_2    = OpSpecConstantOp %i32    Select %cmpres_2 %two %one\n",		// Expected to select 2.
	};

	std::string assembly;

	for (const char* line : constBlockLines)
		assembly += line;

	return assembly;
}
3741 
// Returns SPIR-V instructions that compute
//     %factor = (%one - %mustbe_0) * %mustbe_1 * (%mustbe_2 - %one)
// and then %sc_final = %factor * %sc_factor. When %mustbe_0, %mustbe_1 and
// %mustbe_2 hold 0, 1 and 2 respectively, the factor is 1 and %sc_final equals
// %sc_factor.
std::string getSpecConstantOpStructInstructions ()
{
	static const char* const instructionLines[] =
	{
		"%subf_a      = OpISub %i32 %one %mustbe_0\n",
		"%subf_b      = OpIMul %i32 %subf_a %mustbe_1\n",
		"%subf_c      = OpISub %i32 %mustbe_2 %one\n",
		"%factor      = OpIMul %i32 %subf_b %subf_c\n",
		"%sc_final    = OpIMul %i32 %factor %sc_factor\n",
	};

	std::string assembly;

	for (const char* line : instructionLines)
		assembly += line;

	return assembly;
}
3754 
createSpecConstantGroup(tcu::TestContext & testCtx)3755 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3756 {
3757 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3758 	vector<SpecConstantTwoValCase>	cases;
3759 	de::Random						rnd				(deStringHash(group->getName()));
3760 	const int						numElements		= 100;
3761 	vector<deInt32>					inputInts		(numElements, 0);
3762 	vector<deInt32>					outputInts1		(numElements, 0);
3763 	vector<deInt32>					outputInts2		(numElements, 0);
3764 	vector<deInt32>					outputInts3		(numElements, 0);
3765 	vector<deInt32>					outputInts4		(numElements, 0);
3766 	vector<deInt32>					outputInts5		(numElements, 0);
3767 	const StringTemplate			shaderTemplate	(
3768 		"${CAPABILITIES:opt}"
3769 		+ string(getComputeAsmShaderPreamble()) +
3770 
3771 		"OpName %main           \"main\"\n"
3772 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3773 
3774 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3775 		"OpDecorate %sc_0  SpecId 0\n"
3776 		"OpDecorate %sc_1  SpecId 1\n"
3777 		"OpDecorate %i32arr ArrayStride 4\n"
3778 
3779 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3780 
3781 		"${OPTYPE_DEFINITIONS:opt}"
3782 		"%buf     = OpTypeStruct %i32arr\n"
3783 		"%bufptr  = OpTypePointer Uniform %buf\n"
3784 		"%indata    = OpVariable %bufptr Uniform\n"
3785 		"%outdata   = OpVariable %bufptr Uniform\n"
3786 
3787 		"%id        = OpVariable %uvec3ptr Input\n"
3788 		"%zero      = OpConstant %i32 0\n"
3789 
3790 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
3791 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
3792 		"%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3793 
3794 		"%main      = OpFunction %void None %voidf\n"
3795 		"%label     = OpLabel\n"
3796 		"${TYPE_CONVERT:opt}"
3797 		"%idval     = OpLoad %uvec3 %id\n"
3798 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3799 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3800 		"%inval     = OpLoad %i32 %inloc\n"
3801 		"%final     = ${GEN_RESULT}\n"
3802 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3803 		"             OpStore %outloc %final\n"
3804 		"             OpReturn\n"
3805 		"             OpFunctionEnd\n");
3806 
3807 	fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3808 
3809 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3810 	{
3811 		outputInts1[ndx] = inputInts[ndx] + 42;
3812 		outputInts2[ndx] = inputInts[ndx];
3813 		outputInts3[ndx] = inputInts[ndx] - 11200;
3814 		outputInts4[ndx] = inputInts[ndx] + 1;
3815 		outputInts5[ndx] = inputInts[ndx] - 42;
3816 	}
3817 
3818 	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
3819 	const char addSc32ToInput[]		= "OpIAdd %i32 %inval %sc_final32";
3820 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
3821 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
3822 
3823 	cases.push_back(SpecConstantTwoValCase("iadd",						" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",			62,						-20,				addScToInput,		outputInts1));
3824 	cases.push_back(SpecConstantTwoValCase("isub",						" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",			100,					58,					addScToInput,		outputInts1));
3825 	cases.push_back(SpecConstantTwoValCase("imul",						" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",			-2,						-21,				addScToInput,		outputInts1));
3826 	cases.push_back(SpecConstantTwoValCase("sdiv",						" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",			-126,					-3,					addScToInput,		outputInts1));
3827 	cases.push_back(SpecConstantTwoValCase("udiv",						" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",			126,					3,					addScToInput,		outputInts1));
3828 	cases.push_back(SpecConstantTwoValCase("srem",						" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",			7,						3,					addScToInput,		outputInts4));
3829 	cases.push_back(SpecConstantTwoValCase("smod",						" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",			7,						3,					addScToInput,		outputInts4));
3830 	cases.push_back(SpecConstantTwoValCase("umod",						" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",			342,					50,					addScToInput,		outputInts1));
3831 	cases.push_back(SpecConstantTwoValCase("bitwiseand",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",			42,						63,					addScToInput,		outputInts1));
3832 	cases.push_back(SpecConstantTwoValCase("bitwiseor",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",			34,						8,					addScToInput,		outputInts1));
3833 	cases.push_back(SpecConstantTwoValCase("bitwisexor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",			18,						56,					addScToInput,		outputInts1));
3834 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical",			" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					2,					addScToInput,		outputInts1));
3835 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					2,					addScToInput,		outputInts5));
3836 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical",			" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						1,					addScToInput,		outputInts1));
3837 
3838 	// Shifts for other integer sizes.
3839 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftRightLogical    %sc_0 %sc_1",			deInt64{168},			deInt64{2},			addSc32ToInput,		outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3840 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64",	" %i64 0",		" %i64 0",		"%i64",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt64{-168},			deInt64{2},			addSc32ToInput,		outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3841 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt64{21},			deInt64{1},			addSc32ToInput,		outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3842 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftRightLogical    %sc_0 %sc_1",			deInt16{168},			deInt16{2},			addSc32ToInput,		outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3843 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16",	" %i16 0",		" %i16 0",		"%i16",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt16{-168},			deInt16{2},			addSc32ToInput,		outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3844 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt16{21},			deInt16{1},			addSc32ToInput,		outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3845 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftRightLogical    %sc_0 %sc_1",			deInt8{84},				deInt8{1},			addSc32ToInput,		outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3846 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8",	" %i8 0",		" %i8 0",		"%i8",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt8{-84},			deInt8{1},			addSc32ToInput,		outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3847 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt8{21},				deInt8{1},			addSc32ToInput,		outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3848 
3849 	// Shifts for other integer sizes but only in the shift amount.
3850 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					deInt64{2},			addScToInput,		outputInts1, (FLAG_I64)));
3851 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0",		" %i64 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					deInt64{2},			addScToInput,		outputInts5, (FLAG_I64)));
3852 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt64{1},			addScToInput,		outputInts1, (FLAG_I64)));
3853 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					deInt16{2},			addScToInput,		outputInts1, (FLAG_I16)));
3854 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0",		" %i16 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					deInt16{2},			addScToInput,		outputInts5, (FLAG_I16)));
3855 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt16{1},			addScToInput,		outputInts1, (FLAG_I16)));
3856 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8",	" %i32 0",		" %i8 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			84,						deInt8{1},			addScToInput,		outputInts1, (FLAG_I8)));
3857 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8",	" %i32 0",		" %i8 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-84,					deInt8{1},			addScToInput,		outputInts5, (FLAG_I8)));
3858 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt8{1},			addScToInput,		outputInts1, (FLAG_I8)));
3859 
3860 	cases.push_back(SpecConstantTwoValCase("slessthan",					" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",			-20,					-10,				selectTrueUsingSc,	outputInts2));
3861 	cases.push_back(SpecConstantTwoValCase("ulessthan",					" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",			10,						20,					selectTrueUsingSc,	outputInts2));
3862 	cases.push_back(SpecConstantTwoValCase("sgreaterthan",				" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",			-1000,					50,					selectFalseUsingSc,	outputInts2));
3863 	cases.push_back(SpecConstantTwoValCase("ugreaterthan",				" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",			10,						5,					selectTrueUsingSc,	outputInts2));
3864 	cases.push_back(SpecConstantTwoValCase("slessthanequal",			" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",			-10,					-10,				selectTrueUsingSc,	outputInts2));
3865 	cases.push_back(SpecConstantTwoValCase("ulessthanequal",			" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",			50,						100,				selectTrueUsingSc,	outputInts2));
3866 	cases.push_back(SpecConstantTwoValCase("sgreaterthanequal",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",			-1000,					50,					selectFalseUsingSc,	outputInts2));
3867 	cases.push_back(SpecConstantTwoValCase("ugreaterthanequal",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",			10,						10,					selectTrueUsingSc,	outputInts2));
3868 	cases.push_back(SpecConstantTwoValCase("iequal",					" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",			42,						24,					selectFalseUsingSc,	outputInts2));
3869 	cases.push_back(SpecConstantTwoValCase("inotequal",					" %i32 0",		" %i32 0",		"%bool",	"INotEqual            %sc_0 %sc_1",			42,						24,					selectTrueUsingSc,	outputInts2));
3870 	cases.push_back(SpecConstantTwoValCase("logicaland",				"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",			0,						1,					selectFalseUsingSc,	outputInts2));
3871 	cases.push_back(SpecConstantTwoValCase("logicalor",					"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",			1,						0,					selectTrueUsingSc,	outputInts2));
3872 	cases.push_back(SpecConstantTwoValCase("logicalequal",				"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",			0,						1,					selectFalseUsingSc,	outputInts2));
3873 	cases.push_back(SpecConstantTwoValCase("logicalnotequal",			"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",			1,						0,					selectTrueUsingSc,	outputInts2));
3874 	cases.push_back(SpecConstantTwoValCase("snegate",					" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",				-42,					0,					addScToInput,		outputInts1));
3875 	cases.push_back(SpecConstantTwoValCase("not",						" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,					0,					addScToInput,		outputInts1));
3876 	cases.push_back(SpecConstantTwoValCase("logicalnot",				"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,						0,					selectFalseUsingSc,	outputInts2));
3877 	cases.push_back(SpecConstantTwoValCase("select",					"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,						42,					addScToInput,		outputInts1));
3878 	cases.push_back(SpecConstantTwoValCase("sconvert",					" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",				-11200,					0,					addSc32ToInput,		outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3879 	cases.push_back(SpecConstantTwoValCase("fconvert",					" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",				tcu::Float32{-11200.0},	tcu::Float32{0.0},	addSc32ToInput,		outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3880 	cases.push_back(SpecConstantTwoValCase("fconvert16",				" %f16 0",		" %f16 0",		"%f32",		"FConvert             %sc_0",				tcu::Float16{1.0},		tcu::Float16{0.0},	addSc32ToInput,		outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3881 
3882 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3883 	{
3884 		map<string, string>		specializations;
3885 		ComputeShaderSpec		spec;
3886 
3887 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
3888 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
3889 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
3890 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
3891 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
3892 
3893 		// Special SPIR-V code when using 16-bit integers.
3894 		if (cases[caseNdx].caseFlags & FLAG_I16)
3895 		{
3896 			spec.requestedVulkanFeatures.coreFeatures.shaderInt16	= VK_TRUE;
3897 			specializations["CAPABILITIES"]							+= "OpCapability Int16\n";							// Adds 16-bit integer capability
3898 			specializations["OPTYPE_DEFINITIONS"]					+= "%i16 = OpTypeInt 16 1\n";						// Adds 16-bit integer type
3899 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3900 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpSConvert %i32 %sc_final\n";		// Converts 16-bit integer to 32-bit integer
3901 		}
3902 
3903 		// Special SPIR-V code when using 64-bit integers.
3904 		if (cases[caseNdx].caseFlags & FLAG_I64)
3905 		{
3906 			spec.requestedVulkanFeatures.coreFeatures.shaderInt64	= VK_TRUE;
3907 			specializations["CAPABILITIES"]							+= "OpCapability Int64\n";							// Adds 64-bit integer capability
3908 			specializations["OPTYPE_DEFINITIONS"]					+= "%i64 = OpTypeInt 64 1\n";						// Adds 64-bit integer type
3909 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3910 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpSConvert %i32 %sc_final\n";		// Converts 64-bit integer to 32-bit integer
3911 		}
3912 
3913 		// Special SPIR-V code when using 64-bit floats.
3914 		if (cases[caseNdx].caseFlags & FLAG_F64)
3915 		{
3916 			spec.requestedVulkanFeatures.coreFeatures.shaderFloat64	= VK_TRUE;
3917 			specializations["CAPABILITIES"]							+= "OpCapability Float64\n";						// Adds 64-bit float capability
3918 			specializations["OPTYPE_DEFINITIONS"]					+= "%f64 = OpTypeFloat 64\n";						// Adds 64-bit float type
3919 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3920 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpConvertFToS %i32 %sc_final\n";	// Converts 64-bit float to 32-bit integer
3921 		}
3922 
3923 		// Extension needed for float16 and int8.
3924 		if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3925 			spec.extensions.push_back("VK_KHR_shader_float16_int8");
3926 
3927 		// Special SPIR-V code when using 16-bit floats.
3928 		if (cases[caseNdx].caseFlags & FLAG_F16)
3929 		{
3930 			spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3931 			specializations["CAPABILITIES"]				+= "OpCapability Float16\n";						// Adds 16-bit float capability
3932 			specializations["OPTYPE_DEFINITIONS"]		+= "%f16 = OpTypeFloat 16\n";						// Adds 16-bit float type
3933 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3934 				specializations["TYPE_CONVERT"]			+= "%sc_final32 = OpConvertFToS %i32 %sc_final\n";	// Converts 16-bit float to 32-bit integer
3935 		}
3936 
3937 		// Special SPIR-V code when using 8-bit integers.
3938 		if (cases[caseNdx].caseFlags & FLAG_I8)
3939 		{
3940 			spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3941 			specializations["CAPABILITIES"]				+= "OpCapability Int8\n";						// Adds 8-bit integer capability
3942 			specializations["OPTYPE_DEFINITIONS"]		+= "%i8 = OpTypeInt 8 1\n";						// Adds 8-bit integer type
3943 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3944 				specializations["TYPE_CONVERT"]			+= "%sc_final32 = OpSConvert %i32 %sc_final\n";	// Converts 8-bit integer to 32-bit integer
3945 		}
3946 
3947 		spec.assembly = shaderTemplate.specialize(specializations);
3948 		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3949 		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3950 		spec.numWorkGroups = IVec3(numElements, 1, 1);
3951 		cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3952 		cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3953 
3954 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), cases[caseNdx].caseName.c_str(), spec));
3955 	}
3956 
3957 	ComputeShaderSpec				spec;
3958 
3959 	spec.assembly =
3960 		string(getComputeAsmShaderPreamble()) +
3961 
3962 		"OpName %main           \"main\"\n"
3963 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3964 
3965 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3966 		"OpDecorate %sc_0  SpecId 0\n"
3967 		"OpDecorate %sc_1  SpecId 1\n"
3968 		"OpDecorate %sc_2  SpecId 2\n"
3969 		"OpDecorate %i32arr ArrayStride 4\n"
3970 
3971 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3972 
3973 		"%ivec3       = OpTypeVector %i32 3\n"
3974 
3975 		+ getSpecConstantOpStructConstantsAndTypes() +
3976 
3977 		"%buf         = OpTypeStruct %i32arr\n"
3978 		"%bufptr      = OpTypePointer Uniform %buf\n"
3979 		"%indata      = OpVariable %bufptr Uniform\n"
3980 		"%outdata     = OpVariable %bufptr Uniform\n"
3981 
3982 		"%id          = OpVariable %uvec3ptr Input\n"
3983 		"%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
3984 		"%vec3_undef  = OpUndef %ivec3\n"
3985 
3986 		+ getSpecConstantOpStructComposites () +
3987 
3988 		"%sc_0        = OpSpecConstant %i32 0\n"
3989 		"%sc_1        = OpSpecConstant %i32 0\n"
3990 		"%sc_2        = OpSpecConstant %i32 0\n"
3991 
3992 		+ getSpecConstantOpStructConstBlock () +
3993 
3994 		"%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"							// (sc_0, 0, 0)
3995 		"%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"							// (0, sc_1, 0)
3996 		"%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"							// (0, 0, sc_2)
3997 		"%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
3998 		"%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
3999 		"%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
4000 		"%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (0,    sc_0, sc_1)
4001 		"%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
4002 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
4003 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
4004 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
4005 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
4006 		"%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"								// (sc_2 - sc_0) * sc_1
4007 
4008 		"%main      = OpFunction %void None %voidf\n"
4009 		"%label     = OpLabel\n"
4010 
4011 		+ getSpecConstantOpStructInstructions() +
4012 
4013 		"%idval     = OpLoad %uvec3 %id\n"
4014 		"%x         = OpCompositeExtract %u32 %idval 0\n"
4015 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
4016 		"%inval     = OpLoad %i32 %inloc\n"
4017 		"%final     = OpIAdd %i32 %inval %sc_final\n"
4018 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
4019 		"             OpStore %outloc %final\n"
4020 		"             OpReturn\n"
4021 		"             OpFunctionEnd\n";
4022 	spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4023 	spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4024 	spec.numWorkGroups = IVec3(numElements, 1, 1);
4025 	spec.specConstants.append<deInt32>(123);
4026 	spec.specConstants.append<deInt32>(56);
4027 	spec.specConstants.append<deInt32>(-77);
4028 
4029 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
4030 
4031 	return group.release();
4032 }
4033 
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4034 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
4035 {
4036 	ComputeShaderSpec	specInt;
4037 	ComputeShaderSpec	specFloat;
4038 	ComputeShaderSpec	specFloat16;
4039 	ComputeShaderSpec	specVec3;
4040 	ComputeShaderSpec	specMat4;
4041 	ComputeShaderSpec	specArray;
4042 	ComputeShaderSpec	specStruct;
4043 	de::Random			rnd				(deStringHash(group->getName()));
4044 	const int			numElements		= 100;
4045 	vector<float>		inputFloats		(numElements, 0);
4046 	vector<float>		outputFloats	(numElements, 0);
4047 	vector<deUint32>	inputUints		(numElements, 0);
4048 	vector<deUint32>	outputUints		(numElements, 0);
4049 
4050 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4051 
4052 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4053 	floorAll(inputFloats);
4054 
4055 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4056 	{
4057 		// Just check if the value is positive or not
4058 		outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4059 	}
4060 
4061 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4062 	{
4063 		inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4064 		outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4065 	}
4066 
4067 	// All of the tests are of the form:
4068 	//
4069 	// testtype r
4070 	//
4071 	// if (inputdata > 0)
4072 	//   r = 1
4073 	// else
4074 	//   r = -1
4075 	//
4076 	// return (float)r
4077 
4078 	specFloat.assembly =
4079 		string(getComputeAsmShaderPreamble()) +
4080 
4081 		"OpSource GLSL 430\n"
4082 		"OpName %main \"main\"\n"
4083 		"OpName %id \"gl_GlobalInvocationID\"\n"
4084 
4085 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4086 
4087 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4088 
4089 		"%id = OpVariable %uvec3ptr Input\n"
4090 		"%zero       = OpConstant %i32 0\n"
4091 		"%float_0    = OpConstant %f32 0.0\n"
4092 		"%float_1    = OpConstant %f32 1.0\n"
4093 		"%float_n1   = OpConstant %f32 -1.0\n"
4094 
4095 		"%main     = OpFunction %void None %voidf\n"
4096 		"%entry    = OpLabel\n"
4097 		"%idval    = OpLoad %uvec3 %id\n"
4098 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4099 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4100 		"%inval    = OpLoad %f32 %inloc\n"
4101 
4102 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4103 		"            OpSelectionMerge %cm None\n"
4104 		"            OpBranchConditional %comp %tb %fb\n"
4105 		"%tb       = OpLabel\n"
4106 		"            OpBranch %cm\n"
4107 		"%fb       = OpLabel\n"
4108 		"            OpBranch %cm\n"
4109 		"%cm       = OpLabel\n"
4110 		"%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4111 
4112 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4113 		"            OpStore %outloc %res\n"
4114 		"            OpReturn\n"
4115 
4116 		"            OpFunctionEnd\n";
4117 	specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4118 	specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4119 	specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4120 
4121 	specFloat16.assembly =
4122 		"OpCapability Shader\n"
4123 		"OpCapability Float16\n"
4124 		"OpMemoryModel Logical GLSL450\n"
4125 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4126 		"OpExecutionMode %main LocalSize 1 1 1\n"
4127 
4128 		"OpSource GLSL 430\n"
4129 		"OpName %main \"main\"\n"
4130 		"OpName %id \"gl_GlobalInvocationID\"\n"
4131 
4132 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4133 
4134 		"OpDecorate %buf BufferBlock\n"
4135 		"OpDecorate %indata DescriptorSet 0\n"
4136 		"OpDecorate %indata Binding 0\n"
4137 		"OpDecorate %outdata DescriptorSet 0\n"
4138 		"OpDecorate %outdata Binding 1\n"
4139 		"OpDecorate %u32arr ArrayStride 4\n"
4140 		"OpMemberDecorate %buf 0 Offset 0\n"
4141 
4142 		+ string(getComputeAsmCommonTypes()) +
4143 
4144 		"%f16      = OpTypeFloat 16\n"
4145 		"%f16vec2  = OpTypeVector %f16 2\n"
4146 		"%fvec2    = OpTypeVector %f32 2\n"
4147 		"%u32ptr   = OpTypePointer Uniform %u32\n"
4148 		"%u32arr   = OpTypeRuntimeArray %u32\n"
4149 		"%f16_0    = OpConstant %f16 0.0\n"
4150 
4151 
4152 		"%buf      = OpTypeStruct %u32arr\n"
4153 		"%bufptr   = OpTypePointer Uniform %buf\n"
4154 		"%indata   = OpVariable %bufptr Uniform\n"
4155 		"%outdata  = OpVariable %bufptr Uniform\n"
4156 
4157 		"%id       = OpVariable %uvec3ptr Input\n"
4158 		"%zero     = OpConstant %i32 0\n"
4159 		"%float_0  = OpConstant %f32 0.0\n"
4160 		"%float_1  = OpConstant %f32 1.0\n"
4161 		"%float_n1 = OpConstant %f32 -1.0\n"
4162 
4163 		"%main     = OpFunction %void None %voidf\n"
4164 		"%entry    = OpLabel\n"
4165 		"%idval    = OpLoad %uvec3 %id\n"
4166 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4167 		"%inloc    = OpAccessChain %u32ptr %indata %zero %x\n"
4168 		"%inval    = OpLoad %u32 %inloc\n"
4169 		"%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4170 		"%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4171 		"%f32_inval = OpFConvert %f32 %f16_inval\n"
4172 
4173 		"%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4174 		"            OpSelectionMerge %cm None\n"
4175 		"            OpBranchConditional %comp %tb %fb\n"
4176 		"%tb       = OpLabel\n"
4177 		"            OpBranch %cm\n"
4178 		"%fb       = OpLabel\n"
4179 		"            OpBranch %cm\n"
4180 		"%cm       = OpLabel\n"
4181 		"%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4182 		"%f16_res  = OpFConvert %f16 %res\n"
4183 
4184 		"%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4185 		"%u32_res  = OpBitcast %u32 %f16vec2_res\n"
4186 
4187 		"%outloc   = OpAccessChain %u32ptr %outdata %zero %x\n"
4188 		"            OpStore %outloc %u32_res\n"
4189 		"            OpReturn\n"
4190 
4191 		"            OpFunctionEnd\n";
4192 
4193 	specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4194 	specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4195 	specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4196 	specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4197 
4198 	specMat4.assembly =
4199 		string(getComputeAsmShaderPreamble()) +
4200 
4201 		"OpSource GLSL 430\n"
4202 		"OpName %main \"main\"\n"
4203 		"OpName %id \"gl_GlobalInvocationID\"\n"
4204 
4205 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4206 
4207 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4208 
4209 		"%id = OpVariable %uvec3ptr Input\n"
4210 		"%v4f32      = OpTypeVector %f32 4\n"
4211 		"%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
4212 		"%zero       = OpConstant %i32 0\n"
4213 		"%float_0    = OpConstant %f32 0.0\n"
4214 		"%float_1    = OpConstant %f32 1.0\n"
4215 		"%float_n1   = OpConstant %f32 -1.0\n"
4216 		"%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4217 		"%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4218 		"%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4219 		"%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4220 		"%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4221 		"%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4222 		"%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4223 		"%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4224 		"%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4225 		"%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4226 
4227 		"%main     = OpFunction %void None %voidf\n"
4228 		"%entry    = OpLabel\n"
4229 		"%idval    = OpLoad %uvec3 %id\n"
4230 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4231 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4232 		"%inval    = OpLoad %f32 %inloc\n"
4233 
4234 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4235 		"            OpSelectionMerge %cm None\n"
4236 		"            OpBranchConditional %comp %tb %fb\n"
4237 		"%tb       = OpLabel\n"
4238 		"            OpBranch %cm\n"
4239 		"%fb       = OpLabel\n"
4240 		"            OpBranch %cm\n"
4241 		"%cm       = OpLabel\n"
4242 		"%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4243 		"%res      = OpCompositeExtract %f32 %mres 2 2\n"
4244 
4245 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4246 		"            OpStore %outloc %res\n"
4247 		"            OpReturn\n"
4248 
4249 		"            OpFunctionEnd\n";
4250 	specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4251 	specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4252 	specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4253 
4254 	specVec3.assembly =
4255 		string(getComputeAsmShaderPreamble()) +
4256 
4257 		"OpSource GLSL 430\n"
4258 		"OpName %main \"main\"\n"
4259 		"OpName %id \"gl_GlobalInvocationID\"\n"
4260 
4261 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4262 
4263 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4264 
4265 		"%id = OpVariable %uvec3ptr Input\n"
4266 		"%zero       = OpConstant %i32 0\n"
4267 		"%float_0    = OpConstant %f32 0.0\n"
4268 		"%float_1    = OpConstant %f32 1.0\n"
4269 		"%float_n1   = OpConstant %f32 -1.0\n"
4270 		"%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4271 		"%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4272 
4273 		"%main     = OpFunction %void None %voidf\n"
4274 		"%entry    = OpLabel\n"
4275 		"%idval    = OpLoad %uvec3 %id\n"
4276 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4277 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4278 		"%inval    = OpLoad %f32 %inloc\n"
4279 
4280 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4281 		"            OpSelectionMerge %cm None\n"
4282 		"            OpBranchConditional %comp %tb %fb\n"
4283 		"%tb       = OpLabel\n"
4284 		"            OpBranch %cm\n"
4285 		"%fb       = OpLabel\n"
4286 		"            OpBranch %cm\n"
4287 		"%cm       = OpLabel\n"
4288 		"%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4289 		"%res      = OpCompositeExtract %f32 %vres 2\n"
4290 
4291 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4292 		"            OpStore %outloc %res\n"
4293 		"            OpReturn\n"
4294 
4295 		"            OpFunctionEnd\n";
4296 	specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4297 	specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4298 	specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4299 
4300 	specInt.assembly =
4301 		string(getComputeAsmShaderPreamble()) +
4302 
4303 		"OpSource GLSL 430\n"
4304 		"OpName %main \"main\"\n"
4305 		"OpName %id \"gl_GlobalInvocationID\"\n"
4306 
4307 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4308 
4309 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4310 
4311 		"%id = OpVariable %uvec3ptr Input\n"
4312 		"%zero       = OpConstant %i32 0\n"
4313 		"%float_0    = OpConstant %f32 0.0\n"
4314 		"%i1         = OpConstant %i32 1\n"
4315 		"%i2         = OpConstant %i32 -1\n"
4316 
4317 		"%main     = OpFunction %void None %voidf\n"
4318 		"%entry    = OpLabel\n"
4319 		"%idval    = OpLoad %uvec3 %id\n"
4320 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4321 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4322 		"%inval    = OpLoad %f32 %inloc\n"
4323 
4324 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4325 		"            OpSelectionMerge %cm None\n"
4326 		"            OpBranchConditional %comp %tb %fb\n"
4327 		"%tb       = OpLabel\n"
4328 		"            OpBranch %cm\n"
4329 		"%fb       = OpLabel\n"
4330 		"            OpBranch %cm\n"
4331 		"%cm       = OpLabel\n"
4332 		"%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
4333 		"%res      = OpConvertSToF %f32 %ires\n"
4334 
4335 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4336 		"            OpStore %outloc %res\n"
4337 		"            OpReturn\n"
4338 
4339 		"            OpFunctionEnd\n";
4340 	specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4341 	specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4342 	specInt.numWorkGroups = IVec3(numElements, 1, 1);
4343 
4344 	specArray.assembly =
4345 		string(getComputeAsmShaderPreamble()) +
4346 
4347 		"OpSource GLSL 430\n"
4348 		"OpName %main \"main\"\n"
4349 		"OpName %id \"gl_GlobalInvocationID\"\n"
4350 
4351 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4352 
4353 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4354 
4355 		"%id = OpVariable %uvec3ptr Input\n"
4356 		"%zero       = OpConstant %i32 0\n"
4357 		"%u7         = OpConstant %u32 7\n"
4358 		"%float_0    = OpConstant %f32 0.0\n"
4359 		"%float_1    = OpConstant %f32 1.0\n"
4360 		"%float_n1   = OpConstant %f32 -1.0\n"
4361 		"%f32a7      = OpTypeArray %f32 %u7\n"
4362 		"%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4363 		"%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4364 		"%main     = OpFunction %void None %voidf\n"
4365 		"%entry    = OpLabel\n"
4366 		"%idval    = OpLoad %uvec3 %id\n"
4367 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4368 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4369 		"%inval    = OpLoad %f32 %inloc\n"
4370 
4371 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4372 		"            OpSelectionMerge %cm None\n"
4373 		"            OpBranchConditional %comp %tb %fb\n"
4374 		"%tb       = OpLabel\n"
4375 		"            OpBranch %cm\n"
4376 		"%fb       = OpLabel\n"
4377 		"            OpBranch %cm\n"
4378 		"%cm       = OpLabel\n"
4379 		"%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4380 		"%res      = OpCompositeExtract %f32 %ares 5\n"
4381 
4382 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4383 		"            OpStore %outloc %res\n"
4384 		"            OpReturn\n"
4385 
4386 		"            OpFunctionEnd\n";
4387 	specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4388 	specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4389 	specArray.numWorkGroups = IVec3(numElements, 1, 1);
4390 
4391 	specStruct.assembly =
4392 		string(getComputeAsmShaderPreamble()) +
4393 
4394 		"OpSource GLSL 430\n"
4395 		"OpName %main \"main\"\n"
4396 		"OpName %id \"gl_GlobalInvocationID\"\n"
4397 
4398 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4399 
4400 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4401 
4402 		"%id = OpVariable %uvec3ptr Input\n"
4403 		"%zero       = OpConstant %i32 0\n"
4404 		"%float_0    = OpConstant %f32 0.0\n"
4405 		"%float_1    = OpConstant %f32 1.0\n"
4406 		"%float_n1   = OpConstant %f32 -1.0\n"
4407 
4408 		"%v2f32      = OpTypeVector %f32 2\n"
4409 		"%Data2      = OpTypeStruct %f32 %v2f32\n"
4410 		"%Data       = OpTypeStruct %Data2 %f32\n"
4411 
4412 		"%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4413 		"%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4414 		"%s1         = OpConstantComposite %Data %in1b %float_1\n"
4415 		"%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4416 		"%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4417 		"%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4418 
4419 		"%main     = OpFunction %void None %voidf\n"
4420 		"%entry    = OpLabel\n"
4421 		"%idval    = OpLoad %uvec3 %id\n"
4422 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4423 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4424 		"%inval    = OpLoad %f32 %inloc\n"
4425 
4426 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4427 		"            OpSelectionMerge %cm None\n"
4428 		"            OpBranchConditional %comp %tb %fb\n"
4429 		"%tb       = OpLabel\n"
4430 		"            OpBranch %cm\n"
4431 		"%fb       = OpLabel\n"
4432 		"            OpBranch %cm\n"
4433 		"%cm       = OpLabel\n"
4434 		"%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4435 		"%res      = OpCompositeExtract %f32 %sres 0 0\n"
4436 
4437 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4438 		"            OpStore %outloc %res\n"
4439 		"            OpReturn\n"
4440 
4441 		"            OpFunctionEnd\n";
4442 	specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4443 	specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4444 	specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4445 
4446 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4447 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4448 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4449 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4450 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4451 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4452 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4453 }
4454 
// Returns SPIR-V assembly defining `count` 32-bit float constants named
// %cf5, %cf15, %cf25, ... where each constant's value is the number in its
// name. The definitions are followed by one empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream	defs;

	for (int ndx = 0; ndx < count; ++ndx)
	{
		const int value = ndx * 10 + 5;

		defs << "%cf" << value;
		defs << " = OpConstant %f32 " << value << ".0\n";
	}

	defs << "\n";
	return defs.str();
}
4463 
// Returns the "<literal> <label>" operand list for an OpSwitch with `count`
// cases: literal N jumps to label %caseN. Every pair is preceded by a space
// and the list is terminated by a newline.
string generateSwitchCases (int count)
{
	std::ostringstream	operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		operands << " " << caseNdx;
		operands << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4472 
// Returns SPIR-V assembly for `count` blocks labelled %case0, %case1, ...
// Each block does nothing but branch to %phi. The blocks are followed by one
// empty line.
string generateSwitchTargets (int count)
{
	std::ostringstream	blocks;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		blocks << "%case" << caseNdx;
		blocks << " = OpLabel\n            OpBranch %phi\n";
	}

	blocks << "\n";
	return blocks.str();
}
4481 
// Returns the "<value> <parent block>" operand list for an OpPhi with `count`
// incoming edges: block %caseN contributes constant %cf(N * 10 + 5). Every
// pair is preceded by a space and the list is terminated by a newline.
string generateOpPhiParams (int count)
{
	std::ostringstream	operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		const int constantId = caseNdx * 10 + 5;

		operands << " %cf" << constantId;
		operands << " %case" << caseNdx;
	}

	operands << "\n";
	return operands.str();
}
4490 
// Returns the decimal text representation of `value`.
string generateIntWidth (int value)
{
	std::ostringstream	text;

	text << value;

	return text.str();
}
4497 
// Returns a copy of `s` with the text "ABC" injected after some of its
// characters.
//
// After each copied character `add` is added to the running accumulator
// `acc`. Whenever the accumulator exceeds `treshold`, `treshold` is subtracted
// from it and "ABC" is injected. Skipping injections this way keeps the result
// less uniform (and a lot shorter) than injecting after every character.
//
// `acc` is updated in place, so its state carries over between calls.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	std::ostringstream	expanded;

	for (const char* cur = s.c_str(); *cur != '\0'; ++cur)
	{
		expanded << *cur;

		acc += add;
		if (acc <= treshold)
			continue;

		// Accumulator went over the limit: inject here and keep the remainder.
		acc -= treshold;
		expanded << "ABC";
	}

	return expanded.str();
}
4520 
4521 // Calculate expected result based on the code string
calcOpPhiCase5(float val,const string & s)4522 float calcOpPhiCase5 (float val, const string& s)
4523 {
4524 	const char*		p		= s.c_str();
4525 	float			x[8];
4526 	bool			b[8];
4527 	const float		tv[8]	= { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4528 	const float		v		= deFloatAbs(val);
4529 	float			res		= 0;
4530 	int				depth	= -1;
4531 	int				skip	= 0;
4532 
4533 	for (int i = 7; i >= 0; --i)
4534 		x[i] = std::fmod((float)v, (float)(2 << i));
4535 	for (int i = 7; i >= 0; --i)
4536 		b[i] = x[i] > tv[i];
4537 
4538 	while (*p)
4539 	{
4540 		if (*p == 'A')
4541 		{
4542 			depth++;
4543 			if (skip == 0 && b[depth])
4544 			{
4545 				res++;
4546 			}
4547 			else
4548 				skip++;
4549 		}
4550 		if (*p == 'B')
4551 		{
4552 			if (skip)
4553 				skip--;
4554 			if (b[depth] || skip)
4555 				skip++;
4556 		}
4557 		if (*p == 'C')
4558 		{
4559 			depth--;
4560 			if (skip)
4561 				skip--;
4562 		}
4563 		p++;
4564 	}
4565 	return res;
4566 }
4567 
// Generates the SPIR-V function body described by code string `s`.
//
// In the code string, the letters represent the following:
//
// A:
//     if (certain bit is set)
//     {
//       result++;
//
// B:
//     } else {
//
// C:
//     }
//
// examples:
// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
//
// Each construct is identified by the position of its 'A' in the string and
// uses the labels %t<id>, %f<id> and %m<id>; its condition is %b<depth>. The
// OpPhi of the last construct in the string defines %res.
//
// The else-branches do not generate new values, so the generator has to keep
// track of the value that was live before the matching then-branch. This is
// what the value stacks below are for.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>				openIds;		// ids of the open constructs, innermost on top
	std::stack<std::string>		liveValue;		// id of the current result value
	std::stack<std::string>		liveValueLabel;	// same value followed by the block it comes from
	std::stack<std::string>		thenOperands;	// OpPhi operands contributed by then-branches
	std::ostringstream			code;
	const char* const			chars		= s.c_str();
	int							depth		= -1;
	int							currId		= 0;

	openIds.push(-1);
	liveValue.push("%f32_0");
	liveValueLabel.push("%f32_0 %entry");

	for (int pos = 0; chars[pos] != '\0'; ++pos)
	{
		switch (chars[pos])
		{
			case 'A':
			{
				depth++;
				currId = pos;
				openIds.push(currId);

				std::ostringstream thenValue;
				thenValue << "%rt" << currId;

				code << "\tOpSelectionMerge %m" << currId << " None\n"
					 << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n"
					 << "%t" << currId << " = OpLabel\n"
					 << thenValue.str() << " = OpFAdd %f32 " << liveValue.top() << " %f32_1\n";

				liveValue.push(thenValue.str());
				thenValue << " %t" << currId;
				liveValueLabel.push(thenValue.str());
				break;
			}

			case 'B':
			{
				// The then-branch ends here; remember what it feeds into the merge.
				thenOperands.push(liveValueLabel.top());
				liveValue.pop();
				liveValueLabel.pop();

				code << "\tOpBranch %m" << currId << "\n"
					 << "%f" << currId << " = OpLabel\n";

				// The else-branch starts from the value live before the 'A'.
				std::ostringstream elseOperand;
				elseOperand << liveValue.top() << " %f" << currId;
				liveValueLabel.top() = elseOperand.str();
				break;
			}

			case 'C':
			{
				const std::string	elseOperand	= liveValueLabel.top();
				const bool			isLast		= (chars[pos + 1] == '\0');

				code << "\tOpBranch %m" << currId << "\n"
					 << "%m" << currId << " = OpLabel\n";

				if (isLast)
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << currId;

				code << " = OpPhi %f32  " << thenOperands.top() << "  " << elseOperand << "\n";
				thenOperands.pop();

				// The merged value replaces the value that was live in the enclosing scope.
				std::ostringstream mergedValue;
				mergedValue << "%rm" << currId;
				liveValue.top() = mergedValue.str();
				mergedValue << " %m" << currId;
				liveValueLabel.top() = mergedValue.str();

				depth--;
				openIds.pop();
				currId = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4666 
createOpPhiGroup(tcu::TestContext & testCtx)4667 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4668 {
4669 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4670 	ComputeShaderSpec				spec1;
4671 	ComputeShaderSpec				spec2;
4672 	ComputeShaderSpec				spec3;
4673 	ComputeShaderSpec				spec4;
4674 	ComputeShaderSpec				spec5;
4675 	de::Random						rnd				(deStringHash(group->getName()));
4676 	const int						numElements		= 100;
4677 	vector<float>					inputFloats		(numElements, 0);
4678 	vector<float>					outputFloats1	(numElements, 0);
4679 	vector<float>					outputFloats2	(numElements, 0);
4680 	vector<float>					outputFloats3	(numElements, 0);
4681 	vector<float>					outputFloats4	(numElements, 0);
4682 	vector<float>					outputFloats5	(numElements, 0);
4683 	std::string						codestring		= "ABC";
4684 	const int						test4Width		= 512;
4685 
4686 	// Build case 5 code string. Each iteration makes the hierarchy more complicated.
4687 	// 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4688 	// shader code.
4689 	for (int i = 0, acc = 0; i < 9; i++)
4690 		codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4691 
4692 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4693 
4694 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4695 	floorAll(inputFloats);
4696 
4697 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4698 	{
4699 		switch (ndx % 3)
4700 		{
4701 			case 0:		outputFloats1[ndx] = inputFloats[ndx] + 5.5f;	break;
4702 			case 1:		outputFloats1[ndx] = inputFloats[ndx] + 20.5f;	break;
4703 			case 2:		outputFloats1[ndx] = inputFloats[ndx] + 1.75f;	break;
4704 			default:	break;
4705 		}
4706 		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4707 		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4708 
4709 		int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4710 		outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4711 
4712 		outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4713 	}
4714 
4715 	spec1.assembly =
4716 		string(getComputeAsmShaderPreamble()) +
4717 
4718 		"OpSource GLSL 430\n"
4719 		"OpName %main \"main\"\n"
4720 		"OpName %id \"gl_GlobalInvocationID\"\n"
4721 
4722 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4723 
4724 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4725 
4726 		"%id = OpVariable %uvec3ptr Input\n"
4727 		"%zero       = OpConstant %i32 0\n"
4728 		"%three      = OpConstant %u32 3\n"
4729 		"%constf5p5  = OpConstant %f32 5.5\n"
4730 		"%constf20p5 = OpConstant %f32 20.5\n"
4731 		"%constf1p75 = OpConstant %f32 1.75\n"
4732 		"%constf8p5  = OpConstant %f32 8.5\n"
4733 		"%constf6p5  = OpConstant %f32 6.5\n"
4734 
4735 		"%main     = OpFunction %void None %voidf\n"
4736 		"%entry    = OpLabel\n"
4737 		"%idval    = OpLoad %uvec3 %id\n"
4738 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4739 		"%selector = OpUMod %u32 %x %three\n"
4740 		"            OpSelectionMerge %phi None\n"
4741 		"            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4742 
4743 		// Case 1 before OpPhi.
4744 		"%case1    = OpLabel\n"
4745 		"            OpBranch %phi\n"
4746 
4747 		"%default  = OpLabel\n"
4748 		"            OpUnreachable\n"
4749 
4750 		"%phi      = OpLabel\n"
4751 		"%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4752 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4753 		"%inval    = OpLoad %f32 %inloc\n"
4754 		"%add      = OpFAdd %f32 %inval %operand\n"
4755 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4756 		"            OpStore %outloc %add\n"
4757 		"            OpReturn\n"
4758 
4759 		// Case 0 after OpPhi.
4760 		"%case0    = OpLabel\n"
4761 		"            OpBranch %phi\n"
4762 
4763 
4764 		// Case 2 after OpPhi.
4765 		"%case2    = OpLabel\n"
4766 		"            OpBranch %phi\n"
4767 
4768 		"            OpFunctionEnd\n";
4769 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4770 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4771 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
4772 
4773 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4774 
4775 	spec2.assembly =
4776 		string(getComputeAsmShaderPreamble()) +
4777 
4778 		"OpName %main \"main\"\n"
4779 		"OpName %id \"gl_GlobalInvocationID\"\n"
4780 
4781 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4782 
4783 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4784 
4785 		"%id         = OpVariable %uvec3ptr Input\n"
4786 		"%zero       = OpConstant %i32 0\n"
4787 		"%one        = OpConstant %i32 1\n"
4788 		"%three      = OpConstant %i32 3\n"
4789 		"%constf6p5  = OpConstant %f32 6.5\n"
4790 
4791 		"%main       = OpFunction %void None %voidf\n"
4792 		"%entry      = OpLabel\n"
4793 		"%idval      = OpLoad %uvec3 %id\n"
4794 		"%x          = OpCompositeExtract %u32 %idval 0\n"
4795 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4796 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4797 		"%inval      = OpLoad %f32 %inloc\n"
4798 		"              OpBranch %phi\n"
4799 
4800 		"%phi        = OpLabel\n"
4801 		"%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
4802 		"%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
4803 		"%step_next  = OpIAdd %i32 %step %one\n"
4804 		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4805 		"%still_loop = OpSLessThan %bool %step %three\n"
4806 		"              OpLoopMerge %exit %phi None\n"
4807 		"              OpBranchConditional %still_loop %phi %exit\n"
4808 
4809 		"%exit       = OpLabel\n"
4810 		"              OpStore %outloc %accum\n"
4811 		"              OpReturn\n"
4812 		"              OpFunctionEnd\n";
4813 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4814 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4815 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
4816 
4817 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4818 
4819 	spec3.assembly =
4820 		string(getComputeAsmShaderPreamble()) +
4821 
4822 		"OpName %main \"main\"\n"
4823 		"OpName %id \"gl_GlobalInvocationID\"\n"
4824 
4825 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4826 
4827 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4828 
4829 		"%f32ptr_f   = OpTypePointer Function %f32\n"
4830 		"%id         = OpVariable %uvec3ptr Input\n"
4831 		"%true       = OpConstantTrue %bool\n"
4832 		"%false      = OpConstantFalse %bool\n"
4833 		"%zero       = OpConstant %i32 0\n"
4834 		"%constf8p5  = OpConstant %f32 8.5\n"
4835 
4836 		"%main       = OpFunction %void None %voidf\n"
4837 		"%entry      = OpLabel\n"
4838 		"%b          = OpVariable %f32ptr_f Function %constf8p5\n"
4839 		"%idval      = OpLoad %uvec3 %id\n"
4840 		"%x          = OpCompositeExtract %u32 %idval 0\n"
4841 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4842 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4843 		"%a_init     = OpLoad %f32 %inloc\n"
4844 		"%b_init     = OpLoad %f32 %b\n"
4845 		"              OpBranch %phi\n"
4846 
4847 		"%phi        = OpLabel\n"
4848 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
4849 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
4850 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
4851 		"              OpLoopMerge %exit %phi None\n"
4852 		"              OpBranchConditional %still_loop %phi %exit\n"
4853 
4854 		"%exit       = OpLabel\n"
4855 		"%sub        = OpFSub %f32 %a_next %b_next\n"
4856 		"              OpStore %outloc %sub\n"
4857 		"              OpReturn\n"
4858 		"              OpFunctionEnd\n";
4859 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4860 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4861 	spec3.numWorkGroups = IVec3(numElements, 1, 1);
4862 
4863 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4864 
4865 	spec4.assembly =
4866 		"OpCapability Shader\n"
4867 		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
4868 		"OpMemoryModel Logical GLSL450\n"
4869 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4870 		"OpExecutionMode %main LocalSize 1 1 1\n"
4871 
4872 		"OpSource GLSL 430\n"
4873 		"OpName %main \"main\"\n"
4874 		"OpName %id \"gl_GlobalInvocationID\"\n"
4875 
4876 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4877 
4878 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4879 
4880 		"%id       = OpVariable %uvec3ptr Input\n"
4881 		"%zero     = OpConstant %i32 0\n"
4882 		"%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4883 
4884 		+ generateConstantDefinitions(test4Width) +
4885 
4886 		"%main     = OpFunction %void None %voidf\n"
4887 		"%entry    = OpLabel\n"
4888 		"%idval    = OpLoad %uvec3 %id\n"
4889 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4890 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4891 		"%inval    = OpLoad %f32 %inloc\n"
4892 		"%xf       = OpConvertUToF %f32 %x\n"
4893 		"%xm       = OpFMul %f32 %xf %inval\n"
4894 		"%xa       = OpExtInst %f32 %ext FAbs %xm\n"
4895 		"%xi       = OpConvertFToU %u32 %xa\n"
4896 		"%selector = OpUMod %u32 %xi %cimod\n"
4897 		"            OpSelectionMerge %phi None\n"
4898 		"            OpSwitch %selector %default "
4899 
4900 		+ generateSwitchCases(test4Width) +
4901 
4902 		"%default  = OpLabel\n"
4903 		"            OpUnreachable\n"
4904 
4905 		+ generateSwitchTargets(test4Width) +
4906 
4907 		"%phi      = OpLabel\n"
4908 		"%result   = OpPhi %f32"
4909 
4910 		+ generateOpPhiParams(test4Width) +
4911 
4912 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4913 		"            OpStore %outloc %result\n"
4914 		"            OpReturn\n"
4915 
4916 		"            OpFunctionEnd\n";
4917 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4918 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4919 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
4920 
4921 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4922 
4923 	spec5.assembly =
4924 		"OpCapability Shader\n"
4925 		"%ext      = OpExtInstImport \"GLSL.std.450\"\n"
4926 		"OpMemoryModel Logical GLSL450\n"
4927 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4928 		"OpExecutionMode %main LocalSize 1 1 1\n"
4929 		"%code     = OpString \"" + codestring + "\"\n"
4930 
4931 		"OpSource GLSL 430\n"
4932 		"OpName %main \"main\"\n"
4933 		"OpName %id \"gl_GlobalInvocationID\"\n"
4934 
4935 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4936 
4937 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4938 
4939 		"%id       = OpVariable %uvec3ptr Input\n"
4940 		"%zero     = OpConstant %i32 0\n"
4941 		"%f32_0    = OpConstant %f32 0.0\n"
4942 		"%f32_0_5  = OpConstant %f32 0.5\n"
4943 		"%f32_1    = OpConstant %f32 1.0\n"
4944 		"%f32_1_5  = OpConstant %f32 1.5\n"
4945 		"%f32_2    = OpConstant %f32 2.0\n"
4946 		"%f32_3_5  = OpConstant %f32 3.5\n"
4947 		"%f32_4    = OpConstant %f32 4.0\n"
4948 		"%f32_7_5  = OpConstant %f32 7.5\n"
4949 		"%f32_8    = OpConstant %f32 8.0\n"
4950 		"%f32_15_5 = OpConstant %f32 15.5\n"
4951 		"%f32_16   = OpConstant %f32 16.0\n"
4952 		"%f32_31_5 = OpConstant %f32 31.5\n"
4953 		"%f32_32   = OpConstant %f32 32.0\n"
4954 		"%f32_63_5 = OpConstant %f32 63.5\n"
4955 		"%f32_64   = OpConstant %f32 64.0\n"
4956 		"%f32_127_5 = OpConstant %f32 127.5\n"
4957 		"%f32_128  = OpConstant %f32 128.0\n"
4958 		"%f32_256  = OpConstant %f32 256.0\n"
4959 
4960 		"%main     = OpFunction %void None %voidf\n"
4961 		"%entry    = OpLabel\n"
4962 		"%idval    = OpLoad %uvec3 %id\n"
4963 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4964 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4965 		"%inval    = OpLoad %f32 %inloc\n"
4966 
4967 		"%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
4968 		"%x8       = OpFMod %f32 %xabs %f32_256\n"
4969 		"%x7       = OpFMod %f32 %xabs %f32_128\n"
4970 		"%x6       = OpFMod %f32 %xabs %f32_64\n"
4971 		"%x5       = OpFMod %f32 %xabs %f32_32\n"
4972 		"%x4       = OpFMod %f32 %xabs %f32_16\n"
4973 		"%x3       = OpFMod %f32 %xabs %f32_8\n"
4974 		"%x2       = OpFMod %f32 %xabs %f32_4\n"
4975 		"%x1       = OpFMod %f32 %xabs %f32_2\n"
4976 
4977 		"%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4978 		"%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4979 		"%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4980 		"%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4981 		"%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4982 		"%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4983 		"%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4984 		"%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4985 
4986 		+ generateOpPhiCase5(codestring) +
4987 
4988 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4989 		"            OpStore %outloc %res\n"
4990 		"            OpReturn\n"
4991 
4992 		"            OpFunctionEnd\n";
4993 	spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4994 	spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4995 	spec5.numWorkGroups = IVec3(numElements, 1, 1);
4996 
4997 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4998 
4999 	createOpPhiVartypeTests(group, testCtx);
5000 
5001 	return group.release();
5002 }
5003 
5004 // Assembly code used for testing block order is based on GLSL source code:
5005 //
5006 // #version 430
5007 //
5008 // layout(std140, set = 0, binding = 0) readonly buffer Input {
5009 //   float elements[];
5010 // } input_data;
5011 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
5012 //   float elements[];
5013 // } output_data;
5014 //
5015 // void main() {
5016 //   uint x = gl_GlobalInvocationID.x;
5017 //   output_data.elements[x] = input_data.elements[x];
5018 //   if (x > uint(50)) {
5019 //     switch (x % uint(3)) {
5020 //       case 0: output_data.elements[x] += 1.5f; break;
5021 //       case 1: output_data.elements[x] += 42.f; break;
5022 //       case 2: output_data.elements[x] -= 27.f; break;
5023 //       default: break;
5024 //     }
5025 //   } else {
5026 //     output_data.elements[x] = -input_data.elements[x];
5027 //   }
5028 // }
createBlockOrderGroup(tcu::TestContext & testCtx)5029 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
5030 {
5031 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
5032 	ComputeShaderSpec				spec;
5033 	de::Random						rnd				(deStringHash(group->getName()));
5034 	const int						numElements		= 100;
5035 	vector<float>					inputFloats		(numElements, 0);
5036 	vector<float>					outputFloats	(numElements, 0);
5037 
5038 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5039 
5040 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5041 	floorAll(inputFloats);
5042 
5043 	for (size_t ndx = 0; ndx <= 50; ++ndx)
5044 		outputFloats[ndx] = -inputFloats[ndx];
5045 
5046 	for (size_t ndx = 51; ndx < numElements; ++ndx)
5047 	{
5048 		switch (ndx % 3)
5049 		{
5050 			case 0:		outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
5051 			case 1:		outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
5052 			case 2:		outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
5053 			default:	break;
5054 		}
5055 	}
5056 
5057 	spec.assembly =
5058 		string(getComputeAsmShaderPreamble()) +
5059 
5060 		"OpSource GLSL 430\n"
5061 		"OpName %main \"main\"\n"
5062 		"OpName %id \"gl_GlobalInvocationID\"\n"
5063 
5064 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5065 
5066 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5067 
5068 		"%u32ptr       = OpTypePointer Function %u32\n"
5069 		"%u32ptr_input = OpTypePointer Input %u32\n"
5070 
5071 		+ string(getComputeAsmInputOutputBuffer()) +
5072 
5073 		"%id        = OpVariable %uvec3ptr Input\n"
5074 		"%zero      = OpConstant %i32 0\n"
5075 		"%const3    = OpConstant %u32 3\n"
5076 		"%const50   = OpConstant %u32 50\n"
5077 		"%constf1p5 = OpConstant %f32 1.5\n"
5078 		"%constf27  = OpConstant %f32 27.0\n"
5079 		"%constf42  = OpConstant %f32 42.0\n"
5080 
5081 		"%main = OpFunction %void None %voidf\n"
5082 
5083 		// entry block.
5084 		"%entry    = OpLabel\n"
5085 
5086 		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5087 		"%xvar     = OpVariable %u32ptr Function\n"
5088 		"%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
5089 		"%x        = OpLoad %u32 %xptr\n"
5090 		"            OpStore %xvar %x\n"
5091 
5092 		"%cmp      = OpUGreaterThan %bool %x %const50\n"
5093 		"            OpSelectionMerge %if_merge None\n"
5094 		"            OpBranchConditional %cmp %if_true %if_false\n"
5095 
5096 		// False branch for if-statement: placed in the middle of switch cases and before true branch.
5097 		"%if_false = OpLabel\n"
5098 		"%x_f      = OpLoad %u32 %xvar\n"
5099 		"%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
5100 		"%inval_f  = OpLoad %f32 %inloc_f\n"
5101 		"%negate   = OpFNegate %f32 %inval_f\n"
5102 		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5103 		"            OpStore %outloc_f %negate\n"
5104 		"            OpBranch %if_merge\n"
5105 
5106 		// Merge block for if-statement: placed in the middle of true and false branch.
5107 		"%if_merge = OpLabel\n"
5108 		"            OpReturn\n"
5109 
5110 		// True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5111 		"%if_true  = OpLabel\n"
5112 		"%xval_t   = OpLoad %u32 %xvar\n"
5113 		"%mod      = OpUMod %u32 %xval_t %const3\n"
5114 		"            OpSelectionMerge %switch_merge None\n"
5115 		"            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5116 
5117 		// Merge block for switch-statement: placed before the case
5118 		// bodies.  But it must follow OpSwitch which dominates it.
5119 		"%switch_merge = OpLabel\n"
5120 		"                OpBranch %if_merge\n"
5121 
5122 		// Case 1 for switch-statement: placed before case 0.
5123 		// It must follow the OpSwitch that dominates it.
5124 		"%case1    = OpLabel\n"
5125 		"%x_1      = OpLoad %u32 %xvar\n"
5126 		"%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
5127 		"%inval_1  = OpLoad %f32 %inloc_1\n"
5128 		"%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
5129 		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5130 		"            OpStore %outloc_1 %addf42\n"
5131 		"            OpBranch %switch_merge\n"
5132 
5133 		// Case 2 for switch-statement.
5134 		"%case2    = OpLabel\n"
5135 		"%x_2      = OpLoad %u32 %xvar\n"
5136 		"%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
5137 		"%inval_2  = OpLoad %f32 %inloc_2\n"
5138 		"%subf27   = OpFSub %f32 %inval_2 %constf27\n"
5139 		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5140 		"            OpStore %outloc_2 %subf27\n"
5141 		"            OpBranch %switch_merge\n"
5142 
5143 		// Default case for switch-statement: placed in the middle of normal cases.
5144 		"%default = OpLabel\n"
5145 		"           OpBranch %switch_merge\n"
5146 
5147 		// Case 0 for switch-statement: out of order.
5148 		"%case0    = OpLabel\n"
5149 		"%x_0      = OpLoad %u32 %xvar\n"
5150 		"%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
5151 		"%inval_0  = OpLoad %f32 %inloc_0\n"
5152 		"%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
5153 		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5154 		"            OpStore %outloc_0 %addf1p5\n"
5155 		"            OpBranch %switch_merge\n"
5156 
5157 		"            OpFunctionEnd\n";
5158 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5159 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5160 	spec.numWorkGroups = IVec3(numElements, 1, 1);
5161 
5162 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
5163 
5164 	return group.release();
5165 }
5166 
createMultipleShaderGroup(tcu::TestContext & testCtx)5167 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5168 {
5169 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5170 	ComputeShaderSpec				spec1;
5171 	ComputeShaderSpec				spec2;
5172 	de::Random						rnd				(deStringHash(group->getName()));
5173 	const int						numElements		= 100;
5174 	vector<float>					inputFloats		(numElements, 0);
5175 	vector<float>					outputFloats1	(numElements, 0);
5176 	vector<float>					outputFloats2	(numElements, 0);
5177 	fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5178 
5179 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5180 	{
5181 		outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5182 		outputFloats2[ndx] = -inputFloats[ndx];
5183 	}
5184 
5185 	const string assembly(
5186 		"OpCapability Shader\n"
5187 		"OpMemoryModel Logical GLSL450\n"
5188 		"OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5189 		"OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5190 		// A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5191 		"OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5192 		"OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5193 		"OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5194 
5195 		"OpName %comp_main1              \"entrypoint1\"\n"
5196 		"OpName %comp_main2              \"entrypoint2\"\n"
5197 		"OpName %vert_main               \"entrypoint2\"\n"
5198 		"OpName %id                      \"gl_GlobalInvocationID\"\n"
5199 		"OpName %vert_builtin_st         \"gl_PerVertex\"\n"
5200 		"OpName %vertexIndex             \"gl_VertexIndex\"\n"
5201 		"OpName %instanceIndex           \"gl_InstanceIndex\"\n"
5202 		"OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5203 		"OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5204 		"OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5205 
5206 		"OpDecorate %id                      BuiltIn GlobalInvocationId\n"
5207 		"OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
5208 		"OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
5209 		"OpDecorate %vert_builtin_st         Block\n"
5210 		"OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5211 		"OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5212 		"OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5213 
5214 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5215 
5216 		"%zero       = OpConstant %i32 0\n"
5217 		"%one        = OpConstant %u32 1\n"
5218 		"%c_f32_1    = OpConstant %f32 1\n"
5219 
5220 		"%i32inputptr         = OpTypePointer Input %i32\n"
5221 		"%vec4                = OpTypeVector %f32 4\n"
5222 		"%vec4ptr             = OpTypePointer Output %vec4\n"
5223 		"%f32arr1             = OpTypeArray %f32 %one\n"
5224 		"%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
5225 		"%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5226 		"%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
5227 
5228 		"%id         = OpVariable %uvec3ptr Input\n"
5229 		"%vertexIndex = OpVariable %i32inputptr Input\n"
5230 		"%instanceIndex = OpVariable %i32inputptr Input\n"
5231 		"%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5232 
5233 		// gl_Position = vec4(1.);
5234 		"%vert_main  = OpFunction %void None %voidf\n"
5235 		"%vert_entry = OpLabel\n"
5236 		"%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5237 		"              OpStore %position %c_vec4_1\n"
5238 		"              OpReturn\n"
5239 		"              OpFunctionEnd\n"
5240 
5241 		// Double inputs.
5242 		"%comp_main1  = OpFunction %void None %voidf\n"
5243 		"%comp1_entry = OpLabel\n"
5244 		"%idval1      = OpLoad %uvec3 %id\n"
5245 		"%x1          = OpCompositeExtract %u32 %idval1 0\n"
5246 		"%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
5247 		"%inval1      = OpLoad %f32 %inloc1\n"
5248 		"%add         = OpFAdd %f32 %inval1 %inval1\n"
5249 		"%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
5250 		"               OpStore %outloc1 %add\n"
5251 		"               OpReturn\n"
5252 		"               OpFunctionEnd\n"
5253 
5254 		// Negate inputs.
5255 		"%comp_main2  = OpFunction %void None %voidf\n"
5256 		"%comp2_entry = OpLabel\n"
5257 		"%idval2      = OpLoad %uvec3 %id\n"
5258 		"%x2          = OpCompositeExtract %u32 %idval2 0\n"
5259 		"%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
5260 		"%inval2      = OpLoad %f32 %inloc2\n"
5261 		"%neg         = OpFNegate %f32 %inval2\n"
5262 		"%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
5263 		"               OpStore %outloc2 %neg\n"
5264 		"               OpReturn\n"
5265 		"               OpFunctionEnd\n");
5266 
5267 	spec1.assembly = assembly;
5268 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5269 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5270 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
5271 	spec1.entryPoint = "entrypoint1";
5272 
5273 	spec2.assembly = assembly;
5274 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5275 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5276 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
5277 	spec2.entryPoint = "entrypoint2";
5278 
5279 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
5280 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
5281 
5282 	return group.release();
5283 }
5284 
// Returns a string made of num4ByteChars copies of a single 4-byte UTF-8
// sequence (so the result is num4ByteChars * 4 bytes long).
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// U+1F30D encoded as UTF-8: an example of a maximum-length (4-byte) valid
	// sequence. A plain char array is used on purpose: some Microsoft compilers
	// may otherwise treat the literal as wide (16-bit) characters, and a C++11
	// u8 literal is not an option for the older compilers we still support.
	const char		glyph[]		= "\xF0\x9F\x8C\x8D";
	const size_t	glyphSize	= sizeof(glyph) - 1; // exclude the terminating NUL

	std::string result;
	result.reserve(num4ByteChars * glyphSize);

	for (size_t remaining = num4ByteChars; remaining > 0; --remaining)
		result.append(glyph, glyphSize);

	return result;
}
5301 
createOpSourceGroup(tcu::TestContext & testCtx)5302 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5303 {
5304 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5305 	vector<CaseParameter>			cases;
5306 	de::Random						rnd				(deStringHash(group->getName()));
5307 	const int						numElements		= 100;
5308 	vector<float>					positiveFloats	(numElements, 0);
5309 	vector<float>					negativeFloats	(numElements, 0);
5310 	const StringTemplate			shaderTemplate	(
5311 		"OpCapability Shader\n"
5312 		"OpMemoryModel Logical GLSL450\n"
5313 
5314 		"OpEntryPoint GLCompute %main \"main\" %id\n"
5315 		"OpExecutionMode %main LocalSize 1 1 1\n"
5316 
5317 		"${SOURCE}\n"
5318 
5319 		"OpName %main           \"main\"\n"
5320 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5321 
5322 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5323 
5324 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5325 
5326 		"%id        = OpVariable %uvec3ptr Input\n"
5327 		"%zero      = OpConstant %i32 0\n"
5328 
5329 		"%main      = OpFunction %void None %voidf\n"
5330 		"%label     = OpLabel\n"
5331 		"%idval     = OpLoad %uvec3 %id\n"
5332 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5333 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5334 		"%inval     = OpLoad %f32 %inloc\n"
5335 		"%neg       = OpFNegate %f32 %inval\n"
5336 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5337 		"             OpStore %outloc %neg\n"
5338 		"             OpReturn\n"
5339 		"             OpFunctionEnd\n");
5340 
5341 	cases.push_back(CaseParameter("unknown_source",							"OpSource Unknown 0"));
5342 	cases.push_back(CaseParameter("wrong_source",							"OpSource OpenCL_C 210"));
5343 	cases.push_back(CaseParameter("normal_filename",						"%fname = OpString \"filename\"\n"
5344 																			"OpSource GLSL 430 %fname"));
5345 	cases.push_back(CaseParameter("empty_filename",							"%fname = OpString \"\"\n"
5346 																			"OpSource GLSL 430 %fname"));
5347 	cases.push_back(CaseParameter("normal_source_code",						"%fname = OpString \"filename\"\n"
5348 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5349 	cases.push_back(CaseParameter("empty_source_code",						"%fname = OpString \"filename\"\n"
5350 																			"OpSource GLSL 430 %fname \"\""));
5351 	cases.push_back(CaseParameter("long_source_code",						"%fname = OpString \"filename\"\n"
5352 																			"OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5353 	cases.push_back(CaseParameter("utf8_source_code",						"%fname = OpString \"filename\"\n"
5354 																			"OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5355 	cases.push_back(CaseParameter("normal_sourcecontinued",					"%fname = OpString \"filename\"\n"
5356 																			"OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5357 																			"OpSourceContinued \"id main() {}\""));
5358 	cases.push_back(CaseParameter("empty_sourcecontinued",					"%fname = OpString \"filename\"\n"
5359 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5360 																			"OpSourceContinued \"\""));
5361 	cases.push_back(CaseParameter("long_sourcecontinued",					"%fname = OpString \"filename\"\n"
5362 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5363 																			"OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5364 	cases.push_back(CaseParameter("utf8_sourcecontinued",					"%fname = OpString \"filename\"\n"
5365 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5366 																			"OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5367 	cases.push_back(CaseParameter("multi_sourcecontinued",					"%fname = OpString \"filename\"\n"
5368 																			"OpSource GLSL 430 %fname \"#version 430\n\"\n"
5369 																			"OpSourceContinued \"void\"\n"
5370 																			"OpSourceContinued \"main()\"\n"
5371 																			"OpSourceContinued \"{}\""));
5372 	cases.push_back(CaseParameter("empty_source_before_sourcecontinued",	"%fname = OpString \"filename\"\n"
5373 																			"OpSource GLSL 430 %fname \"\"\n"
5374 																			"OpSourceContinued \"#version 430\nvoid main() {}\""));
5375 
5376 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5377 
5378 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5379 		negativeFloats[ndx] = -positiveFloats[ndx];
5380 
5381 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5382 	{
5383 		map<string, string>		specializations;
5384 		ComputeShaderSpec		spec;
5385 
5386 		specializations["SOURCE"] = cases[caseNdx].param;
5387 		spec.assembly = shaderTemplate.specialize(specializations);
5388 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5389 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5390 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5391 
5392 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5393 	}
5394 
5395 	return group.release();
5396 }
5397 
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5398 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5399 {
5400 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5401 	vector<CaseParameter>			cases;
5402 	de::Random						rnd				(deStringHash(group->getName()));
5403 	const int						numElements		= 100;
5404 	vector<float>					inputFloats		(numElements, 0);
5405 	vector<float>					outputFloats	(numElements, 0);
5406 	const StringTemplate			shaderTemplate	(
5407 		string(getComputeAsmShaderPreamble()) +
5408 
5409 		"OpSourceExtension \"${EXTENSION}\"\n"
5410 
5411 		"OpName %main           \"main\"\n"
5412 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5413 
5414 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5415 
5416 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5417 
5418 		"%id        = OpVariable %uvec3ptr Input\n"
5419 		"%zero      = OpConstant %i32 0\n"
5420 
5421 		"%main      = OpFunction %void None %voidf\n"
5422 		"%label     = OpLabel\n"
5423 		"%idval     = OpLoad %uvec3 %id\n"
5424 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5425 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5426 		"%inval     = OpLoad %f32 %inloc\n"
5427 		"%neg       = OpFNegate %f32 %inval\n"
5428 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5429 		"             OpStore %outloc %neg\n"
5430 		"             OpReturn\n"
5431 		"             OpFunctionEnd\n");
5432 
5433 	cases.push_back(CaseParameter("empty_extension",	""));
5434 	cases.push_back(CaseParameter("real_extension",		"GL_ARB_texture_rectangle"));
5435 	cases.push_back(CaseParameter("fake_extension",		"GL_ARB_im_the_ultimate_extension"));
5436 	cases.push_back(CaseParameter("utf8_extension",		"GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5437 	cases.push_back(CaseParameter("long_extension",		makeLongUTF8String(65533) + "ccc")); // word count: 65535
5438 
5439 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5440 
5441 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5442 		outputFloats[ndx] = -inputFloats[ndx];
5443 
5444 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5445 	{
5446 		map<string, string>		specializations;
5447 		ComputeShaderSpec		spec;
5448 
5449 		specializations["EXTENSION"] = cases[caseNdx].param;
5450 		spec.assembly = shaderTemplate.specialize(specializations);
5451 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5452 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5453 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5454 
5455 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5456 	}
5457 
5458 	return group.release();
5459 }
5460 
5461 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5462 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5463 {
5464 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5465 	vector<CaseParameter>			cases;
5466 	de::Random						rnd				(deStringHash(group->getName()));
5467 	const int						numElements		= 100;
5468 	vector<float>					positiveFloats	(numElements, 0);
5469 	vector<float>					negativeFloats	(numElements, 0);
5470 	const StringTemplate			shaderTemplate	(
5471 		string(getComputeAsmShaderPreamble()) +
5472 
5473 		"OpSource GLSL 430\n"
5474 		"OpName %main           \"main\"\n"
5475 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5476 
5477 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5478 
5479 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5480 		"%uvec2     = OpTypeVector %u32 2\n"
5481 		"%bvec3     = OpTypeVector %bool 3\n"
5482 		"%fvec4     = OpTypeVector %f32 4\n"
5483 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
5484 		"%const100  = OpConstant %u32 100\n"
5485 		"%uarr100   = OpTypeArray %i32 %const100\n"
5486 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
5487 		"%pointer   = OpTypePointer Function %i32\n"
5488 		+ string(getComputeAsmInputOutputBuffer()) +
5489 
5490 		"%null      = OpConstantNull ${TYPE}\n"
5491 
5492 		"%id        = OpVariable %uvec3ptr Input\n"
5493 		"%zero      = OpConstant %i32 0\n"
5494 
5495 		"%main      = OpFunction %void None %voidf\n"
5496 		"%label     = OpLabel\n"
5497 		"%idval     = OpLoad %uvec3 %id\n"
5498 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5499 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5500 		"%inval     = OpLoad %f32 %inloc\n"
5501 		"%neg       = OpFNegate %f32 %inval\n"
5502 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5503 		"             OpStore %outloc %neg\n"
5504 		"             OpReturn\n"
5505 		"             OpFunctionEnd\n");
5506 
5507 	cases.push_back(CaseParameter("bool",			"%bool"));
5508 	cases.push_back(CaseParameter("sint32",			"%i32"));
5509 	cases.push_back(CaseParameter("uint32",			"%u32"));
5510 	cases.push_back(CaseParameter("float32",		"%f32"));
5511 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
5512 	cases.push_back(CaseParameter("vec3bool",		"%bvec3"));
5513 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
5514 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
5515 	cases.push_back(CaseParameter("array",			"%uarr100"));
5516 	cases.push_back(CaseParameter("struct",			"%struct"));
5517 	cases.push_back(CaseParameter("pointer",		"%pointer"));
5518 
5519 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5520 
5521 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5522 		negativeFloats[ndx] = -positiveFloats[ndx];
5523 
5524 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5525 	{
5526 		map<string, string>		specializations;
5527 		ComputeShaderSpec		spec;
5528 
5529 		specializations["TYPE"] = cases[caseNdx].param;
5530 		spec.assembly = shaderTemplate.specialize(specializations);
5531 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5532 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5533 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5534 
5535 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5536 	}
5537 
5538 	return group.release();
5539 }
5540 
5541 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5542 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5543 {
5544 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5545 	vector<CaseParameter>			cases;
5546 	de::Random						rnd				(deStringHash(group->getName()));
5547 	const int						numElements		= 100;
5548 	vector<float>					positiveFloats	(numElements, 0);
5549 	vector<float>					negativeFloats	(numElements, 0);
5550 	const StringTemplate			shaderTemplate	(
5551 		string(getComputeAsmShaderPreamble()) +
5552 
5553 		"OpSource GLSL 430\n"
5554 		"OpName %main           \"main\"\n"
5555 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5556 
5557 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5558 
5559 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5560 
5561 		"%id        = OpVariable %uvec3ptr Input\n"
5562 		"%zero      = OpConstant %i32 0\n"
5563 
5564 		"${CONSTANT}\n"
5565 
5566 		"%main      = OpFunction %void None %voidf\n"
5567 		"%label     = OpLabel\n"
5568 		"%idval     = OpLoad %uvec3 %id\n"
5569 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5570 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5571 		"%inval     = OpLoad %f32 %inloc\n"
5572 		"%neg       = OpFNegate %f32 %inval\n"
5573 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5574 		"             OpStore %outloc %neg\n"
5575 		"             OpReturn\n"
5576 		"             OpFunctionEnd\n");
5577 
5578 	cases.push_back(CaseParameter("vector",			"%five = OpConstant %u32 5\n"
5579 													"%const = OpConstantComposite %uvec3 %five %zero %five"));
5580 	cases.push_back(CaseParameter("matrix",			"%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5581 													"%ten = OpConstant %f32 10.\n"
5582 													"%fzero = OpConstant %f32 0.\n"
5583 													"%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5584 													"%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5585 	cases.push_back(CaseParameter("struct",			"%m2vec3 = OpTypeMatrix %fvec3 2\n"
5586 													"%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5587 													"%fzero = OpConstant %f32 0.\n"
5588 													"%one = OpConstant %f32 1.\n"
5589 													"%point5 = OpConstant %f32 0.5\n"
5590 													"%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5591 													"%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5592 													"%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5593 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %u32 %f32\n"
5594 													"%st2 = OpTypeStruct %i32 %i32\n"
5595 													"%struct = OpTypeStruct %st1 %st2\n"
5596 													"%point5 = OpConstant %f32 0.5\n"
5597 													"%one = OpConstant %u32 1\n"
5598 													"%ten = OpConstant %i32 10\n"
5599 													"%st1val = OpConstantComposite %st1 %one %point5\n"
5600 													"%st2val = OpConstantComposite %st2 %ten %ten\n"
5601 													"%const = OpConstantComposite %struct %st1val %st2val"));
5602 
5603 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5604 
5605 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5606 		negativeFloats[ndx] = -positiveFloats[ndx];
5607 
5608 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5609 	{
5610 		map<string, string>		specializations;
5611 		ComputeShaderSpec		spec;
5612 
5613 		specializations["CONSTANT"] = cases[caseNdx].param;
5614 		spec.assembly = shaderTemplate.specialize(specializations);
5615 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5616 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5617 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5618 
5619 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5620 	}
5621 
5622 	return group.release();
5623 }
5624 
5625 // Creates a floating point number with the given exponent, and significand
5626 // bits set. It can only create normalized numbers. Only the least significant
5627 // 24 bits of the significand will be examined. The final bit of the
5628 // significand will also be ignored. This allows alignment to be written
5629 // similarly to C99 hex-floats.
5630 // For example if you wanted to write 0x1.7f34p-12 you would call
5631 // constructNormalizedFloat(-12, 0x7f3400)
constructNormalizedFloat(deInt32 exponent,deUint32 significand)5632 float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
5633 {
5634 	float f = 1.0f;
5635 
5636 	for (deInt32 idx = 0; idx < 23; ++idx)
5637 	{
5638 		f += ((significand & 0x800000) == 0) ? 0.f : std::ldexp(1.0f, -(idx + 1));
5639 		significand <<= 1;
5640 	}
5641 
5642 	return std::ldexp(f, exponent);
5643 }
5644 
5645 // Compare instruction for the OpQuantizeF16 compute exact case.
5646 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5647 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5648 {
5649 	if (outputAllocs.size() != 1)
5650 		return false;
5651 
5652 	// Only size is needed because we cannot compare Nans.
5653 	size_t byteSize = expectedOutputs[0].getByteSize();
5654 
5655 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
5656 
5657 	if (byteSize != 4*sizeof(float)) {
5658 		return false;
5659 	}
5660 
5661 	if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5662 		*outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5663 		return false;
5664 	}
5665 	outputAsFloat++;
5666 
5667 	if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5668 		*outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5669 		return false;
5670 	}
5671 	outputAsFloat++;
5672 
5673 	if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5674 		*outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5675 		return false;
5676 	}
5677 	outputAsFloat++;
5678 
5679 	if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5680 		*outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5681 		return false;
5682 	}
5683 
5684 	return true;
5685 }
5686 
5687 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5688 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5689 {
5690 	if (outputAllocs.size() != 1)
5691 		return false;
5692 
5693 	// Only size is needed because we cannot compare Nans.
5694 	size_t byteSize = expectedOutputs[0].getByteSize();
5695 
5696 	const float* const	output_as_float	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
5697 
5698 	for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5699 	{
5700 		if (!deFloatIsNaN(output_as_float[idx]))
5701 		{
5702 			return false;
5703 		}
5704 	}
5705 
5706 	return true;
5707 }
5708 
5709 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5710 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5711 {
5712 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5713 
5714 	const std::string shader (
5715 		string(getComputeAsmShaderPreamble()) +
5716 
5717 		"OpSource GLSL 430\n"
5718 		"OpName %main           \"main\"\n"
5719 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5720 
5721 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5722 
5723 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5724 
5725 		"%id        = OpVariable %uvec3ptr Input\n"
5726 		"%zero      = OpConstant %i32 0\n"
5727 
5728 		"%main      = OpFunction %void None %voidf\n"
5729 		"%label     = OpLabel\n"
5730 		"%idval     = OpLoad %uvec3 %id\n"
5731 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5732 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5733 		"%inval     = OpLoad %f32 %inloc\n"
5734 		"%quant     = OpQuantizeToF16 %f32 %inval\n"
5735 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5736 		"             OpStore %outloc %quant\n"
5737 		"             OpReturn\n"
5738 		"             OpFunctionEnd\n");
5739 
5740 	{
5741 		ComputeShaderSpec	spec;
5742 		const deUint32		numElements		= 100;
5743 		vector<float>		infinities;
5744 		vector<float>		results;
5745 
5746 		infinities.reserve(numElements);
5747 		results.reserve(numElements);
5748 
5749 		for (size_t idx = 0; idx < numElements; ++idx)
5750 		{
5751 			switch(idx % 4)
5752 			{
5753 				case 0:
5754 					infinities.push_back(std::numeric_limits<float>::infinity());
5755 					results.push_back(std::numeric_limits<float>::infinity());
5756 					break;
5757 				case 1:
5758 					infinities.push_back(-std::numeric_limits<float>::infinity());
5759 					results.push_back(-std::numeric_limits<float>::infinity());
5760 					break;
5761 				case 2:
5762 					infinities.push_back(std::ldexp(1.0f, 16));
5763 					results.push_back(std::numeric_limits<float>::infinity());
5764 					break;
5765 				case 3:
5766 					infinities.push_back(std::ldexp(-1.0f, 32));
5767 					results.push_back(-std::numeric_limits<float>::infinity());
5768 					break;
5769 			}
5770 		}
5771 
5772 		spec.assembly = shader;
5773 		spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5774 		spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5775 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5776 
5777 		group->addChild(new SpvAsmComputeShaderCase(
5778 			testCtx, "infinities", "Check that infinities propagated and created", spec));
5779 	}
5780 
5781 	{
5782 		ComputeShaderSpec	spec;
5783 		vector<float>		nans;
5784 		const deUint32		numElements		= 100;
5785 
5786 		nans.reserve(numElements);
5787 
5788 		for (size_t idx = 0; idx < numElements; ++idx)
5789 		{
5790 			if (idx % 2 == 0)
5791 			{
5792 				nans.push_back(std::numeric_limits<float>::quiet_NaN());
5793 			}
5794 			else
5795 			{
5796 				nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5797 			}
5798 		}
5799 
5800 		spec.assembly = shader;
5801 		spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5802 		spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5803 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5804 		spec.verifyIO = &compareNan;
5805 
5806 		group->addChild(new SpvAsmComputeShaderCase(
5807 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
5808 	}
5809 
5810 	{
5811 		ComputeShaderSpec	spec;
5812 		vector<float>		small;
5813 		vector<float>		zeros;
5814 		const deUint32		numElements		= 100;
5815 
5816 		small.reserve(numElements);
5817 		zeros.reserve(numElements);
5818 
5819 		for (size_t idx = 0; idx < numElements; ++idx)
5820 		{
5821 			switch(idx % 6)
5822 			{
5823 				case 0:
5824 					small.push_back(0.f);
5825 					zeros.push_back(0.f);
5826 					break;
5827 				case 1:
5828 					small.push_back(-0.f);
5829 					zeros.push_back(-0.f);
5830 					break;
5831 				case 2:
5832 					small.push_back(std::ldexp(1.0f, -16));
5833 					zeros.push_back(0.f);
5834 					break;
5835 				case 3:
5836 					small.push_back(std::ldexp(-1.0f, -32));
5837 					zeros.push_back(-0.f);
5838 					break;
5839 				case 4:
5840 					small.push_back(std::ldexp(1.0f, -127));
5841 					zeros.push_back(0.f);
5842 					break;
5843 				case 5:
5844 					small.push_back(-std::ldexp(1.0f, -128));
5845 					zeros.push_back(-0.f);
5846 					break;
5847 			}
5848 		}
5849 
5850 		spec.assembly = shader;
5851 		spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5852 		spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5853 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5854 
5855 		group->addChild(new SpvAsmComputeShaderCase(
5856 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5857 	}
5858 
5859 	{
5860 		ComputeShaderSpec	spec;
5861 		vector<float>		exact;
5862 		const deUint32		numElements		= 200;
5863 
5864 		exact.reserve(numElements);
5865 
5866 		for (size_t idx = 0; idx < numElements; ++idx)
5867 			exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5868 
5869 		spec.assembly = shader;
5870 		spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5871 		spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5872 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5873 
5874 		group->addChild(new SpvAsmComputeShaderCase(
5875 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5876 	}
5877 
5878 	{
5879 		ComputeShaderSpec	spec;
5880 		vector<float>		inputs;
5881 		const deUint32		numElements		= 4;
5882 
5883 		inputs.push_back(constructNormalizedFloat(8,	0x300300));
5884 		inputs.push_back(-constructNormalizedFloat(-7,	0x600800));
5885 		inputs.push_back(constructNormalizedFloat(2,	0x01E000));
5886 		inputs.push_back(constructNormalizedFloat(1,	0xFFE000));
5887 
5888 		spec.assembly = shader;
5889 		spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5890 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5891 		spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5892 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5893 
5894 		group->addChild(new SpvAsmComputeShaderCase(
5895 			testCtx, "rounded", "Check that are rounded when needed", spec));
5896 	}
5897 
5898 	return group.release();
5899 }
5900 
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)5901 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5902 {
5903 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5904 
5905 	const std::string shader (
5906 		string(getComputeAsmShaderPreamble()) +
5907 
5908 		"OpName %main           \"main\"\n"
5909 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5910 
5911 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5912 
5913 		"OpDecorate %sc_0  SpecId 0\n"
5914 		"OpDecorate %sc_1  SpecId 1\n"
5915 		"OpDecorate %sc_2  SpecId 2\n"
5916 		"OpDecorate %sc_3  SpecId 3\n"
5917 		"OpDecorate %sc_4  SpecId 4\n"
5918 		"OpDecorate %sc_5  SpecId 5\n"
5919 
5920 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5921 
5922 		"%id        = OpVariable %uvec3ptr Input\n"
5923 		"%zero      = OpConstant %i32 0\n"
5924 		"%c_u32_6   = OpConstant %u32 6\n"
5925 
5926 		"%sc_0      = OpSpecConstant %f32 0.\n"
5927 		"%sc_1      = OpSpecConstant %f32 0.\n"
5928 		"%sc_2      = OpSpecConstant %f32 0.\n"
5929 		"%sc_3      = OpSpecConstant %f32 0.\n"
5930 		"%sc_4      = OpSpecConstant %f32 0.\n"
5931 		"%sc_5      = OpSpecConstant %f32 0.\n"
5932 
5933 		"%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5934 		"%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5935 		"%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5936 		"%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5937 		"%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5938 		"%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5939 
5940 		"%main      = OpFunction %void None %voidf\n"
5941 		"%label     = OpLabel\n"
5942 		"%idval     = OpLoad %uvec3 %id\n"
5943 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5944 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5945 		"%selector  = OpUMod %u32 %x %c_u32_6\n"
5946 		"            OpSelectionMerge %exit None\n"
5947 		"            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5948 
5949 		"%case0     = OpLabel\n"
5950 		"             OpStore %outloc %sc_0_quant\n"
5951 		"             OpBranch %exit\n"
5952 
5953 		"%case1     = OpLabel\n"
5954 		"             OpStore %outloc %sc_1_quant\n"
5955 		"             OpBranch %exit\n"
5956 
5957 		"%case2     = OpLabel\n"
5958 		"             OpStore %outloc %sc_2_quant\n"
5959 		"             OpBranch %exit\n"
5960 
5961 		"%case3     = OpLabel\n"
5962 		"             OpStore %outloc %sc_3_quant\n"
5963 		"             OpBranch %exit\n"
5964 
5965 		"%case4     = OpLabel\n"
5966 		"             OpStore %outloc %sc_4_quant\n"
5967 		"             OpBranch %exit\n"
5968 
5969 		"%case5     = OpLabel\n"
5970 		"             OpStore %outloc %sc_5_quant\n"
5971 		"             OpBranch %exit\n"
5972 
5973 		"%exit      = OpLabel\n"
5974 		"             OpReturn\n"
5975 
5976 		"             OpFunctionEnd\n");
5977 
5978 	{
5979 		ComputeShaderSpec	spec;
5980 		const deUint8		numCases	= 4;
5981 		vector<float>		inputs		(numCases, 0.f);
5982 		vector<float>		outputs;
5983 
5984 		spec.assembly		= shader;
5985 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
5986 
5987 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5988 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5989 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5990 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5991 
5992 		outputs.push_back(std::numeric_limits<float>::infinity());
5993 		outputs.push_back(-std::numeric_limits<float>::infinity());
5994 		outputs.push_back(std::numeric_limits<float>::infinity());
5995 		outputs.push_back(-std::numeric_limits<float>::infinity());
5996 
5997 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5998 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5999 
6000 		group->addChild(new SpvAsmComputeShaderCase(
6001 			testCtx, "infinities", "Check that infinities propagated and created", spec));
6002 	}
6003 
6004 	{
6005 		ComputeShaderSpec	spec;
6006 		const deUint8		numCases	= 2;
6007 		vector<float>		inputs		(numCases, 0.f);
6008 		vector<float>		outputs;
6009 
6010 		spec.assembly		= shader;
6011 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6012 		spec.verifyIO		= &compareNan;
6013 
6014 		outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6015 		outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6016 
6017 		for (deUint8 idx = 0; idx < numCases; ++idx)
6018 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6019 
6020 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6021 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6022 
6023 		group->addChild(new SpvAsmComputeShaderCase(
6024 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
6025 	}
6026 
6027 	{
6028 		ComputeShaderSpec	spec;
6029 		const deUint8		numCases	= 6;
6030 		vector<float>		inputs		(numCases, 0.f);
6031 		vector<float>		outputs;
6032 
6033 		spec.assembly		= shader;
6034 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6035 
6036 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6037 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6038 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6039 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6040 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6041 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6042 
6043 		outputs.push_back(0.f);
6044 		outputs.push_back(-0.f);
6045 		outputs.push_back(0.f);
6046 		outputs.push_back(-0.f);
6047 		outputs.push_back(0.f);
6048 		outputs.push_back(-0.f);
6049 
6050 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6051 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6052 
6053 		group->addChild(new SpvAsmComputeShaderCase(
6054 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
6055 	}
6056 
6057 	{
6058 		ComputeShaderSpec	spec;
6059 		const deUint8		numCases	= 6;
6060 		vector<float>		inputs		(numCases, 0.f);
6061 		vector<float>		outputs;
6062 
6063 		spec.assembly		= shader;
6064 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6065 
6066 		for (deUint8 idx = 0; idx < 6; ++idx)
6067 		{
6068 			const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6069 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6070 			outputs.push_back(f);
6071 		}
6072 
6073 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6074 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6075 
6076 		group->addChild(new SpvAsmComputeShaderCase(
6077 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
6078 	}
6079 
6080 	{
6081 		ComputeShaderSpec	spec;
6082 		const deUint8		numCases	= 4;
6083 		vector<float>		inputs		(numCases, 0.f);
6084 		vector<float>		outputs;
6085 
6086 		spec.assembly		= shader;
6087 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6088 		spec.verifyIO		= &compareOpQuantizeF16ComputeExactCase;
6089 
6090 		outputs.push_back(constructNormalizedFloat(8, 0x300300));
6091 		outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6092 		outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6093 		outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6094 
6095 		for (deUint8 idx = 0; idx < numCases; ++idx)
6096 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6097 
6098 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6099 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6100 
6101 		group->addChild(new SpvAsmComputeShaderCase(
6102 			testCtx, "rounded", "Check that are rounded when needed", spec));
6103 	}
6104 
6105 	return group.release();
6106 }
6107 
6108 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6109 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6110 {
6111 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6112 	ComputeShaderSpec				spec;
6113 	de::Random						rnd				(deStringHash(group->getName()));
6114 	const int						numElements		= 100;
6115 	vector<float>					positiveFloats	(numElements, 0);
6116 	vector<float>					negativeFloats	(numElements, 0);
6117 
6118 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6119 
6120 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6121 		negativeFloats[ndx] = -positiveFloats[ndx];
6122 
6123 	spec.assembly =
6124 		"OpCapability Shader\n"
6125 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6126 		"OpMemoryModel Logical GLSL450\n"
6127 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6128 		"OpExecutionMode %main LocalSize 1 1 1\n"
6129 
6130 		"OpSource GLSL 430\n"
6131 		"OpName %main           \"main\"\n"
6132 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6133 
6134 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6135 
6136 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6137 
6138 		"%fmat      = OpTypeMatrix %fvec3 3\n"
6139 		"%ten       = OpConstant %u32 10\n"
6140 		"%f32arr10  = OpTypeArray %f32 %ten\n"
6141 		"%fst       = OpTypeStruct %f32 %f32\n"
6142 
6143 		+ string(getComputeAsmInputOutputBuffer()) +
6144 
6145 		"%id        = OpVariable %uvec3ptr Input\n"
6146 		"%zero      = OpConstant %i32 0\n"
6147 
6148 		// Create a bunch of null values
6149 		"%unull     = OpConstantNull %u32\n"
6150 		"%fnull     = OpConstantNull %f32\n"
6151 		"%vnull     = OpConstantNull %fvec3\n"
6152 		"%mnull     = OpConstantNull %fmat\n"
6153 		"%anull     = OpConstantNull %f32arr10\n"
6154 		"%snull     = OpConstantComposite %fst %fnull %fnull\n"
6155 
6156 		"%main      = OpFunction %void None %voidf\n"
6157 		"%label     = OpLabel\n"
6158 		"%idval     = OpLoad %uvec3 %id\n"
6159 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6160 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6161 		"%inval     = OpLoad %f32 %inloc\n"
6162 		"%neg       = OpFNegate %f32 %inval\n"
6163 
6164 		// Get the abs() of (a certain element of) those null values
6165 		"%unull_cov = OpConvertUToF %f32 %unull\n"
6166 		"%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6167 		"%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6168 		"%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
6169 		"%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6170 		"%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
6171 		"%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6172 		"%anull_3   = OpCompositeExtract %f32 %anull 3\n"
6173 		"%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6174 		"%snull_1   = OpCompositeExtract %f32 %snull 1\n"
6175 		"%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6176 
6177 		// Add them all
6178 		"%add1      = OpFAdd %f32 %neg  %unull_abs\n"
6179 		"%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
6180 		"%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
6181 		"%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
6182 		"%add5      = OpFAdd %f32 %add4 %anull_abs\n"
6183 		"%final     = OpFAdd %f32 %add5 %snull_abs\n"
6184 
6185 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6186 		"             OpStore %outloc %final\n" // write to output
6187 		"             OpReturn\n"
6188 		"             OpFunctionEnd\n";
6189 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6190 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6191 	spec.numWorkGroups = IVec3(numElements, 1, 1);
6192 
6193 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
6194 
6195 	return group.release();
6196 }
6197 
6198 // Assembly code used for testing loop control is based on GLSL source code:
6199 // #version 430
6200 //
6201 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6202 //   float elements[];
6203 // } input_data;
6204 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6205 //   float elements[];
6206 // } output_data;
6207 //
6208 // void main() {
6209 //   uint x = gl_GlobalInvocationID.x;
6210 //   output_data.elements[x] = input_data.elements[x];
6211 //   for (uint i = 0; i < 4; ++i)
6212 //     output_data.elements[x] += 1.f;
6213 // }
createLoopControlGroup(tcu::TestContext & testCtx)6214 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6215 {
6216 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6217 	vector<CaseParameter>			cases;
6218 	de::Random						rnd				(deStringHash(group->getName()));
6219 	const int						numElements		= 100;
6220 	vector<float>					inputFloats		(numElements, 0);
6221 	vector<float>					outputFloats	(numElements, 0);
6222 	const StringTemplate			shaderTemplate	(
6223 		string(getComputeAsmShaderPreamble()) +
6224 
6225 		"OpSource GLSL 430\n"
6226 		"OpName %main \"main\"\n"
6227 		"OpName %id \"gl_GlobalInvocationID\"\n"
6228 
6229 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6230 
6231 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6232 
6233 		"%u32ptr      = OpTypePointer Function %u32\n"
6234 
6235 		"%id          = OpVariable %uvec3ptr Input\n"
6236 		"%zero        = OpConstant %i32 0\n"
6237 		"%uzero       = OpConstant %u32 0\n"
6238 		"%one         = OpConstant %i32 1\n"
6239 		"%constf1     = OpConstant %f32 1.0\n"
6240 		"%four        = OpConstant %u32 4\n"
6241 
6242 		"%main        = OpFunction %void None %voidf\n"
6243 		"%entry       = OpLabel\n"
6244 		"%i           = OpVariable %u32ptr Function\n"
6245 		"               OpStore %i %uzero\n"
6246 
6247 		"%idval       = OpLoad %uvec3 %id\n"
6248 		"%x           = OpCompositeExtract %u32 %idval 0\n"
6249 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
6250 		"%inval       = OpLoad %f32 %inloc\n"
6251 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
6252 		"               OpStore %outloc %inval\n"
6253 		"               OpBranch %loop_entry\n"
6254 
6255 		"%loop_entry  = OpLabel\n"
6256 		"%i_val       = OpLoad %u32 %i\n"
6257 		"%cmp_lt      = OpULessThan %bool %i_val %four\n"
6258 		"               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6259 		"               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6260 		"%loop_body   = OpLabel\n"
6261 		"%outval      = OpLoad %f32 %outloc\n"
6262 		"%addf1       = OpFAdd %f32 %outval %constf1\n"
6263 		"               OpStore %outloc %addf1\n"
6264 		"%new_i       = OpIAdd %u32 %i_val %one\n"
6265 		"               OpStore %i %new_i\n"
6266 		"               OpBranch %loop_entry\n"
6267 		"%loop_merge  = OpLabel\n"
6268 		"               OpReturn\n"
6269 		"               OpFunctionEnd\n");
6270 
6271 	cases.push_back(CaseParameter("none",				"None"));
6272 	cases.push_back(CaseParameter("unroll",				"Unroll"));
6273 	cases.push_back(CaseParameter("dont_unroll",		"DontUnroll"));
6274 
6275 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6276 
6277 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6278 		outputFloats[ndx] = inputFloats[ndx] + 4.f;
6279 
6280 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6281 	{
6282 		map<string, string>		specializations;
6283 		ComputeShaderSpec		spec;
6284 
6285 		specializations["CONTROL"] = cases[caseNdx].param;
6286 		spec.assembly = shaderTemplate.specialize(specializations);
6287 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6288 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6289 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6290 
6291 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6292 	}
6293 
6294 	group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
6295 	group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
6296 
6297 	return group.release();
6298 }
6299 
6300 // Assembly code used for testing selection control is based on GLSL source code:
6301 // #version 430
6302 //
6303 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6304 //   float elements[];
6305 // } input_data;
6306 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6307 //   float elements[];
6308 // } output_data;
6309 //
6310 // void main() {
6311 //   uint x = gl_GlobalInvocationID.x;
6312 //   float val = input_data.elements[x];
6313 //   if (val > 10.f)
6314 //     output_data.elements[x] = val + 1.f;
6315 //   else
6316 //     output_data.elements[x] = val - 1.f;
6317 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6318 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6319 {
6320 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6321 	vector<CaseParameter>			cases;
6322 	de::Random						rnd				(deStringHash(group->getName()));
6323 	const int						numElements		= 100;
6324 	vector<float>					inputFloats		(numElements, 0);
6325 	vector<float>					outputFloats	(numElements, 0);
6326 	const StringTemplate			shaderTemplate	(
6327 		string(getComputeAsmShaderPreamble()) +
6328 
6329 		"OpSource GLSL 430\n"
6330 		"OpName %main \"main\"\n"
6331 		"OpName %id \"gl_GlobalInvocationID\"\n"
6332 
6333 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6334 
6335 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6336 
6337 		"%id       = OpVariable %uvec3ptr Input\n"
6338 		"%zero     = OpConstant %i32 0\n"
6339 		"%constf1  = OpConstant %f32 1.0\n"
6340 		"%constf10 = OpConstant %f32 10.0\n"
6341 
6342 		"%main     = OpFunction %void None %voidf\n"
6343 		"%entry    = OpLabel\n"
6344 		"%idval    = OpLoad %uvec3 %id\n"
6345 		"%x        = OpCompositeExtract %u32 %idval 0\n"
6346 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
6347 		"%inval    = OpLoad %f32 %inloc\n"
6348 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
6349 		"%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
6350 
6351 		"            OpSelectionMerge %if_end ${CONTROL}\n"
6352 		"            OpBranchConditional %cmp_gt %if_true %if_false\n"
6353 		"%if_true  = OpLabel\n"
6354 		"%addf1    = OpFAdd %f32 %inval %constf1\n"
6355 		"            OpStore %outloc %addf1\n"
6356 		"            OpBranch %if_end\n"
6357 		"%if_false = OpLabel\n"
6358 		"%subf1    = OpFSub %f32 %inval %constf1\n"
6359 		"            OpStore %outloc %subf1\n"
6360 		"            OpBranch %if_end\n"
6361 		"%if_end   = OpLabel\n"
6362 		"            OpReturn\n"
6363 		"            OpFunctionEnd\n");
6364 
6365 	cases.push_back(CaseParameter("none",					"None"));
6366 	cases.push_back(CaseParameter("flatten",				"Flatten"));
6367 	cases.push_back(CaseParameter("dont_flatten",			"DontFlatten"));
6368 	cases.push_back(CaseParameter("flatten_dont_flatten",	"DontFlatten|Flatten"));
6369 
6370 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6371 
6372 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6373 	floorAll(inputFloats);
6374 
6375 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6376 		outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6377 
6378 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6379 	{
6380 		map<string, string>		specializations;
6381 		ComputeShaderSpec		spec;
6382 
6383 		specializations["CONTROL"] = cases[caseNdx].param;
6384 		spec.assembly = shaderTemplate.specialize(specializations);
6385 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6386 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6387 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6388 
6389 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6390 	}
6391 
6392 	return group.release();
6393 }
6394 
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6395 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6396 {
6397 	// Generate a long name.
6398 	std::string longname;
6399 	longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6400 
6401 	// Some bad names, abusing utf-8 encoding. This may also cause problems
6402 	// with the logs.
6403 	// 1. Various illegal code points in utf-8
6404 	std::string utf8illegal =
6405 		"Illegal bytes in UTF-8: "
6406 		"\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6407 		"illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6408 
6409 	// 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6410 	std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6411 
6412 	// 3. Some overlong encodings
6413 	std::string utf8overlong =
6414 		"UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6415 		"\xf0\x8f\xbf\xbf";
6416 
6417 	// 4. Internet "zalgo" meme "bleeding text"
6418 	std::string utf8zalgo =
6419 		"\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6420 		"\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6421 		"\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6422 		"\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6423 		"\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6424 		"\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6425 		"\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6426 		"\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6427 		"\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6428 		"\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6429 		"\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6430 		"\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6431 		"\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6432 		"\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6433 		"\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6434 		"\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6435 		"\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6436 		"\x93\xcd\x96\xcc\x97\xff";
6437 
6438 	// General name abuses
6439 	abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6440 	abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6441 	abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6442 	abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6443 	abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6444 
6445 	// GL keywords
6446 	abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6447 	abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6448 	abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6449 	abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6450 	abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6451 	abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6452 	abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6453 	abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6454 	abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6455 	abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6456 	abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6457 	abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6458 	abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6459 	abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6460 	abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6461 	abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6462 	abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6463 	abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6464 	abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6465 	abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6466 	abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6467 }
6468 
createOpNameGroup(tcu::TestContext & testCtx)6469 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6470 {
6471 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6472 	de::MovePtr<tcu::TestCaseGroup>	entryMainGroup	(new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6473 	de::MovePtr<tcu::TestCaseGroup>	entryNotGroup	(new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6474 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup		(new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6475 	vector<CaseParameter>			cases;
6476 	vector<CaseParameter>			abuseCases;
6477 	vector<string>					testFunc;
6478 	de::Random						rnd				(deStringHash(group->getName()));
6479 	const int						numElements		= 128;
6480 	vector<float>					inputFloats		(numElements, 0);
6481 	vector<float>					outputFloats	(numElements, 0);
6482 
6483 	getOpNameAbuseCases(abuseCases);
6484 
6485 	fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6486 
6487 	for(size_t ndx = 0; ndx < numElements; ++ndx)
6488 		outputFloats[ndx] = -inputFloats[ndx];
6489 
6490 	const string commonShaderHeader =
6491 		"OpCapability Shader\n"
6492 		"OpMemoryModel Logical GLSL450\n"
6493 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6494 		"OpExecutionMode %main LocalSize 1 1 1\n";
6495 
6496 	const string commonShaderFooter =
6497 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6498 
6499 		+ string(getComputeAsmInputOutputBufferTraits())
6500 		+ string(getComputeAsmCommonTypes())
6501 		+ string(getComputeAsmInputOutputBuffer()) +
6502 
6503 		"%id        = OpVariable %uvec3ptr Input\n"
6504 		"%zero      = OpConstant %i32 0\n"
6505 
6506 		"%func      = OpFunction %void None %voidf\n"
6507 		"%5         = OpLabel\n"
6508 		"             OpReturn\n"
6509 		"             OpFunctionEnd\n"
6510 
6511 		"%main      = OpFunction %void None %voidf\n"
6512 		"%entry     = OpLabel\n"
6513 		"%7         = OpFunctionCall %void %func\n"
6514 
6515 		"%idval     = OpLoad %uvec3 %id\n"
6516 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6517 
6518 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6519 		"%inval     = OpLoad %f32 %inloc\n"
6520 		"%neg       = OpFNegate %f32 %inval\n"
6521 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6522 		"             OpStore %outloc %neg\n"
6523 
6524 		"             OpReturn\n"
6525 		"             OpFunctionEnd\n";
6526 
6527 	const StringTemplate shaderTemplate (
6528 		"OpCapability Shader\n"
6529 		"OpMemoryModel Logical GLSL450\n"
6530 		"OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6531 		"OpExecutionMode %main LocalSize 1 1 1\n"
6532 		"OpName %${ID} \"${NAME}\"\n" +
6533 		commonShaderFooter);
6534 
6535 	const std::string multipleNames =
6536 		commonShaderHeader +
6537 		"OpName %main \"to_be\"\n"
6538 		"OpName %id   \"or_not\"\n"
6539 		"OpName %main \"to_be\"\n"
6540 		"OpName %main \"makes_no\"\n"
6541 		"OpName %func \"difference\"\n"
6542 		"OpName %5    \"to_me\"\n" +
6543 		commonShaderFooter;
6544 
6545 	{
6546 		ComputeShaderSpec	spec;
6547 
6548 		spec.assembly		= multipleNames;
6549 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6550 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6551 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6552 
6553 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6554 	}
6555 
6556 	const std::string everythingNamed =
6557 		commonShaderHeader +
6558 		"OpName %main   \"name1\"\n"
6559 		"OpName %id     \"name2\"\n"
6560 		"OpName %zero   \"name3\"\n"
6561 		"OpName %entry  \"name4\"\n"
6562 		"OpName %func   \"name5\"\n"
6563 		"OpName %5      \"name6\"\n"
6564 		"OpName %7      \"name7\"\n"
6565 		"OpName %idval  \"name8\"\n"
6566 		"OpName %inloc  \"name9\"\n"
6567 		"OpName %inval  \"name10\"\n"
6568 		"OpName %neg    \"name11\"\n"
6569 		"OpName %outloc \"name12\"\n"+
6570 		commonShaderFooter;
6571 	{
6572 		ComputeShaderSpec	spec;
6573 
6574 		spec.assembly		= everythingNamed;
6575 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6576 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6577 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6578 
6579 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6580 	}
6581 
6582 	const std::string everythingNamedTheSame =
6583 		commonShaderHeader +
6584 		"OpName %main   \"the_same\"\n"
6585 		"OpName %id     \"the_same\"\n"
6586 		"OpName %zero   \"the_same\"\n"
6587 		"OpName %entry  \"the_same\"\n"
6588 		"OpName %func   \"the_same\"\n"
6589 		"OpName %5      \"the_same\"\n"
6590 		"OpName %7      \"the_same\"\n"
6591 		"OpName %idval  \"the_same\"\n"
6592 		"OpName %inloc  \"the_same\"\n"
6593 		"OpName %inval  \"the_same\"\n"
6594 		"OpName %neg    \"the_same\"\n"
6595 		"OpName %outloc \"the_same\"\n"+
6596 		commonShaderFooter;
6597 	{
6598 		ComputeShaderSpec	spec;
6599 
6600 		spec.assembly		= everythingNamedTheSame;
6601 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6602 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6603 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6604 
6605 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6606 	}
6607 
6608 	// main_is_...
6609 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6610 	{
6611 		map<string, string>	specializations;
6612 		ComputeShaderSpec	spec;
6613 
6614 		specializations["ENTRY"]	= "main";
6615 		specializations["ID"]		= "main";
6616 		specializations["NAME"]		= abuseCases[ndx].param;
6617 		spec.assembly				= shaderTemplate.specialize(specializations);
6618 		spec.numWorkGroups			= IVec3(numElements, 1, 1);
6619 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6620 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6621 
6622 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6623 	}
6624 
6625 	// x_is_....
6626 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6627 	{
6628 		map<string, string>	specializations;
6629 		ComputeShaderSpec	spec;
6630 
6631 		specializations["ENTRY"]	= "main";
6632 		specializations["ID"]		= "x";
6633 		specializations["NAME"]		= abuseCases[ndx].param;
6634 		spec.assembly				= shaderTemplate.specialize(specializations);
6635 		spec.numWorkGroups			= IVec3(numElements, 1, 1);
6636 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6637 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6638 
6639 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6640 	}
6641 
6642 	cases.push_back(CaseParameter("_is_main", "main"));
6643 	cases.push_back(CaseParameter("_is_not_main", "not_main"));
6644 	testFunc.push_back("main");
6645 	testFunc.push_back("func");
6646 
6647 	for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6648 	{
6649 		for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6650 		{
6651 			map<string, string>	specializations;
6652 			ComputeShaderSpec	spec;
6653 
6654 			specializations["ENTRY"]	= "main";
6655 			specializations["ID"]		= testFunc[fNdx];
6656 			specializations["NAME"]		= cases[ndx].param;
6657 			spec.assembly				= shaderTemplate.specialize(specializations);
6658 			spec.numWorkGroups			= IVec3(numElements, 1, 1);
6659 			spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6660 			spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6661 
6662 			entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6663 		}
6664 	}
6665 
6666 	cases.push_back(CaseParameter("_is_entry", "rdc"));
6667 
6668 	for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6669 	{
6670 		for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6671 		{
6672 			map<string, string>     specializations;
6673 			ComputeShaderSpec       spec;
6674 
6675 			specializations["ENTRY"]	= "rdc";
6676 			specializations["ID"]		= testFunc[fNdx];
6677 			specializations["NAME"]		= cases[ndx].param;
6678 			spec.assembly				= shaderTemplate.specialize(specializations);
6679 			spec.numWorkGroups			= IVec3(numElements, 1, 1);
6680 			spec.entryPoint				= "rdc";
6681 			spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6682 			spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6683 
6684 			entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6685 		}
6686 	}
6687 
6688 	group->addChild(entryMainGroup.release());
6689 	group->addChild(entryNotGroup.release());
6690 	group->addChild(abuseGroup.release());
6691 
6692 	return group.release();
6693 }
6694 
createOpMemberNameGroup(tcu::TestContext & testCtx)6695 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6696 {
6697 	de::MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6698 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6699 	vector<CaseParameter>			abuseCases;
6700 	vector<string>					testFunc;
6701 	de::Random						rnd(deStringHash(group->getName()));
6702 	const int						numElements = 128;
6703 	vector<float>					inputFloats(numElements, 0);
6704 	vector<float>					outputFloats(numElements, 0);
6705 
6706 	getOpNameAbuseCases(abuseCases);
6707 
6708 	fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6709 
6710 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6711 		outputFloats[ndx] = -inputFloats[ndx];
6712 
6713 	const string commonShaderHeader =
6714 		"OpCapability Shader\n"
6715 		"OpMemoryModel Logical GLSL450\n"
6716 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6717 		"OpExecutionMode %main LocalSize 1 1 1\n";
6718 
6719 	const string commonShaderFooter =
6720 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6721 
6722 		+ string(getComputeAsmInputOutputBufferTraits())
6723 		+ string(getComputeAsmCommonTypes())
6724 		+ string(getComputeAsmInputOutputBuffer()) +
6725 
6726 		"%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6727 
6728 		"%id        = OpVariable %uvec3ptr Input\n"
6729 		"%zero      = OpConstant %i32 0\n"
6730 
6731 		"%main      = OpFunction %void None %voidf\n"
6732 		"%entry     = OpLabel\n"
6733 
6734 		"%idval     = OpLoad %uvec3 %id\n"
6735 		"%x0        = OpCompositeExtract %u32 %idval 0\n"
6736 
6737 		"%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6738 		"%x         = OpCompositeExtract %u32 %idstr 0\n"
6739 
6740 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6741 		"%inval     = OpLoad %f32 %inloc\n"
6742 		"%neg       = OpFNegate %f32 %inval\n"
6743 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6744 		"             OpStore %outloc %neg\n"
6745 
6746 		"             OpReturn\n"
6747 		"             OpFunctionEnd\n";
6748 
6749 	const StringTemplate shaderTemplate(
6750 		commonShaderHeader +
6751 		"OpMemberName %u3str 0 \"${NAME}\"\n" +
6752 		commonShaderFooter);
6753 
6754 	const std::string multipleNames =
6755 		commonShaderHeader +
6756 		"OpMemberName %u3str 0 \"to_be\"\n"
6757 		"OpMemberName %u3str 1 \"or_not\"\n"
6758 		"OpMemberName %u3str 0 \"to_be\"\n"
6759 		"OpMemberName %u3str 2 \"makes_no\"\n"
6760 		"OpMemberName %u3str 0 \"difference\"\n"
6761 		"OpMemberName %u3str 0 \"to_me\"\n" +
6762 		commonShaderFooter;
6763 	{
6764 		ComputeShaderSpec	spec;
6765 
6766 		spec.assembly = multipleNames;
6767 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6768 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6769 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6770 
6771 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6772 	}
6773 
6774 	const std::string everythingNamedTheSame =
6775 		commonShaderHeader +
6776 		"OpMemberName %u3str 0 \"the_same\"\n"
6777 		"OpMemberName %u3str 1 \"the_same\"\n"
6778 		"OpMemberName %u3str 2 \"the_same\"\n" +
6779 		commonShaderFooter;
6780 
6781 	{
6782 		ComputeShaderSpec	spec;
6783 
6784 		spec.assembly = everythingNamedTheSame;
6785 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6786 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6787 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6788 
6789 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6790 	}
6791 
6792 	// u3str_x_is_....
6793 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6794 	{
6795 		map<string, string>	specializations;
6796 		ComputeShaderSpec	spec;
6797 
6798 		specializations["NAME"] = abuseCases[ndx].param;
6799 		spec.assembly = shaderTemplate.specialize(specializations);
6800 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6801 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6802 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6803 
6804 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6805 	}
6806 
6807 	group->addChild(abuseGroup.release());
6808 
6809 	return group.release();
6810 }
6811 
6812 // Assembly code used for testing function control is based on GLSL source code:
6813 //
6814 // #version 430
6815 //
6816 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6817 //   float elements[];
6818 // } input_data;
6819 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6820 //   float elements[];
6821 // } output_data;
6822 //
6823 // float const10() { return 10.f; }
6824 //
6825 // void main() {
6826 //   uint x = gl_GlobalInvocationID.x;
6827 //   output_data.elements[x] = input_data.elements[x] + const10();
6828 // }
createFunctionControlGroup(tcu::TestContext & testCtx)6829 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6830 {
6831 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6832 	vector<CaseParameter>			cases;
6833 	de::Random						rnd				(deStringHash(group->getName()));
6834 	const int						numElements		= 100;
6835 	vector<float>					inputFloats		(numElements, 0);
6836 	vector<float>					outputFloats	(numElements, 0);
6837 	const StringTemplate			shaderTemplate	(
6838 		string(getComputeAsmShaderPreamble()) +
6839 
6840 		"OpSource GLSL 430\n"
6841 		"OpName %main \"main\"\n"
6842 		"OpName %func_const10 \"const10(\"\n"
6843 		"OpName %id \"gl_GlobalInvocationID\"\n"
6844 
6845 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6846 
6847 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6848 
6849 		"%f32f = OpTypeFunction %f32\n"
6850 		"%id = OpVariable %uvec3ptr Input\n"
6851 		"%zero = OpConstant %i32 0\n"
6852 		"%constf10 = OpConstant %f32 10.0\n"
6853 
6854 		"%main         = OpFunction %void None %voidf\n"
6855 		"%entry        = OpLabel\n"
6856 		"%idval        = OpLoad %uvec3 %id\n"
6857 		"%x            = OpCompositeExtract %u32 %idval 0\n"
6858 		"%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
6859 		"%inval        = OpLoad %f32 %inloc\n"
6860 		"%ret_10       = OpFunctionCall %f32 %func_const10\n"
6861 		"%fadd         = OpFAdd %f32 %inval %ret_10\n"
6862 		"%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
6863 		"                OpStore %outloc %fadd\n"
6864 		"                OpReturn\n"
6865 		"                OpFunctionEnd\n"
6866 
6867 		"%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6868 		"%label        = OpLabel\n"
6869 		"                OpReturnValue %constf10\n"
6870 		"                OpFunctionEnd\n");
6871 
6872 	cases.push_back(CaseParameter("none",						"None"));
6873 	cases.push_back(CaseParameter("inline",						"Inline"));
6874 	cases.push_back(CaseParameter("dont_inline",				"DontInline"));
6875 	cases.push_back(CaseParameter("pure",						"Pure"));
6876 	cases.push_back(CaseParameter("const",						"Const"));
6877 	cases.push_back(CaseParameter("inline_pure",				"Inline|Pure"));
6878 	cases.push_back(CaseParameter("const_dont_inline",			"Const|DontInline"));
6879 	cases.push_back(CaseParameter("inline_dont_inline",			"Inline|DontInline"));
6880 	cases.push_back(CaseParameter("pure_inline_dont_inline",	"Pure|Inline|DontInline"));
6881 
6882 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6883 
6884 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6885 	floorAll(inputFloats);
6886 
6887 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6888 		outputFloats[ndx] = inputFloats[ndx] + 10.f;
6889 
6890 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6891 	{
6892 		map<string, string>		specializations;
6893 		ComputeShaderSpec		spec;
6894 
6895 		specializations["CONTROL"] = cases[caseNdx].param;
6896 		spec.assembly = shaderTemplate.specialize(specializations);
6897 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6898 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6899 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6900 
6901 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6902 	}
6903 
6904 	return group.release();
6905 }
6906 
createMemoryAccessGroup(tcu::TestContext & testCtx)6907 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6908 {
6909 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6910 	vector<CaseParameter>			cases;
6911 	de::Random						rnd				(deStringHash(group->getName()));
6912 	const int						numElements		= 100;
6913 	vector<float>					inputFloats		(numElements, 0);
6914 	vector<float>					outputFloats	(numElements, 0);
6915 	const StringTemplate			shaderTemplate	(
6916 		string(getComputeAsmShaderPreamble()) +
6917 
6918 		"OpSource GLSL 430\n"
6919 		"OpName %main           \"main\"\n"
6920 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6921 
6922 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6923 
6924 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6925 
6926 		"%f32ptr_f  = OpTypePointer Function %f32\n"
6927 
6928 		"%id        = OpVariable %uvec3ptr Input\n"
6929 		"%zero      = OpConstant %i32 0\n"
6930 		"%four      = OpConstant %i32 4\n"
6931 
6932 		"%main      = OpFunction %void None %voidf\n"
6933 		"%label     = OpLabel\n"
6934 		"%copy      = OpVariable %f32ptr_f Function\n"
6935 		"%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
6936 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6937 		"%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
6938 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6939 		"             OpCopyMemory %copy %inloc ${ACCESS}\n"
6940 		"%val1      = OpLoad %f32 %copy\n"
6941 		"%val2      = OpLoad %f32 %inloc\n"
6942 		"%add       = OpFAdd %f32 %val1 %val2\n"
6943 		"             OpStore %outloc %add ${ACCESS}\n"
6944 		"             OpReturn\n"
6945 		"             OpFunctionEnd\n");
6946 
6947 	cases.push_back(CaseParameter("null",					""));
6948 	cases.push_back(CaseParameter("none",					"None"));
6949 	cases.push_back(CaseParameter("volatile",				"Volatile"));
6950 	cases.push_back(CaseParameter("aligned",				"Aligned 4"));
6951 	cases.push_back(CaseParameter("nontemporal",			"Nontemporal"));
6952 	cases.push_back(CaseParameter("aligned_nontemporal",	"Aligned|Nontemporal 4"));
6953 	cases.push_back(CaseParameter("aligned_volatile",		"Volatile|Aligned 4"));
6954 
6955 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6956 
6957 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6958 		outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6959 
6960 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6961 	{
6962 		map<string, string>		specializations;
6963 		ComputeShaderSpec		spec;
6964 
6965 		specializations["ACCESS"] = cases[caseNdx].param;
6966 		spec.assembly = shaderTemplate.specialize(specializations);
6967 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6968 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6969 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6970 
6971 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6972 	}
6973 
6974 	return group.release();
6975 }
6976 
6977 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)6978 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6979 {
6980 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6981 	vector<CaseParameter>			cases;
6982 	de::Random						rnd				(deStringHash(group->getName()));
6983 	const int						numElements		= 100;
6984 	vector<float>					positiveFloats	(numElements, 0);
6985 	vector<float>					negativeFloats	(numElements, 0);
6986 	const StringTemplate			shaderTemplate	(
6987 		string(getComputeAsmShaderPreamble()) +
6988 
6989 		"OpSource GLSL 430\n"
6990 		"OpName %main           \"main\"\n"
6991 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6992 
6993 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6994 
6995 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6996 		"%uvec2     = OpTypeVector %u32 2\n"
6997 		"%fvec4     = OpTypeVector %f32 4\n"
6998 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
6999 		"%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
7000 		"%sampler   = OpTypeSampler\n"
7001 		"%simage    = OpTypeSampledImage %image\n"
7002 		"%const100  = OpConstant %u32 100\n"
7003 		"%uarr100   = OpTypeArray %i32 %const100\n"
7004 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
7005 		"%pointer   = OpTypePointer Function %i32\n"
7006 		+ string(getComputeAsmInputOutputBuffer()) +
7007 
7008 		"%id        = OpVariable %uvec3ptr Input\n"
7009 		"%zero      = OpConstant %i32 0\n"
7010 
7011 		"%main      = OpFunction %void None %voidf\n"
7012 		"%label     = OpLabel\n"
7013 
7014 		"%undef     = OpUndef ${TYPE}\n"
7015 
7016 		"%idval     = OpLoad %uvec3 %id\n"
7017 		"%x         = OpCompositeExtract %u32 %idval 0\n"
7018 
7019 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7020 		"%inval     = OpLoad %f32 %inloc\n"
7021 		"%neg       = OpFNegate %f32 %inval\n"
7022 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7023 		"             OpStore %outloc %neg\n"
7024 		"             OpReturn\n"
7025 		"             OpFunctionEnd\n");
7026 
7027 	cases.push_back(CaseParameter("bool",			"%bool"));
7028 	cases.push_back(CaseParameter("sint32",			"%i32"));
7029 	cases.push_back(CaseParameter("uint32",			"%u32"));
7030 	cases.push_back(CaseParameter("float32",		"%f32"));
7031 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
7032 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
7033 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
7034 	cases.push_back(CaseParameter("image",			"%image"));
7035 	cases.push_back(CaseParameter("sampler",		"%sampler"));
7036 	cases.push_back(CaseParameter("sampledimage",	"%simage"));
7037 	cases.push_back(CaseParameter("array",			"%uarr100"));
7038 	cases.push_back(CaseParameter("runtimearray",	"%f32arr"));
7039 	cases.push_back(CaseParameter("struct",			"%struct"));
7040 	cases.push_back(CaseParameter("pointer",		"%pointer"));
7041 
7042 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7043 
7044 	for (size_t ndx = 0; ndx < numElements; ++ndx)
7045 		negativeFloats[ndx] = -positiveFloats[ndx];
7046 
7047 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7048 	{
7049 		map<string, string>		specializations;
7050 		ComputeShaderSpec		spec;
7051 
7052 		specializations["TYPE"] = cases[caseNdx].param;
7053 		spec.assembly = shaderTemplate.specialize(specializations);
7054 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7055 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7056 		spec.numWorkGroups = IVec3(numElements, 1, 1);
7057 
7058 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7059 	}
7060 
7061 	// OpUndef with constants.
7062 #ifndef CTS_USES_VULKANSC
7063 	{
7064 		static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7065 
7066 		static const struct
7067 		{
7068 			const std::string name;
7069 			const std::string desc;
7070 		} amberCases[] =
7071 		{
7072 			{ "undefined_constant_composite",		"OpUndef value in OpConstantComposite"		},
7073 			{ "undefined_spec_constant_composite",	"OpUndef value in OpSpecConstantComposite"	},
7074 		};
7075 
7076 		for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7077 		{
7078 			cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7079 																				amberCases[i].name.c_str(),
7080 																				amberCases[i].desc.c_str(),
7081 																				data_dir,
7082 																				amberCases[i].name + ".amber");
7083 			group->addChild(testCase);
7084 		}
7085 	}
7086 #endif
7087 
7088 	return group.release();
7089 }
7090 
7091 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7092 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7093 {
7094 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7095 	vector<CaseParameter>			cases;
7096 	de::Random						rnd				(deStringHash(group->getName()));
7097 	const int						numElements		= 100;
7098 	vector<float>					positiveFloats	(numElements, 0);
7099 	vector<float>					negativeFloats	(numElements, 0);
7100 	const StringTemplate			shaderTemplate	(
7101 		"OpCapability Shader\n"
7102 		"OpCapability Float16\n"
7103 		"OpMemoryModel Logical GLSL450\n"
7104 		"OpEntryPoint GLCompute %main \"main\" %id\n"
7105 		"OpExecutionMode %main LocalSize 1 1 1\n"
7106 		"OpSource GLSL 430\n"
7107 		"OpName %main           \"main\"\n"
7108 		"OpName %id             \"gl_GlobalInvocationID\"\n"
7109 
7110 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
7111 
7112 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7113 
7114 		"%id        = OpVariable %uvec3ptr Input\n"
7115 		"%zero      = OpConstant %i32 0\n"
7116 		"%f16       = OpTypeFloat 16\n"
7117 		"%c_f16_0   = OpConstant %f16 0.0\n"
7118 		"%c_f16_0_5 = OpConstant %f16 0.5\n"
7119 		"%c_f16_1   = OpConstant %f16 1.0\n"
7120 		"%v2f16     = OpTypeVector %f16 2\n"
7121 		"%v3f16     = OpTypeVector %f16 3\n"
7122 		"%v4f16     = OpTypeVector %f16 4\n"
7123 
7124 		"${CONSTANT}\n"
7125 
7126 		"%main      = OpFunction %void None %voidf\n"
7127 		"%label     = OpLabel\n"
7128 		"%idval     = OpLoad %uvec3 %id\n"
7129 		"%x         = OpCompositeExtract %u32 %idval 0\n"
7130 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7131 		"%inval     = OpLoad %f32 %inloc\n"
7132 		"%neg       = OpFNegate %f32 %inval\n"
7133 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7134 		"             OpStore %outloc %neg\n"
7135 		"             OpReturn\n"
7136 		"             OpFunctionEnd\n");
7137 
7138 
7139 	cases.push_back(CaseParameter("vector",			"%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7140 	cases.push_back(CaseParameter("matrix",			"%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7141 													"%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7142 													"%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7143 	cases.push_back(CaseParameter("struct",			"%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7144 													"%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7145 													"%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7146 													"%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7147 													"%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7148 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %i32 %f16\n"
7149 													"%st2 = OpTypeStruct %i32 %i32\n"
7150 													"%struct = OpTypeStruct %st1 %st2\n"
7151 													"%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7152 													"%st2val = OpConstantComposite %st2 %zero %zero\n"
7153 													"%const = OpConstantComposite %struct %st1val %st2val"));
7154 
7155 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7156 
7157 	for (size_t ndx = 0; ndx < numElements; ++ndx)
7158 		negativeFloats[ndx] = -positiveFloats[ndx];
7159 
7160 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7161 	{
7162 		map<string, string>		specializations;
7163 		ComputeShaderSpec		spec;
7164 
7165 		specializations["CONSTANT"] = cases[caseNdx].param;
7166 		spec.assembly = shaderTemplate.specialize(specializations);
7167 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7168 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7169 		spec.numWorkGroups = IVec3(numElements, 1, 1);
7170 
7171 		spec.extensions.push_back("VK_KHR_shader_float16_int8");
7172 
7173 		spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7174 
7175 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7176 	}
7177 
7178 	return group.release();
7179 }
7180 
// Expands inData (N values) into N*N values so that two expansions, one per argument slot,
// line up into every ordered pair of inputs:
//   argNo == 0: the whole sequence repeated N times      (a b c a b c a b c)
//   argNo == 1: each element repeated N times in a row   (a a a b b b c c c)
// Any other argNo yields an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		count	= inData.size();
	vector<deFloat16>	squared;

	squared.reserve(count * count);

	switch (argNo)
	{
		case 0:
			for (size_t repeatNdx = 0; repeatNdx < count; ++repeatNdx)
			{
				for (size_t srcNdx = 0; srcNdx < count; ++srcNdx)
					squared.push_back(inData[srcNdx]);
			}
			break;

		case 1:
			for (size_t srcNdx = 0; srcNdx < count; ++srcNdx)
				squared.insert(squared.end(), count, inData[srcNdx]);
			break;

		default:
			break;
	}

	return squared;
}
7206 
// Two-component vector counterpart of squarize(). First every ordered pair of inData values is
// written out as a 2-component vector (N*N vectors). Then those vectors are expanded into
// (N*N)^2 vectors: for argNo == 0 the inner-loop vector is emitted, for argNo == 1 the
// outer-loop vector is emitted. Any other argNo yields an empty vector.
const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		componentsPerVec	= 2;
	const size_t		scalarCount			= inData.size();
	vector<deFloat16>	pairs;
	vector<deFloat16>	expanded;

	DE_ASSERT(scalarCount <= 64);

	// Every ordered (first, second) pair of input scalars, stored flat.
	pairs.reserve(2 * scalarCount * scalarCount);

	for (size_t firstNdx = 0; firstNdx < scalarCount; ++firstNdx)
	{
		for (size_t secondNdx = 0; secondNdx < scalarCount; ++secondNdx)
		{
			pairs.push_back(inData[firstNdx]);
			pairs.push_back(inData[secondNdx]);
		}
	}

	const size_t		vecCount			= pairs.size() / componentsPerVec;

	expanded.reserve(componentsPerVec * vecCount * vecCount);

	if (argNo == 0 || argNo == 1)
	{
		for (size_t outerNdx = 0; outerNdx < vecCount; ++outerNdx)
		{
			for (size_t innerNdx = 0; innerNdx < vecCount; ++innerNdx)
			{
				// Argument 0 cycles through the vectors fastest, argument 1 slowest.
				const size_t	srcVec	= (argNo == 0) ? innerNdx : outerNdx;
				const size_t	srcBase	= componentsPerVec * srcVec;

				for (size_t compNdx = 0; compNdx < componentsPerVec; ++compNdx)
					expanded.push_back(pairs[srcBase + compNdx]);
			}
		}
	}

	return expanded;
}
7258 
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isNan7259 struct fp16isNan			{ bool operator()(const tcu::Float16 in1, const tcu::Float16)		{ return in1.isNaN(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isInf7260 struct fp16isInf			{ bool operator()(const tcu::Float16 in1, const tcu::Float16)		{ return in1.isInf(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isEqual7261 struct fp16isEqual			{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() == in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isUnequal7262 struct fp16isUnequal		{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() != in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isLess7263 struct fp16isLess			{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() <  in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isGreater7264 struct fp16isGreater		{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() >  in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isLessOrEqual7265 struct fp16isLessOrEqual	{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() <= in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon6f921be60111::fp16isGreaterOrEqual7266 struct fp16isGreaterOrEqual	{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() >= in2.asFloat(); } };
7267 
7268 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7269 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
7270 {
7271 	if (inputs.size() != 2 || outputAllocs.size() != 1)
7272 		return false;
7273 
7274 	vector<deUint8>	input1Bytes;
7275 	vector<deUint8>	input2Bytes;
7276 
7277 	inputs[0].getBytes(input1Bytes);
7278 	inputs[1].getBytes(input2Bytes);
7279 
7280 	const deUint32			denormModesCount			= 2;
7281 	const deFloat16			float16one					= tcu::Float16(1.0f).bits();
7282 	const deFloat16			float16zero					= tcu::Float16(0.0f).bits();
7283 	const tcu::Float16		zero						= tcu::Float16::zero(1);
7284 	const deFloat16* const	outputAsFP16				= static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
7285 	const deFloat16* const	input1AsFP16				= reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7286 	const deFloat16* const	input2AsFP16				= reinterpret_cast<deFloat16* const>(&input2Bytes.front());
7287 	deUint32				successfulRuns				= denormModesCount;
7288 	std::string				results[denormModesCount];
7289 	TestedLogicalFunction	testedLogicalFunction;
7290 
7291 	for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7292 	{
7293 		const bool flushToZero = (denormMode == 1);
7294 
7295 		for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7296 		{
7297 			const tcu::Float16	f1pre			= tcu::Float16(input1AsFP16[idx]);
7298 			const tcu::Float16	f2pre			= tcu::Float16(input2AsFP16[idx]);
7299 			const tcu::Float16	f1				= (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7300 			const tcu::Float16	f2				= (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7301 			deFloat16			expectedOutput	= float16zero;
7302 
7303 			if (onlyTestFunc)
7304 			{
7305 				if (testedLogicalFunction(f1, f2))
7306 					expectedOutput = float16one;
7307 			}
7308 			else
7309 			{
7310 				const bool	f1nan	= f1.isNaN();
7311 				const bool	f2nan	= f2.isNaN();
7312 
7313 				// Skip NaN floats if not supported by implementation
7314 				if (!nanSupported && (f1nan || f2nan))
7315 					continue;
7316 
7317 				if (unationModeAnd)
7318 				{
7319 					const bool	ordered		= !f1nan && !f2nan;
7320 
7321 					if (ordered && testedLogicalFunction(f1, f2))
7322 						expectedOutput = float16one;
7323 				}
7324 				else
7325 				{
7326 					const bool	unordered	= f1nan || f2nan;
7327 
7328 					if (unordered || testedLogicalFunction(f1, f2))
7329 						expectedOutput = float16one;
7330 				}
7331 			}
7332 
7333 			if (outputAsFP16[idx] != expectedOutput)
7334 			{
7335 				std::ostringstream str;
7336 
7337 				str << "ERROR: Sub-case #" << idx
7338 					<< " flushToZero:" << flushToZero
7339 					<< std::hex
7340 					<< " failed, inputs: 0x" << f1.bits()
7341 					<< ";0x" << f2.bits()
7342 					<< " output: 0x" << outputAsFP16[idx]
7343 					<< " expected output: 0x" << expectedOutput;
7344 
7345 				results[denormMode] = str.str();
7346 
7347 				successfulRuns--;
7348 
7349 				break;
7350 			}
7351 		}
7352 	}
7353 
7354 	if (successfulRuns == 0)
7355 		for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7356 			log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7357 
7358 	return successfulRuns > 0;
7359 }
7360 
7361 } // anonymous
7362 
createOpSourceTests(tcu::TestContext & testCtx)7363 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7364 {
7365 	struct NameCodePair { string name, code; };
7366 	RGBA							defaultColors[4];
7367 	de::MovePtr<tcu::TestCaseGroup> opSourceTests			(new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
7368 	const std::string				opsourceGLSLWithFile	= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7369 	map<string, string>				fragments				= passthruFragments();
7370 	const NameCodePair				tests[]					=
7371 	{
7372 		{"unknown", "OpSource Unknown 321"},
7373 		{"essl", "OpSource ESSL 310"},
7374 		{"glsl", "OpSource GLSL 450"},
7375 		{"opencl_cpp", "OpSource OpenCL_CPP 120"},
7376 		{"opencl_c", "OpSource OpenCL_C 120"},
7377 		{"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7378 		{"file", opsourceGLSLWithFile},
7379 		{"source", opsourceGLSLWithFile + "\"void main(){}\""},
7380 		// Longest possible source string: SPIR-V limits instructions to 65535
7381 		// words, of which the first 4 are opsourceGLSLWithFile; the rest will
7382 		// contain 65530 UTF8 characters (one word each) plus one last word
7383 		// containing 3 ASCII characters and \0.
7384 		{"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7385 	};
7386 
7387 	getDefaultColors(defaultColors);
7388 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7389 	{
7390 		fragments["debug"] = tests[testNdx].code;
7391 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7392 	}
7393 
7394 	return opSourceTests.release();
7395 }
7396 
createOpSourceContinuedTests(tcu::TestContext & testCtx)7397 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7398 {
7399 	struct NameCodePair { string name, code; };
7400 	RGBA								defaultColors[4];
7401 	de::MovePtr<tcu::TestCaseGroup>		opSourceTests		(new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7402 	map<string, string>					fragments			= passthruFragments();
7403 	const std::string					opsource			= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7404 	const NameCodePair					tests[]				=
7405 	{
7406 		{"empty", opsource + "OpSourceContinued \"\""},
7407 		{"short", opsource + "OpSourceContinued \"abcde\""},
7408 		{"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7409 		// Longest possible source string: SPIR-V limits instructions to 65535
7410 		// words, of which the first one is OpSourceContinued/length; the rest
7411 		// will contain 65533 UTF8 characters (one word each) plus one last word
7412 		// containing 3 ASCII characters and \0.
7413 		{"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7414 	};
7415 
7416 	getDefaultColors(defaultColors);
7417 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7418 	{
7419 		fragments["debug"] = tests[testNdx].code;
7420 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7421 	}
7422 
7423 	return opSourceTests.release();
7424 }
createOpNoLineTests(tcu::TestContext & testCtx)7425 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7426 {
7427 	RGBA								 defaultColors[4];
7428 	de::MovePtr<tcu::TestCaseGroup>		 opLineTests		 (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7429 	map<string, string>					 fragments;
7430 	getDefaultColors(defaultColors);
7431 	fragments["debug"]			=
7432 		"%name = OpString \"name\"\n";
7433 
7434 	fragments["pre_main"]	=
7435 		"OpNoLine\n"
7436 		"OpNoLine\n"
7437 		"OpLine %name 1 1\n"
7438 		"OpNoLine\n"
7439 		"OpLine %name 1 1\n"
7440 		"OpLine %name 1 1\n"
7441 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7442 		"OpNoLine\n"
7443 		"OpLine %name 1 1\n"
7444 		"OpNoLine\n"
7445 		"OpLine %name 1 1\n"
7446 		"OpLine %name 1 1\n"
7447 		"%second_param1 = OpFunctionParameter %v4f32\n"
7448 		"OpNoLine\n"
7449 		"OpNoLine\n"
7450 		"%label_secondfunction = OpLabel\n"
7451 		"OpNoLine\n"
7452 		"OpReturnValue %second_param1\n"
7453 		"OpFunctionEnd\n"
7454 		"OpNoLine\n"
7455 		"OpNoLine\n";
7456 
7457 	fragments["testfun"]		=
7458 		// A %test_code function that returns its argument unchanged.
7459 		"OpNoLine\n"
7460 		"OpNoLine\n"
7461 		"OpLine %name 1 1\n"
7462 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7463 		"OpNoLine\n"
7464 		"%param1 = OpFunctionParameter %v4f32\n"
7465 		"OpNoLine\n"
7466 		"OpNoLine\n"
7467 		"%label_testfun = OpLabel\n"
7468 		"OpNoLine\n"
7469 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7470 		"OpReturnValue %val1\n"
7471 		"OpFunctionEnd\n"
7472 		"OpLine %name 1 1\n"
7473 		"OpNoLine\n";
7474 
7475 	createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7476 
7477 	return opLineTests.release();
7478 }
7479 
createOpModuleProcessedTests(tcu::TestContext & testCtx)7480 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7481 {
7482 	RGBA								defaultColors[4];
7483 	de::MovePtr<tcu::TestCaseGroup>		opModuleProcessedTests			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7484 	map<string, string>					fragments;
7485 	std::vector<std::string>			noExtensions;
7486 	GraphicsResources					resources;
7487 
7488 	getDefaultColors(defaultColors);
7489 	resources.verifyBinary = veryfiBinaryShader;
7490 	resources.spirvVersion = SPIRV_VERSION_1_3;
7491 
7492 	fragments["moduleprocessed"]							=
7493 		"OpModuleProcessed \"VULKAN CTS\"\n"
7494 		"OpModuleProcessed \"Negative values\"\n"
7495 		"OpModuleProcessed \"Date: 2017/09/21\"\n";
7496 
7497 	fragments["pre_main"]	=
7498 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7499 		"%second_param1 = OpFunctionParameter %v4f32\n"
7500 		"%label_secondfunction = OpLabel\n"
7501 		"OpReturnValue %second_param1\n"
7502 		"OpFunctionEnd\n";
7503 
7504 	fragments["testfun"]		=
7505 		// A %test_code function that returns its argument unchanged.
7506 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7507 		"%param1 = OpFunctionParameter %v4f32\n"
7508 		"%label_testfun = OpLabel\n"
7509 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7510 		"OpReturnValue %val1\n"
7511 		"OpFunctionEnd\n";
7512 
7513 	createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7514 
7515 	return opModuleProcessedTests.release();
7516 }
7517 
7518 
createOpLineTests(tcu::TestContext & testCtx)7519 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7520 {
7521 	RGBA													defaultColors[4];
7522 	de::MovePtr<tcu::TestCaseGroup>							opLineTests			(new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7523 	map<string, string>										fragments;
7524 	std::vector<std::pair<std::string, std::string> >		problemStrings;
7525 
7526 	problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7527 	problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7528 	problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7529 	getDefaultColors(defaultColors);
7530 
7531 	fragments["debug"]			=
7532 		"%other_name = OpString \"other_name\"\n";
7533 
7534 	fragments["pre_main"]	=
7535 		"OpLine %file_name 32 0\n"
7536 		"OpLine %file_name 32 32\n"
7537 		"OpLine %file_name 32 40\n"
7538 		"OpLine %other_name 32 40\n"
7539 		"OpLine %other_name 0 100\n"
7540 		"OpLine %other_name 0 4294967295\n"
7541 		"OpLine %other_name 4294967295 0\n"
7542 		"OpLine %other_name 32 40\n"
7543 		"OpLine %file_name 0 0\n"
7544 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7545 		"OpLine %file_name 1 0\n"
7546 		"%second_param1 = OpFunctionParameter %v4f32\n"
7547 		"OpLine %file_name 1 3\n"
7548 		"OpLine %file_name 1 2\n"
7549 		"%label_secondfunction = OpLabel\n"
7550 		"OpLine %file_name 0 2\n"
7551 		"OpReturnValue %second_param1\n"
7552 		"OpFunctionEnd\n"
7553 		"OpLine %file_name 0 2\n"
7554 		"OpLine %file_name 0 2\n";
7555 
7556 	fragments["testfun"]		=
7557 		// A %test_code function that returns its argument unchanged.
7558 		"OpLine %file_name 1 0\n"
7559 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7560 		"OpLine %file_name 16 330\n"
7561 		"%param1 = OpFunctionParameter %v4f32\n"
7562 		"OpLine %file_name 14 442\n"
7563 		"%label_testfun = OpLabel\n"
7564 		"OpLine %file_name 11 1024\n"
7565 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7566 		"OpLine %file_name 2 97\n"
7567 		"OpReturnValue %val1\n"
7568 		"OpFunctionEnd\n"
7569 		"OpLine %file_name 5 32\n";
7570 
7571 	for (size_t i = 0; i < problemStrings.size(); ++i)
7572 	{
7573 		map<string, string> testFragments = fragments;
7574 		testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7575 		createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7576 	}
7577 
7578 	return opLineTests.release();
7579 }
7580 
createOpConstantNullTests(tcu::TestContext & testCtx)7581 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7582 {
7583 	de::MovePtr<tcu::TestCaseGroup> opConstantNullTests		(new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7584 	RGBA							colors[4];
7585 
7586 
7587 	const char						functionStart[] =
7588 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7589 		"%param1 = OpFunctionParameter %v4f32\n"
7590 		"%lbl    = OpLabel\n";
7591 
7592 	const char						functionEnd[]	=
7593 		"OpReturnValue %transformed_param\n"
7594 		"OpFunctionEnd\n";
7595 
7596 	struct NameConstantsCode
7597 	{
7598 		string name;
7599 		string constants;
7600 		string code;
7601 	};
7602 
7603 	NameConstantsCode tests[] =
7604 	{
7605 		{
7606 			"vec4",
7607 			"%cnull = OpConstantNull %v4f32\n",
7608 			"%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7609 		},
7610 		{
7611 			"float",
7612 			"%cnull = OpConstantNull %f32\n",
7613 			"%vp = OpVariable %fp_v4f32 Function\n"
7614 			"%v  = OpLoad %v4f32 %vp\n"
7615 			"%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7616 			"%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7617 			"%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7618 			"%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7619 			"%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7620 		},
7621 		{
7622 			"bool",
7623 			"%cnull             = OpConstantNull %bool\n",
7624 			"%v                 = OpVariable %fp_v4f32 Function\n"
7625 			"                     OpStore %v %param1\n"
7626 			"                     OpSelectionMerge %false_label None\n"
7627 			"                     OpBranchConditional %cnull %true_label %false_label\n"
7628 			"%true_label        = OpLabel\n"
7629 			"                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7630 			"                     OpBranch %false_label\n"
7631 			"%false_label       = OpLabel\n"
7632 			"%transformed_param = OpLoad %v4f32 %v\n"
7633 		},
7634 		{
7635 			"i32",
7636 			"%cnull             = OpConstantNull %i32\n",
7637 			"%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7638 			"%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7639 			"                     OpSelectionMerge %false_label None\n"
7640 			"                     OpBranchConditional %b %true_label %false_label\n"
7641 			"%true_label        = OpLabel\n"
7642 			"                     OpStore %v %param1\n"
7643 			"                     OpBranch %false_label\n"
7644 			"%false_label       = OpLabel\n"
7645 			"%transformed_param = OpLoad %v4f32 %v\n"
7646 		},
7647 		{
7648 			"struct",
7649 			"%stype             = OpTypeStruct %f32 %v4f32\n"
7650 			"%fp_stype          = OpTypePointer Function %stype\n"
7651 			"%cnull             = OpConstantNull %stype\n",
7652 			"%v                 = OpVariable %fp_stype Function %cnull\n"
7653 			"%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7654 			"%f_val             = OpLoad %v4f32 %f\n"
7655 			"%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7656 		},
7657 		{
7658 			"array",
7659 			"%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7660 			"%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7661 			"%cnull             = OpConstantNull %a4_v4f32\n",
7662 			"%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7663 			"%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7664 			"%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7665 			"%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7666 			"%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7667 			"%f_val             = OpLoad %v4f32 %f\n"
7668 			"%f1_val            = OpLoad %v4f32 %f1\n"
7669 			"%f2_val            = OpLoad %v4f32 %f2\n"
7670 			"%f3_val            = OpLoad %v4f32 %f3\n"
7671 			"%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7672 			"%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7673 			"%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7674 			"%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7675 		},
7676 		{
7677 			"matrix",
7678 			"%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7679 			"%cnull             = OpConstantNull %mat4x4_f32\n",
7680 			// Our null matrix * any vector should result in a zero vector.
7681 			"%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7682 			"%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7683 		}
7684 	};
7685 
7686 	getHalfColorsFullAlpha(colors);
7687 
7688 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7689 	{
7690 		map<string, string> fragments;
7691 		fragments["pre_main"] = tests[testNdx].constants;
7692 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7693 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7694 	}
7695 	return opConstantNullTests.release();
7696 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7697 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7698 {
7699 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7700 	RGBA							inputColors[4];
7701 	RGBA							outputColors[4];
7702 
7703 
7704 	const char						functionStart[]	 =
7705 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7706 		"%param1 = OpFunctionParameter %v4f32\n"
7707 		"%lbl    = OpLabel\n";
7708 
7709 	const char						functionEnd[]		=
7710 		"OpReturnValue %transformed_param\n"
7711 		"OpFunctionEnd\n";
7712 
7713 	struct NameConstantsCode
7714 	{
7715 		string name;
7716 		string constants;
7717 		string code;
7718 	};
7719 
7720 	NameConstantsCode tests[] =
7721 	{
7722 		{
7723 			"vec4",
7724 
7725 			"%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7726 			"%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7727 		},
7728 		{
7729 			"struct",
7730 
7731 			"%stype             = OpTypeStruct %v4f32 %f32\n"
7732 			"%fp_stype          = OpTypePointer Function %stype\n"
7733 			"%f32_n_1           = OpConstant %f32 -1.0\n"
7734 			"%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7735 			"%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7736 			"%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7737 
7738 			"%v                 = OpVariable %fp_stype Function %cval\n"
7739 			"%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7740 			"%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7741 			"%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7742 			"%f32_val           = OpLoad %f32 %f32_ptr\n"
7743 			"%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7744 			"%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7745 			"%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7746 		},
7747 		{
7748 			// [1|0|0|0.5] [x] = x + 0.5
7749 			// [0|1|0|0.5] [y] = y + 0.5
7750 			// [0|0|1|0.5] [z] = z + 0.5
7751 			// [0|0|0|1  ] [1] = 1
7752 			"matrix",
7753 
7754 			"%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7755 			"%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7756 			"%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7757 			"%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7758 			"%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7759 			"%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7760 
7761 			"%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
7762 		},
7763 		{
7764 			"array",
7765 
7766 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7767 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
7768 			"%f32_n_1             = OpConstant %f32 -1.0\n"
7769 			"%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
7770 			"%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7771 
7772 			"%v                   = OpVariable %fp_a4f32 Function %carr\n"
7773 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
7774 			"%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
7775 			"%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
7776 			"%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
7777 			"%f_val               = OpLoad %f32 %f\n"
7778 			"%f1_val              = OpLoad %f32 %f1\n"
7779 			"%f2_val              = OpLoad %f32 %f2\n"
7780 			"%f3_val              = OpLoad %f32 %f3\n"
7781 			"%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
7782 			"%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
7783 			"%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
7784 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7785 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7786 		},
7787 		{
7788 			//
7789 			// [
7790 			//   {
7791 			//      0.0,
7792 			//      [ 1.0, 1.0, 1.0, 1.0]
7793 			//   },
7794 			//   {
7795 			//      1.0,
7796 			//      [ 0.0, 0.5, 0.0, 0.0]
7797 			//   }, //     ^^^
7798 			//   {
7799 			//      0.0,
7800 			//      [ 1.0, 1.0, 1.0, 1.0]
7801 			//   }
7802 			// ]
7803 			"array_of_struct_of_array",
7804 
7805 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7806 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
7807 			"%stype               = OpTypeStruct %f32 %a4f32\n"
7808 			"%a3stype             = OpTypeArray %stype %c_u32_3\n"
7809 			"%fp_a3stype          = OpTypePointer Function %a3stype\n"
7810 			"%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7811 			"%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7812 			"%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7813 			"%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7814 			"%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7815 
7816 			"%v                   = OpVariable %fp_a3stype Function %carr\n"
7817 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7818 			"%f_l                 = OpLoad %f32 %f\n"
7819 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7820 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7821 		}
7822 	};
7823 
7824 	getHalfColorsFullAlpha(inputColors);
7825 	outputColors[0] = RGBA(255, 255, 255, 255);
7826 	outputColors[1] = RGBA(255, 127, 127, 255);
7827 	outputColors[2] = RGBA(127, 255, 127, 255);
7828 	outputColors[3] = RGBA(127, 127, 255, 255);
7829 
7830 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7831 	{
7832 		map<string, string> fragments;
7833 		fragments["pre_main"] = tests[testNdx].constants;
7834 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7835 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7836 	}
7837 	return opConstantCompositeTests.release();
7838 }
7839 
createSelectionBlockOrderTests(tcu::TestContext & testCtx)7840 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7841 {
7842 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7843 	RGBA							inputColors[4];
7844 	RGBA							outputColors[4];
7845 	map<string, string>				fragments;
7846 
7847 	// vec4 test_code(vec4 param) {
7848 	//   vec4 result = param;
7849 	//   for (int i = 0; i < 4; ++i) {
7850 	//     if (i == 0) result[i] = 0.;
7851 	//     else        result[i] = 1. - result[i];
7852 	//   }
7853 	//   return result;
7854 	// }
7855 	const char						function[]			=
7856 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7857 		"%param1    = OpFunctionParameter %v4f32\n"
7858 		"%lbl       = OpLabel\n"
7859 		"%iptr      = OpVariable %fp_i32 Function\n"
7860 		"%result    = OpVariable %fp_v4f32 Function\n"
7861 		"             OpStore %iptr %c_i32_0\n"
7862 		"             OpStore %result %param1\n"
7863 		"             OpBranch %loop\n"
7864 
7865 		// Loop entry block.
7866 		"%loop      = OpLabel\n"
7867 		"%ival      = OpLoad %i32 %iptr\n"
7868 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7869 		"             OpLoopMerge %exit %if_entry None\n"
7870 		"             OpBranchConditional %lt_4 %if_entry %exit\n"
7871 
7872 		// Merge block for loop.
7873 		"%exit      = OpLabel\n"
7874 		"%ret       = OpLoad %v4f32 %result\n"
7875 		"             OpReturnValue %ret\n"
7876 
7877 		// If-statement entry block.
7878 		"%if_entry  = OpLabel\n"
7879 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
7880 		"%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
7881 		"             OpSelectionMerge %if_exit None\n"
7882 		"             OpBranchConditional %eq_0 %if_true %if_false\n"
7883 
7884 		// False branch for if-statement.
7885 		"%if_false  = OpLabel\n"
7886 		"%val       = OpLoad %f32 %loc\n"
7887 		"%sub       = OpFSub %f32 %c_f32_1 %val\n"
7888 		"             OpStore %loc %sub\n"
7889 		"             OpBranch %if_exit\n"
7890 
7891 		// Merge block for if-statement.
7892 		"%if_exit   = OpLabel\n"
7893 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7894 		"             OpStore %iptr %ival_next\n"
7895 		"             OpBranch %loop\n"
7896 
7897 		// True branch for if-statement.
7898 		"%if_true   = OpLabel\n"
7899 		"             OpStore %loc %c_f32_0\n"
7900 		"             OpBranch %if_exit\n"
7901 
7902 		"             OpFunctionEnd\n";
7903 
7904 	fragments["testfun"]	= function;
7905 
7906 	inputColors[0]			= RGBA(127, 127, 127, 0);
7907 	inputColors[1]			= RGBA(127, 0,   0,   0);
7908 	inputColors[2]			= RGBA(0,   127, 0,   0);
7909 	inputColors[3]			= RGBA(0,   0,   127, 0);
7910 
7911 	outputColors[0]			= RGBA(0, 128, 128, 255);
7912 	outputColors[1]			= RGBA(0, 255, 255, 255);
7913 	outputColors[2]			= RGBA(0, 128, 255, 255);
7914 	outputColors[3]			= RGBA(0, 255, 128, 255);
7915 
7916 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7917 
7918 	return group.release();
7919 }
7920 
createSwitchBlockOrderTests(tcu::TestContext & testCtx)7921 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7922 {
7923 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7924 	RGBA							inputColors[4];
7925 	RGBA							outputColors[4];
7926 	map<string, string>				fragments;
7927 
7928 	const char						typesAndConstants[]	=
7929 		"%c_f32_p2  = OpConstant %f32 0.2\n"
7930 		"%c_f32_p4  = OpConstant %f32 0.4\n"
7931 		"%c_f32_p6  = OpConstant %f32 0.6\n"
7932 		"%c_f32_p8  = OpConstant %f32 0.8\n";
7933 
7934 	// vec4 test_code(vec4 param) {
7935 	//   vec4 result = param;
7936 	//   for (int i = 0; i < 4; ++i) {
7937 	//     switch (i) {
7938 	//       case 0: result[i] += .2; break;
7939 	//       case 1: result[i] += .6; break;
7940 	//       case 2: result[i] += .4; break;
7941 	//       case 3: result[i] += .8; break;
7942 	//       default: break; // unreachable
7943 	//     }
7944 	//   }
7945 	//   return result;
7946 	// }
7947 	const char						function[]			=
7948 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7949 		"%param1    = OpFunctionParameter %v4f32\n"
7950 		"%lbl       = OpLabel\n"
7951 		"%iptr      = OpVariable %fp_i32 Function\n"
7952 		"%result    = OpVariable %fp_v4f32 Function\n"
7953 		"             OpStore %iptr %c_i32_0\n"
7954 		"             OpStore %result %param1\n"
7955 		"             OpBranch %loop\n"
7956 
7957 		// Loop entry block.
7958 		"%loop      = OpLabel\n"
7959 		"%ival      = OpLoad %i32 %iptr\n"
7960 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7961 		"             OpLoopMerge %exit %cont None\n"
7962 		"             OpBranchConditional %lt_4 %switch_entry %exit\n"
7963 
7964 		// Merge block for loop.
7965 		"%exit      = OpLabel\n"
7966 		"%ret       = OpLoad %v4f32 %result\n"
7967 		"             OpReturnValue %ret\n"
7968 
7969 		// Switch-statement entry block.
7970 		"%switch_entry   = OpLabel\n"
7971 		"%loc            = OpAccessChain %fp_f32 %result %ival\n"
7972 		"%val            = OpLoad %f32 %loc\n"
7973 		"                  OpSelectionMerge %switch_exit None\n"
7974 		"                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7975 
7976 		"%case2          = OpLabel\n"
7977 		"%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
7978 		"                  OpStore %loc %addp4\n"
7979 		"                  OpBranch %switch_exit\n"
7980 
7981 		"%switch_default = OpLabel\n"
7982 		"                  OpUnreachable\n"
7983 
7984 		"%case3          = OpLabel\n"
7985 		"%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
7986 		"                  OpStore %loc %addp8\n"
7987 		"                  OpBranch %switch_exit\n"
7988 
7989 		"%case0          = OpLabel\n"
7990 		"%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
7991 		"                  OpStore %loc %addp2\n"
7992 		"                  OpBranch %switch_exit\n"
7993 
7994 		// Merge block for switch-statement.
7995 		"%switch_exit    = OpLabel\n"
7996 		"%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
7997 		"                  OpStore %iptr %ival_next\n"
7998 		"                  OpBranch %cont\n"
7999 		"%cont           = OpLabel\n"
8000 		"                  OpBranch %loop\n"
8001 
8002 		"%case1          = OpLabel\n"
8003 		"%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
8004 		"                  OpStore %loc %addp6\n"
8005 		"                  OpBranch %switch_exit\n"
8006 
8007 		"                  OpFunctionEnd\n";
8008 
8009 	fragments["pre_main"]	= typesAndConstants;
8010 	fragments["testfun"]	= function;
8011 
8012 	inputColors[0]			= RGBA(127, 27,  127, 51);
8013 	inputColors[1]			= RGBA(127, 0,   0,   51);
8014 	inputColors[2]			= RGBA(0,   27,  0,   51);
8015 	inputColors[3]			= RGBA(0,   0,   127, 51);
8016 
8017 	outputColors[0]			= RGBA(178, 180, 229, 255);
8018 	outputColors[1]			= RGBA(178, 153, 102, 255);
8019 	outputColors[2]			= RGBA(51,  180, 102, 255);
8020 	outputColors[3]			= RGBA(51,  153, 229, 255);
8021 
8022 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8023 
8024 	addOpSwitchAmberTests(*group, testCtx);
8025 
8026 	return group.release();
8027 }
8028 
createDecorationGroupTests(tcu::TestContext & testCtx)8029 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8030 {
8031 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8032 	RGBA							inputColors[4];
8033 	RGBA							outputColors[4];
8034 	map<string, string>				fragments;
8035 
8036 	const char						decorations[]		=
8037 		"OpDecorate %array_group         ArrayStride 4\n"
8038 		"OpDecorate %struct_member_group Offset 0\n"
8039 		"%array_group         = OpDecorationGroup\n"
8040 		"%struct_member_group = OpDecorationGroup\n"
8041 
8042 		"OpDecorate %group1 RelaxedPrecision\n"
8043 		"OpDecorate %group3 RelaxedPrecision\n"
8044 		"OpDecorate %group3 Flat\n"
8045 		"OpDecorate %group3 Restrict\n"
8046 		"%group0 = OpDecorationGroup\n"
8047 		"%group1 = OpDecorationGroup\n"
8048 		"%group3 = OpDecorationGroup\n";
8049 
8050 	const char						typesAndConstants[]	=
8051 		"%a3f32     = OpTypeArray %f32 %c_u32_3\n"
8052 		"%struct1   = OpTypeStruct %a3f32\n"
8053 		"%struct2   = OpTypeStruct %a3f32\n"
8054 		"%fp_struct1 = OpTypePointer Function %struct1\n"
8055 		"%fp_struct2 = OpTypePointer Function %struct2\n"
8056 		"%c_f32_2    = OpConstant %f32 2.\n"
8057 		"%c_f32_n2   = OpConstant %f32 -2.\n"
8058 
8059 		"%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8060 		"%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8061 		"%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8062 		"%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8063 
8064 	const char						function[]			=
8065 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8066 		"%param     = OpFunctionParameter %v4f32\n"
8067 		"%entry     = OpLabel\n"
8068 		"%result    = OpVariable %fp_v4f32 Function\n"
8069 		"%v_struct1 = OpVariable %fp_struct1 Function\n"
8070 		"%v_struct2 = OpVariable %fp_struct2 Function\n"
8071 		"             OpStore %result %param\n"
8072 		"             OpStore %v_struct1 %c_struct1\n"
8073 		"             OpStore %v_struct2 %c_struct2\n"
8074 		"%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8075 		"%val1      = OpLoad %f32 %ptr1\n"
8076 		"%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8077 		"%val2      = OpLoad %f32 %ptr2\n"
8078 		"%addvalues = OpFAdd %f32 %val1 %val2\n"
8079 		"%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8080 		"%val       = OpLoad %f32 %ptr\n"
8081 		"%addresult = OpFAdd %f32 %addvalues %val\n"
8082 		"             OpStore %ptr %addresult\n"
8083 		"%ret       = OpLoad %v4f32 %result\n"
8084 		"             OpReturnValue %ret\n"
8085 		"             OpFunctionEnd\n";
8086 
8087 	struct CaseNameDecoration
8088 	{
8089 		string name;
8090 		string decoration;
8091 	};
8092 
8093 	CaseNameDecoration tests[] =
8094 	{
8095 		{
8096 			"same_decoration_group_on_multiple_types",
8097 			"OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8098 		},
8099 		{
8100 			"empty_decoration_group",
8101 			"OpGroupDecorate %group0      %a3f32\n"
8102 			"OpGroupDecorate %group0      %result\n"
8103 		},
8104 		{
8105 			"one_element_decoration_group",
8106 			"OpGroupDecorate %array_group %a3f32\n"
8107 		},
8108 		{
8109 			"multiple_elements_decoration_group",
8110 			"OpGroupDecorate %group3      %v_struct1\n"
8111 		},
8112 		{
8113 			"multiple_decoration_groups_on_same_variable",
8114 			"OpGroupDecorate %group0      %v_struct2\n"
8115 			"OpGroupDecorate %group1      %v_struct2\n"
8116 			"OpGroupDecorate %group3      %v_struct2\n"
8117 		},
8118 		{
8119 			"same_decoration_group_multiple_times",
8120 			"OpGroupDecorate %group1      %addvalues\n"
8121 			"OpGroupDecorate %group1      %addvalues\n"
8122 			"OpGroupDecorate %group1      %addvalues\n"
8123 		},
8124 
8125 	};
8126 
8127 	getHalfColorsFullAlpha(inputColors);
8128 	getHalfColorsFullAlpha(outputColors);
8129 
8130 	for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8131 	{
8132 		fragments["decoration"]	= decorations + tests[idx].decoration;
8133 		fragments["pre_main"]	= typesAndConstants;
8134 		fragments["testfun"]	= function;
8135 
8136 		createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8137 	}
8138 
8139 	return group.release();
8140 }
8141 
8142 struct SpecConstantTwoValGraphicsCase
8143 {
8144 	const std::string	caseName;
8145 	const std::string	scDefinition0;
8146 	const std::string	scDefinition1;
8147 	const std::string	scResultType;
8148 	const std::string	scOperation;
8149 	SpecConstantValue	scActualValue0;
8150 	SpecConstantValue	scActualValue1;
8151 	const std::string	resultOperation;
8152 	RGBA				expectedColors[4];
8153 	CaseFlags			caseFlags;
8154 
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8155 						SpecConstantTwoValGraphicsCase (const std::string&			name,
8156 														const std::string&			definition0,
8157 														const std::string&			definition1,
8158 														const std::string&			resultType,
8159 														const std::string&			operation,
8160 														const SpecConstantValue&	value0,
8161 														const SpecConstantValue&	value1,
8162 														const std::string&			resultOp,
8163 														const RGBA					(&output)[4],
8164 														CaseFlags					flags = FLAG_NONE)
8165 							: caseName				(name)
8166 							, scDefinition0			(definition0)
8167 							, scDefinition1			(definition1)
8168 							, scResultType			(resultType)
8169 							, scOperation			(operation)
8170 							, scActualValue0		(value0)
8171 							, scActualValue1		(value1)
8172 							, resultOperation		(resultOp)
8173 							, caseFlags				(flags)
8174 	{
8175 		expectedColors[0] = output[0];
8176 		expectedColors[1] = output[1];
8177 		expectedColors[2] = output[2];
8178 		expectedColors[3] = output[3];
8179 	}
8180 };
8181 
createSpecConstantTests(tcu::TestContext & testCtx)8182 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8183 {
8184 	de::MovePtr<tcu::TestCaseGroup>			group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8185 	vector<SpecConstantTwoValGraphicsCase>	cases;
8186 	RGBA									inputColors[4];
8187 	RGBA									outputColors0[4];
8188 	RGBA									outputColors1[4];
8189 	RGBA									outputColors2[4];
8190 
8191 	const char	decorations1[]			=
8192 		"OpDecorate %sc_0  SpecId 0\n"
8193 		"OpDecorate %sc_1  SpecId 1\n";
8194 
8195 	const char	typesAndConstants1[]	=
8196 		"${OPTYPE_DEFINITIONS:opt}"
8197 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
8198 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
8199 		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8200 
8201 	const char	function1[]				=
8202 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8203 		"%param     = OpFunctionParameter %v4f32\n"
8204 		"%label     = OpLabel\n"
8205 		"%result    = OpVariable %fp_v4f32 Function\n"
8206 		"${TYPE_CONVERT:opt}"
8207 		"             OpStore %result %param\n"
8208 		"%gen       = ${GEN_RESULT}\n"
8209 		"%index     = OpIAdd %i32 %gen %c_i32_1\n"
8210 		"%loc       = OpAccessChain %fp_f32 %result %index\n"
8211 		"%val       = OpLoad %f32 %loc\n"
8212 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8213 		"             OpStore %loc %add\n"
8214 		"%ret       = OpLoad %v4f32 %result\n"
8215 		"             OpReturnValue %ret\n"
8216 		"             OpFunctionEnd\n";
8217 
8218 	inputColors[0] = RGBA(127, 127, 127, 255);
8219 	inputColors[1] = RGBA(127, 0,   0,   255);
8220 	inputColors[2] = RGBA(0,   127, 0,   255);
8221 	inputColors[3] = RGBA(0,   0,   127, 255);
8222 
8223 	// Derived from inputColors[x] by adding 128 to inputColors[x][0].
8224 	outputColors0[0] = RGBA(255, 127, 127, 255);
8225 	outputColors0[1] = RGBA(255, 0,   0,   255);
8226 	outputColors0[2] = RGBA(128, 127, 0,   255);
8227 	outputColors0[3] = RGBA(128, 0,   127, 255);
8228 
8229 	// Derived from inputColors[x] by adding 128 to inputColors[x][1].
8230 	outputColors1[0] = RGBA(127, 255, 127, 255);
8231 	outputColors1[1] = RGBA(127, 128, 0,   255);
8232 	outputColors1[2] = RGBA(0,   255, 0,   255);
8233 	outputColors1[3] = RGBA(0,   128, 127, 255);
8234 
8235 	// Derived from inputColors[x] by adding 128 to inputColors[x][2].
8236 	outputColors2[0] = RGBA(127, 127, 255, 255);
8237 	outputColors2[1] = RGBA(127, 0,   128, 255);
8238 	outputColors2[2] = RGBA(0,   127, 128, 255);
8239 	outputColors2[3] = RGBA(0,   0,   255, 255);
8240 
8241 	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
8242 	const char addZeroToSc32[]		= "OpIAdd %i32 %c_i32_0 %sc_op32";
8243 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8244 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8245 
8246 	cases.push_back(SpecConstantTwoValGraphicsCase("iadd",							" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",				19,					-20,				addZeroToSc,		outputColors0));
8247 	cases.push_back(SpecConstantTwoValGraphicsCase("isub",							" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",				19,					20,					addZeroToSc,		outputColors0));
8248 	cases.push_back(SpecConstantTwoValGraphicsCase("imul",							" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",				-1,					-1,					addZeroToSc,		outputColors2));
8249 	cases.push_back(SpecConstantTwoValGraphicsCase("sdiv",							" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",				-126,				126,				addZeroToSc,		outputColors0));
8250 	cases.push_back(SpecConstantTwoValGraphicsCase("udiv",							" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",				126,				126,				addZeroToSc,		outputColors2));
8251 	cases.push_back(SpecConstantTwoValGraphicsCase("srem",							" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",				3,					2,					addZeroToSc,		outputColors2));
8252 	cases.push_back(SpecConstantTwoValGraphicsCase("smod",							" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",				3,					2,					addZeroToSc,		outputColors2));
8253 	cases.push_back(SpecConstantTwoValGraphicsCase("umod",							" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",				1001,				500,				addZeroToSc,		outputColors2));
8254 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",				0x33,				0x0d,				addZeroToSc,		outputColors2));
8255 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor",						" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",				0,					1,					addZeroToSc,		outputColors2));
8256 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",				0x2e,				0x2f,				addZeroToSc,		outputColors2));
8257 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical",				" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					1,					addZeroToSc,		outputColors2));
8258 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic",			" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					2,					addZeroToSc,		outputColors0));
8259 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical",				" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					0,					addZeroToSc,		outputColors2));
8260 
8261 	// Shifts for other integer sizes.
8262 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64",			" %i64 0",		" %i64 0",		"%i64",		"ShiftRightLogical    %sc_0 %sc_1",				deInt64{2},			deInt64{1},			addZeroToSc32,		outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8263 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt64{-4},		deInt64{2},			addZeroToSc32,		outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8264 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64",			" %i64 0",		" %i64 0",		"%i64",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt64{1},			deInt64{0},			addZeroToSc32,		outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8265 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16",			" %i16 0",		" %i16 0",		"%i16",		"ShiftRightLogical    %sc_0 %sc_1",				deInt16{2},			deInt16{1},			addZeroToSc32,		outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8266 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt16{-4},		deInt16{2},			addZeroToSc32,		outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8267 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16",			" %i16 0",		" %i16 0",		"%i16",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt16{1},			deInt16{0},			addZeroToSc32,		outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8268 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8",			" %i8 0",		" %i8 0",		"%i8",		"ShiftRightLogical    %sc_0 %sc_1",				deInt8{2},			deInt8{1},			addZeroToSc32,		outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8269 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt8{-4},			deInt8{2},			addZeroToSc32,		outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8270 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8",			" %i8 0",		" %i8 0",		"%i8",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt8{1},			deInt8{0},			addZeroToSc32,		outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8271 
8272 	// Shifts for other integer sizes but only in the shift amount.
8273 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64",		" %i32 0",		" %i64 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt64{1},			addZeroToSc,		outputColors2, (FLAG_I64)));
8274 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt64{2},			addZeroToSc,		outputColors0, (FLAG_I64)));
8275 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64",		" %i32 0",		" %i64 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt64{0},			addZeroToSc,		outputColors2, (FLAG_I64)));
8276 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16",		" %i32 0",		" %i16 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt16{1},			addZeroToSc,		outputColors2, (FLAG_I16)));
8277 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt16{2},			addZeroToSc,		outputColors0, (FLAG_I16)));
8278 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16",		" %i32 0",		" %i16 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt16{0},			addZeroToSc,		outputColors2, (FLAG_I16)));
8279 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt8{1},			addZeroToSc,		outputColors2, (FLAG_I8)));
8280 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt8{2},			addZeroToSc,		outputColors0, (FLAG_I8)));
8281 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8",			" %i32 0",		" %i8 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt8{0},			addZeroToSc,		outputColors2, (FLAG_I8)));
8282 
8283 	cases.push_back(SpecConstantTwoValGraphicsCase("slessthan",						" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",				-20,				-10,				selectTrueUsingSc,	outputColors2));
8284 	cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan",						" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",				10,					20,					selectTrueUsingSc,	outputColors2));
8285 	cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan",					" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",				-1000,				50,					selectFalseUsingSc,	outputColors2));
8286 	cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan",					" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",				10,					5,					selectTrueUsingSc,	outputColors2));
8287 	cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal",				" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",				-10,				-10,				selectTrueUsingSc,	outputColors2));
8288 	cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal",				" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",				50,					100,				selectTrueUsingSc,	outputColors2));
8289 	cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal",				" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",				-1000,				50,					selectFalseUsingSc,	outputColors2));
8290 	cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal",				" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",				10,					10,					selectTrueUsingSc,	outputColors2));
8291 	cases.push_back(SpecConstantTwoValGraphicsCase("iequal",						" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",				42,					24,					selectFalseUsingSc,	outputColors2));
8292 	cases.push_back(SpecConstantTwoValGraphicsCase("inotequal",						" %i32 0",		" %i32 0",		"%bool",	"INotEqual            %sc_0 %sc_1",				42,					24,					selectTrueUsingSc,	outputColors2));
8293 	cases.push_back(SpecConstantTwoValGraphicsCase("logicaland",					"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",				0,					1,					selectFalseUsingSc,	outputColors2));
8294 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalor",						"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",				1,					0,					selectTrueUsingSc,	outputColors2));
8295 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal",					"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",				0,					1,					selectFalseUsingSc,	outputColors2));
8296 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal",				"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",				1,					0,					selectTrueUsingSc,	outputColors2));
8297 	cases.push_back(SpecConstantTwoValGraphicsCase("snegate",						" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",					-1,					0,					addZeroToSc,		outputColors2));
8298 	cases.push_back(SpecConstantTwoValGraphicsCase("not",							" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,					0,					addZeroToSc,		outputColors2));
8299 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot",					"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,					0,					selectFalseUsingSc,	outputColors2));
8300 	cases.push_back(SpecConstantTwoValGraphicsCase("select",						"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,					1,					addZeroToSc,		outputColors2));
8301 	cases.push_back(SpecConstantTwoValGraphicsCase("sconvert",						" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",					-1,					0,					addZeroToSc32,		outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8302 	cases.push_back(SpecConstantTwoValGraphicsCase("fconvert",						" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",					tcu::Float32(-1.0),	tcu::Float32(0.0),	addZeroToSc32,		outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8303 	cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16",					" %f16 0",		" %f16 0",		"%f32",		"FConvert             %sc_0",					tcu::Float16(-1.0),	tcu::Float16(0.0),	addZeroToSc32,		outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8304 	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
8305 
8306 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8307 	{
8308 		map<string, string>			specializations;
8309 		map<string, string>			fragments;
8310 		SpecConstants				specConstants;
8311 		PushConstants				noPushConstants;
8312 		GraphicsResources			noResources;
8313 		GraphicsInterfaces			noInterfaces;
8314 		vector<string>				extensions;
8315 		VulkanFeatures				requiredFeatures;
8316 
8317 		// Special SPIR-V code when using 16-bit integers.
8318 		if (cases[caseNdx].caseFlags & FLAG_I16)
8319 		{
8320 			requiredFeatures.coreFeatures.shaderInt16		= VK_TRUE;
8321 			fragments["capability"]							+= "OpCapability Int16\n";							// Adds 16-bit integer capability
8322 			specializations["OPTYPE_DEFINITIONS"]			+= "%i16 = OpTypeInt 16 1\n";						// Adds 16-bit integer type
8323 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8324 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpSConvert %i32 %sc_op\n";			// Converts 16-bit integer to 32-bit integer
8325 		}
8326 
8327 		// Special SPIR-V code when using 64-bit integers.
8328 		if (cases[caseNdx].caseFlags & FLAG_I64)
8329 		{
8330 			requiredFeatures.coreFeatures.shaderInt64		= VK_TRUE;
8331 			fragments["capability"]							+= "OpCapability Int64\n";							// Adds 64-bit integer capability
8332 			specializations["OPTYPE_DEFINITIONS"]			+= "%i64 = OpTypeInt 64 1\n";						// Adds 64-bit integer type
8333 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8334 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpSConvert %i32 %sc_op\n";			// Converts 64-bit integer to 32-bit integer
8335 		}
8336 
8337 		// Special SPIR-V code when using 64-bit floats.
8338 		if (cases[caseNdx].caseFlags & FLAG_F64)
8339 		{
8340 			requiredFeatures.coreFeatures.shaderFloat64		= VK_TRUE;
8341 			fragments["capability"]							+= "OpCapability Float64\n";						// Adds 64-bit float capability
8342 			specializations["OPTYPE_DEFINITIONS"]			+= "%f64 = OpTypeFloat 64\n";						// Adds 64-bit float type
8343 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8344 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpConvertFToS %i32 %sc_op\n";		// Converts 64-bit float to 32-bit integer
8345 		}
8346 
8347 		// Extension needed for float16 and int8.
8348 		if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8349 			extensions.push_back("VK_KHR_shader_float16_int8");
8350 
8351 		// Special SPIR-V code when using 16-bit floats.
8352 		if (cases[caseNdx].caseFlags & FLAG_F16)
8353 		{
8354 			requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8355 			fragments["capability"]						+= "OpCapability Float16\n";						// Adds 16-bit float capability
8356 			specializations["OPTYPE_DEFINITIONS"]		+= "%f16 = OpTypeFloat 16\n";						// Adds 16-bit float type
8357 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8358 				specializations["TYPE_CONVERT"]			+= "%sc_op32 = OpConvertFToS %i32 %sc_op\n";		// Converts 16-bit float to 32-bit integer
8359 		}
8360 
8361 		// Special SPIR-V code when using 8-bit integers.
8362 		if (cases[caseNdx].caseFlags & FLAG_I8)
8363 		{
8364 			requiredFeatures.extFloat16Int8.shaderInt8 = true;
8365 			fragments["capability"]						+= "OpCapability Int8\n";						// Adds 8-bit integer capability
8366 			specializations["OPTYPE_DEFINITIONS"]		+= "%i8 = OpTypeInt 8 1\n";						// Adds 8-bit integer type
8367 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8368 				specializations["TYPE_CONVERT"]			+= "%sc_op32 = OpSConvert %i32 %sc_op\n";		// Converts 8-bit integer to 32-bit integer
8369 		}
8370 
8371 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
8372 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
8373 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
8374 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
8375 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
8376 
8377 		fragments["decoration"]				= tcu::StringTemplate(decorations1).specialize(specializations);
8378 		fragments["pre_main"]				= tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8379 		fragments["testfun"]				= tcu::StringTemplate(function1).specialize(specializations);
8380 
8381 		cases[caseNdx].scActualValue0.appendTo(specConstants);
8382 		cases[caseNdx].scActualValue1.appendTo(specConstants);
8383 
8384 		createTestsForAllStages(
8385 			cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8386 			noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8387 	}
8388 
8389 	const char			decorations2[]		=
8390 		"OpDecorate %sc_0  SpecId 0\n"
8391 		"OpDecorate %sc_1  SpecId 1\n"
8392 		"OpDecorate %sc_2  SpecId 2\n";
8393 
8394 	const std::string	typesAndConstants2	=
8395 		"%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8396 		"%vec3_undef  = OpUndef %v3i32\n"
8397 
8398 		+ getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8399 
8400 		"%sc_0        = OpSpecConstant %i32 0\n"
8401 		"%sc_1        = OpSpecConstant %i32 0\n"
8402 		"%sc_2        = OpSpecConstant %i32 0\n"
8403 
8404 		+ getSpecConstantOpStructConstBlock() +
8405 
8406 		"%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"							// (sc_0, 0,    0)
8407 		"%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"							// (0,    sc_1, 0)
8408 		"%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"							// (0,    0,    sc_2)
8409 		"%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
8410 		"%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
8411 		"%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
8412 		"%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (0,    sc_0, sc_1)
8413 		"%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
8414 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
8415 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
8416 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
8417 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
8418 		"%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";								// (sc_2 - sc_0) * sc_1
8419 
8420 	const std::string	function2			=
8421 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8422 		"%param     = OpFunctionParameter %v4f32\n"
8423 		"%label     = OpLabel\n"
8424 		"%result    = OpVariable %fp_v4f32 Function\n"
8425 
8426 		+ getSpecConstantOpStructInstructions() +
8427 
8428 		"             OpStore %result %param\n"
8429 		"%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
8430 		"%val       = OpLoad %f32 %loc\n"
8431 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8432 		"             OpStore %loc %add\n"
8433 		"%ret       = OpLoad %v4f32 %result\n"
8434 		"             OpReturnValue %ret\n"
8435 		"             OpFunctionEnd\n";
8436 
8437 	map<string, string>	fragments;
8438 	SpecConstants		specConstants;
8439 
8440 	fragments["decoration"]	= decorations2;
8441 	fragments["pre_main"]	= typesAndConstants2;
8442 	fragments["testfun"]	= function2;
8443 
8444 	specConstants.append<deInt32>(56789);
8445 	specConstants.append<deInt32>(-2);
8446 	specConstants.append<deInt32>(56788);
8447 
8448 	createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8449 
8450 	return group.release();
8451 }
8452 
createOpPhiTests(tcu::TestContext & testCtx)8453 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8454 {
8455 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8456 	RGBA							inputColors[4];
8457 	RGBA							outputColors1[4];
8458 	RGBA							outputColors2[4];
8459 	RGBA							outputColors3[4];
8460 	RGBA							outputColors4[4];
8461 	map<string, string>				fragments1;
8462 	map<string, string>				fragments2;
8463 	map<string, string>				fragments3;
8464 	map<string, string>				fragments4;
8465 	std::vector<std::string>		extensions4;
8466 	GraphicsResources				resources4;
8467 	VulkanFeatures					vulkanFeatures4;
8468 
8469 	const char	typesAndConstants1[]	=
8470 		"%c_f32_p2  = OpConstant %f32 0.2\n"
8471 		"%c_f32_p4  = OpConstant %f32 0.4\n"
8472 		"%c_f32_p5  = OpConstant %f32 0.5\n"
8473 		"%c_f32_p8  = OpConstant %f32 0.8\n";
8474 
8475 	// vec4 test_code(vec4 param) {
8476 	//   vec4 result = param;
8477 	//   for (int i = 0; i < 4; ++i) {
8478 	//     float operand;
8479 	//     switch (i) {
8480 	//       case 0: operand = .2; break;
8481 	//       case 1: operand = .5; break;
8482 	//       case 2: operand = .4; break;
8483 	//       case 3: operand = .0; break;
8484 	//       default: break; // unreachable
8485 	//     }
8486 	//     result[i] += operand;
8487 	//   }
8488 	//   return result;
8489 	// }
8490 	const char	function1[]				=
8491 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8492 		"%param1    = OpFunctionParameter %v4f32\n"
8493 		"%lbl       = OpLabel\n"
8494 		"%iptr      = OpVariable %fp_i32 Function\n"
8495 		"%result    = OpVariable %fp_v4f32 Function\n"
8496 		"             OpStore %iptr %c_i32_0\n"
8497 		"             OpStore %result %param1\n"
8498 		"             OpBranch %loop\n"
8499 
8500 		"%loop      = OpLabel\n"
8501 		"%ival      = OpLoad %i32 %iptr\n"
8502 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8503 		"             OpLoopMerge %exit %cont None\n"
8504 		"             OpBranchConditional %lt_4 %entry %exit\n"
8505 
8506 		"%entry     = OpLabel\n"
8507 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
8508 		"%val       = OpLoad %f32 %loc\n"
8509 		"             OpSelectionMerge %phi None\n"
8510 		"             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8511 
8512 		"%case0     = OpLabel\n"
8513 		"             OpBranch %phi\n"
8514 		"%case1     = OpLabel\n"
8515 		"             OpBranch %phi\n"
8516 		"%case2     = OpLabel\n"
8517 		"             OpBranch %phi\n"
8518 		"%case3     = OpLabel\n"
8519 		"             OpBranch %phi\n"
8520 
8521 		"%default   = OpLabel\n"
8522 		"             OpUnreachable\n"
8523 
8524 		"%phi       = OpLabel\n"
8525 		"%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8526 		"             OpBranch %cont\n"
8527 		"%cont      = OpLabel\n"
8528 		"%add       = OpFAdd %f32 %val %operand\n"
8529 		"             OpStore %loc %add\n"
8530 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8531 		"             OpStore %iptr %ival_next\n"
8532 		"             OpBranch %loop\n"
8533 
8534 		"%exit      = OpLabel\n"
8535 		"%ret       = OpLoad %v4f32 %result\n"
8536 		"             OpReturnValue %ret\n"
8537 
8538 		"             OpFunctionEnd\n";
8539 
8540 	fragments1["pre_main"]	= typesAndConstants1;
8541 	fragments1["testfun"]	= function1;
8542 
8543 	getHalfColorsFullAlpha(inputColors);
8544 
8545 	outputColors1[0]		= RGBA(178, 255, 229, 255);
8546 	outputColors1[1]		= RGBA(178, 127, 102, 255);
8547 	outputColors1[2]		= RGBA(51,  255, 102, 255);
8548 	outputColors1[3]		= RGBA(51,  127, 229, 255);
8549 
8550 	createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8551 
8552 	const char	typesAndConstants2[]	=
8553 		"%c_f32_p2  = OpConstant %f32 0.2\n";
8554 
8555 	// Add .4 to the second element of the given parameter.
8556 	const char	function2[]				=
8557 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8558 		"%param     = OpFunctionParameter %v4f32\n"
8559 		"%entry     = OpLabel\n"
8560 		"%result    = OpVariable %fp_v4f32 Function\n"
8561 		"             OpStore %result %param\n"
8562 		"%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8563 		"%val       = OpLoad %f32 %loc\n"
8564 		"             OpBranch %phi\n"
8565 
8566 		"%phi        = OpLabel\n"
8567 		"%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8568 		"%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8569 		"%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8570 		"%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8571 		"%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8572 		"              OpLoopMerge %exit %phi None\n"
8573 		"              OpBranchConditional %still_loop %phi %exit\n"
8574 
8575 		"%exit       = OpLabel\n"
8576 		"              OpStore %loc %accum\n"
8577 		"%ret        = OpLoad %v4f32 %result\n"
8578 		"              OpReturnValue %ret\n"
8579 
8580 		"              OpFunctionEnd\n";
8581 
8582 	fragments2["pre_main"]	= typesAndConstants2;
8583 	fragments2["testfun"]	= function2;
8584 
8585 	outputColors2[0]			= RGBA(127, 229, 127, 255);
8586 	outputColors2[1]			= RGBA(127, 102, 0,   255);
8587 	outputColors2[2]			= RGBA(0,   229, 0,   255);
8588 	outputColors2[3]			= RGBA(0,   102, 127, 255);
8589 
8590 	createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8591 
8592 	const char	typesAndConstants3[]	=
8593 		"%true      = OpConstantTrue %bool\n"
8594 		"%false     = OpConstantFalse %bool\n"
8595 		"%c_f32_p2  = OpConstant %f32 0.2\n";
8596 
8597 	// Swap the second and the third element of the given parameter.
8598 	const char	function3[]				=
8599 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8600 		"%param     = OpFunctionParameter %v4f32\n"
8601 		"%entry     = OpLabel\n"
8602 		"%result    = OpVariable %fp_v4f32 Function\n"
8603 		"             OpStore %result %param\n"
8604 		"%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8605 		"%a_init    = OpLoad %f32 %a_loc\n"
8606 		"%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8607 		"%b_init    = OpLoad %f32 %b_loc\n"
8608 		"             OpBranch %phi\n"
8609 
8610 		"%phi        = OpLabel\n"
8611 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8612 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8613 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8614 		"              OpLoopMerge %exit %phi None\n"
8615 		"              OpBranchConditional %still_loop %phi %exit\n"
8616 
8617 		"%exit       = OpLabel\n"
8618 		"              OpStore %a_loc %a_next\n"
8619 		"              OpStore %b_loc %b_next\n"
8620 		"%ret        = OpLoad %v4f32 %result\n"
8621 		"              OpReturnValue %ret\n"
8622 
8623 		"              OpFunctionEnd\n";
8624 
8625 	fragments3["pre_main"]	= typesAndConstants3;
8626 	fragments3["testfun"]	= function3;
8627 
8628 	outputColors3[0]			= RGBA(127, 127, 127, 255);
8629 	outputColors3[1]			= RGBA(127, 0,   0,   255);
8630 	outputColors3[2]			= RGBA(0,   0,   127, 255);
8631 	outputColors3[3]			= RGBA(0,   127, 0,   255);
8632 
8633 	createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8634 
8635 	const char	typesAndConstants4[]	=
8636 		"%f16        = OpTypeFloat 16\n"
8637 		"%v4f16      = OpTypeVector %f16 4\n"
8638 		"%fp_f16     = OpTypePointer Function %f16\n"
8639 		"%fp_v4f16   = OpTypePointer Function %v4f16\n"
8640 		"%true       = OpConstantTrue %bool\n"
8641 		"%false      = OpConstantFalse %bool\n"
8642 		"%c_f32_p2   = OpConstant %f32 0.2\n";
8643 
8644 	// Swap the second and the third element of the given parameter.
8645 	const char	function4[]				=
8646 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8647 		"%param      = OpFunctionParameter %v4f32\n"
8648 		"%entry      = OpLabel\n"
8649 		"%result     = OpVariable %fp_v4f16 Function\n"
8650 		"%param16    = OpFConvert %v4f16 %param\n"
8651 		"              OpStore %result %param16\n"
8652 		"%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8653 		"%a_init     = OpLoad %f16 %a_loc\n"
8654 		"%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8655 		"%b_init     = OpLoad %f16 %b_loc\n"
8656 		"              OpBranch %phi\n"
8657 
8658 		"%phi        = OpLabel\n"
8659 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8660 		"%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8661 		"%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8662 		"              OpLoopMerge %exit %phi None\n"
8663 		"              OpBranchConditional %still_loop %phi %exit\n"
8664 
8665 		"%exit       = OpLabel\n"
8666 		"              OpStore %a_loc %a_next\n"
8667 		"              OpStore %b_loc %b_next\n"
8668 		"%ret16      = OpLoad %v4f16 %result\n"
8669 		"%ret        = OpFConvert %v4f32 %ret16\n"
8670 		"              OpReturnValue %ret\n"
8671 
8672 		"              OpFunctionEnd\n";
8673 
8674 	fragments4["pre_main"]		= typesAndConstants4;
8675 	fragments4["testfun"]		= function4;
8676 	fragments4["capability"]	= "OpCapability Float16\n";
8677 
8678 	extensions4.push_back("VK_KHR_shader_float16_int8");
8679 
8680 	vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8681 
8682 	outputColors4[0]			= RGBA(127, 127, 127, 255);
8683 	outputColors4[1]			= RGBA(127, 0,   0,   255);
8684 	outputColors4[2]			= RGBA(0,   0,   127, 255);
8685 	outputColors4[3]			= RGBA(0,   127, 0,   255);
8686 
8687 	createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8688 
8689 	return group.release();
8690 }
8691 
createNoContractionTests(tcu::TestContext & testCtx)8692 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8693 {
8694 	de::MovePtr<tcu::TestCaseGroup> group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8695 	RGBA							inputColors[4];
8696 	RGBA							outputColors[4];
8697 
8698 	// With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
8699 	// For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
8700 	// only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
8701 	// On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
8702 	const char						constantsAndTypes[]	 =
8703 		"%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8704 		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8705 		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8706 		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8707 		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
8708 
8709 	const char						function[]	 =
8710 		"%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8711 		"%param          = OpFunctionParameter %v4f32\n"
8712 		"%label          = OpLabel\n"
8713 		"%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8714 		"%var2           = OpVariable %fp_f32 Function\n"
8715 		"%red            = OpCompositeExtract %f32 %param 0\n"
8716 		"%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8717 		"                  OpStore %var2 %plus_red\n"
8718 		"%val1           = OpLoad %f32 %var1\n"
8719 		"%val2           = OpLoad %f32 %var2\n"
8720 		"%mul            = OpFMul %f32 %val1 %val2\n"
8721 		"%add            = OpFAdd %f32 %mul %c_f32_n1\n"
8722 		"%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
8723 		"%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8724 		"%success        = OpLogicalOr %bool %is0 %isn1n24\n"
8725 		"%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
8726 		"%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8727 		"                  OpReturnValue %ret\n"
8728 		"                  OpFunctionEnd\n";
8729 
8730 	struct CaseNameDecoration
8731 	{
8732 		string name;
8733 		string decoration;
8734 	};
8735 
8736 
8737 	CaseNameDecoration tests[] = {
8738 		{"multiplication",	"OpDecorate %mul NoContraction"},
8739 		{"addition",		"OpDecorate %add NoContraction"},
8740 		{"both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8741 	};
8742 
8743 	getHalfColorsFullAlpha(inputColors);
8744 
8745 	for (deUint8 idx = 0; idx < 4; ++idx)
8746 	{
8747 		inputColors[idx].setRed(0);
8748 		outputColors[idx] = RGBA(0, 0, 0, 255);
8749 	}
8750 
8751 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8752 	{
8753 		map<string, string> fragments;
8754 
8755 		fragments["decoration"] = tests[testNdx].decoration;
8756 		fragments["pre_main"] = constantsAndTypes;
8757 		fragments["testfun"] = function;
8758 
8759 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8760 	}
8761 
8762 	return group.release();
8763 }
8764 
createMemoryAccessTests(tcu::TestContext & testCtx)8765 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8766 {
8767 	de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8768 	RGBA							colors[4];
8769 
8770 	const char						constantsAndTypes[]	 =
8771 		"%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8772 		"%fp_a2f32          = OpTypePointer Function %a2f32\n"
8773 		"%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
8774 		"%fp_stype          = OpTypePointer Function %stype\n";
8775 
8776 	const char						function[]	 =
8777 		"%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8778 		"%param1            = OpFunctionParameter %v4f32\n"
8779 		"%lbl               = OpLabel\n"
8780 		"%v1                = OpVariable %fp_v4f32 Function\n"
8781 		"%v2                = OpVariable %fp_a2f32 Function\n"
8782 		"%v3                = OpVariable %fp_f32 Function\n"
8783 		"%v                 = OpVariable %fp_stype Function\n"
8784 		"%vv                = OpVariable %fp_stype Function\n"
8785 		"%vvv               = OpVariable %fp_f32 Function\n"
8786 
8787 		"                     OpStore %v1 %c_v4f32_1_1_1_1\n"
8788 		"                     OpStore %v2 %c_a2f32_1\n"
8789 		"                     OpStore %v3 %c_f32_1\n"
8790 
8791 		"%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8792 		"%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8793 		"%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
8794 		"%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
8795 		"%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
8796 		"%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
8797 
8798 		"                    OpStore %p_v4f32 %v1_v ${access_type}\n"
8799 		"                    OpStore %p_a2f32 %v2_v ${access_type}\n"
8800 		"                    OpStore %p_f32 %v3_v ${access_type}\n"
8801 
8802 		"                    OpCopyMemory %vv %v ${access_type}\n"
8803 		"                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
8804 
8805 		"%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8806 		"%v_f32_2          = OpLoad %f32 %p_f32_2\n"
8807 		"%v_f32_3          = OpLoad %f32 %vvv\n"
8808 
8809 		"%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8810 		"%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8811 		"                    OpReturnValue %ret2\n"
8812 		"                    OpFunctionEnd\n";
8813 
8814 	struct NameMemoryAccess
8815 	{
8816 		string name;
8817 		string accessType;
8818 	};
8819 
8820 
8821 	NameMemoryAccess tests[] =
8822 	{
8823 		{ "none", "" },
8824 		{ "volatile", "Volatile" },
8825 		{ "aligned",  "Aligned 1" },
8826 		{ "volatile_aligned",  "Volatile|Aligned 1" },
8827 		{ "nontemporal_aligned",  "Nontemporal|Aligned 1" },
8828 		{ "volatile_nontemporal",  "Volatile|Nontemporal" },
8829 		{ "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
8830 	};
8831 
8832 	getHalfColorsFullAlpha(colors);
8833 
8834 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8835 	{
8836 		map<string, string> fragments;
8837 		map<string, string> memoryAccess;
8838 		memoryAccess["access_type"] = tests[testNdx].accessType;
8839 
8840 		fragments["pre_main"] = constantsAndTypes;
8841 		fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
8842 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8843 	}
8844 	return memoryAccessTests.release();
8845 }
createOpUndefTests(tcu::TestContext & testCtx)8846 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8847 {
8848 	de::MovePtr<tcu::TestCaseGroup>		opUndefTests		 (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8849 	RGBA								defaultColors[4];
8850 	map<string, string>					fragments;
8851 	getDefaultColors(defaultColors);
8852 
8853 	// First, simple cases that don't do anything with the OpUndef result.
8854 	struct NameCodePair { string name, decl, type; };
8855 	const NameCodePair tests[] =
8856 	{
8857 		{"bool", "", "%bool"},
8858 		{"vec2uint32", "", "%v2u32"},
8859 		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8860 		{"sampler", "%type = OpTypeSampler", "%type"},
8861 		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8862 		{"pointer", "", "%fp_i32"},
8863 		{"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8864 		{"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8865 		{"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8866 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8867 	{
8868 		fragments["undef_type"] = tests[testNdx].type;
8869 		fragments["testfun"] = StringTemplate(
8870 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8871 			"%param1 = OpFunctionParameter %v4f32\n"
8872 			"%label_testfun = OpLabel\n"
8873 			"%undef = OpUndef ${undef_type}\n"
8874 			"OpReturnValue %param1\n"
8875 			"OpFunctionEnd\n").specialize(fragments);
8876 		fragments["pre_main"] = tests[testNdx].decl;
8877 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8878 	}
8879 	fragments.clear();
8880 
8881 	fragments["testfun"] =
8882 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8883 		"%param1 = OpFunctionParameter %v4f32\n"
8884 		"%label_testfun = OpLabel\n"
8885 		"%undef = OpUndef %f32\n"
8886 		"%zero = OpFMul %f32 %undef %c_f32_0\n"
8887 		"%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8888 		"%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8889 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8890 		"%b = OpFAdd %f32 %a %actually_zero\n"
8891 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8892 		"OpReturnValue %ret\n"
8893 		"OpFunctionEnd\n";
8894 
8895 	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8896 
8897 	fragments["testfun"] =
8898 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8899 		"%param1 = OpFunctionParameter %v4f32\n"
8900 		"%label_testfun = OpLabel\n"
8901 		"%undef = OpUndef %i32\n"
8902 		"%zero = OpIMul %i32 %undef %c_i32_0\n"
8903 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8904 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8905 		"OpReturnValue %ret\n"
8906 		"OpFunctionEnd\n";
8907 
8908 	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8909 
8910 	fragments["testfun"] =
8911 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8912 		"%param1 = OpFunctionParameter %v4f32\n"
8913 		"%label_testfun = OpLabel\n"
8914 		"%undef = OpUndef %u32\n"
8915 		"%zero = OpIMul %u32 %undef %c_i32_0\n"
8916 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8917 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8918 		"OpReturnValue %ret\n"
8919 		"OpFunctionEnd\n";
8920 
8921 	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8922 
8923 	fragments["testfun"] =
8924 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8925 		"%param1 = OpFunctionParameter %v4f32\n"
8926 		"%label_testfun = OpLabel\n"
8927 		"%undef = OpUndef %v4f32\n"
8928 		"%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8929 		"%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8930 		"%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8931 		"%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8932 		"%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8933 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
8934 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
8935 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
8936 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
8937 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8938 		"%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8939 		"%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8940 		"%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8941 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8942 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8943 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8944 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8945 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8946 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8947 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8948 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8949 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8950 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8951 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8952 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8953 		"OpReturnValue %ret\n"
8954 		"OpFunctionEnd\n";
8955 
8956 	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8957 
8958 	fragments["pre_main"] =
8959 		"%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8960 	fragments["testfun"] =
8961 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8962 		"%param1 = OpFunctionParameter %v4f32\n"
8963 		"%label_testfun = OpLabel\n"
8964 		"%undef = OpUndef %m2x2f32\n"
8965 		"%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8966 		"%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8967 		"%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8968 		"%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8969 		"%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8970 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
8971 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
8972 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
8973 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
8974 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8975 		"%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8976 		"%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8977 		"%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8978 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8979 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8980 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8981 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8982 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8983 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8984 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8985 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8986 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8987 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8988 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8989 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8990 		"OpReturnValue %ret\n"
8991 		"OpFunctionEnd\n";
8992 
8993 	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8994 
8995 	return opUndefTests.release();
8996 }
8997 
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)8998 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8999 {
9000 	const RGBA		inputColors[4]		=
9001 	{
9002 		RGBA(0,		0,		0,		255),
9003 		RGBA(0,		0,		255,	255),
9004 		RGBA(0,		255,	0,		255),
9005 		RGBA(0,		255,	255,	255)
9006 	};
9007 
9008 	const RGBA		expectedColors[4]	=
9009 	{
9010 		RGBA(255,	 0,		 0,		 255),
9011 		RGBA(255,	 0,		 0,		 255),
9012 		RGBA(255,	 0,		 0,		 255),
9013 		RGBA(255,	 0,		 0,		 255)
9014 	};
9015 
9016 	const struct SingleFP16Possibility
9017 	{
9018 		const char* name;
9019 		const char* constant;  // Value to assign to %test_constant.
9020 		float		valueAsFloat;
9021 		const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9022 	}				tests[]				=
9023 	{
9024 		{
9025 			"negative",
9026 			"-0x1.3p1\n",
9027 			-constructNormalizedFloat(1, 0x300000),
9028 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
9029 		}, // -19
9030 		{
9031 			"positive",
9032 			"0x1.0p7\n",
9033 			constructNormalizedFloat(7, 0x000000),
9034 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
9035 		},  // +128
9036 		// SPIR-V requires that OpQuantizeToF16 flushes
9037 		// any numbers that would end up denormalized in F16 to zero.
9038 		{
9039 			"denorm",
9040 			"0x0.0006p-126\n",
9041 			std::ldexp(1.5f, -140),
9042 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9043 		},  // denorm
9044 		{
9045 			"negative_denorm",
9046 			"-0x0.0006p-126\n",
9047 			-std::ldexp(1.5f, -140),
9048 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9049 		}, // -denorm
9050 		{
9051 			"too_small",
9052 			"0x1.0p-16\n",
9053 			std::ldexp(1.0f, -16),
9054 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9055 		},     // too small positive
9056 		{
9057 			"negative_too_small",
9058 			"-0x1.0p-32\n",
9059 			-std::ldexp(1.0f, -32),
9060 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9061 		},      // too small negative
9062 		{
9063 			"negative_inf",
9064 			"-0x1.0p128\n",
9065 			-std::ldexp(1.0f, 128),
9066 
9067 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9068 			"%inf = OpIsInf %bool %c\n"
9069 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9070 		},     // -inf to -inf
9071 		{
9072 			"inf",
9073 			"0x1.0p128\n",
9074 			std::ldexp(1.0f, 128),
9075 
9076 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9077 			"%inf = OpIsInf %bool %c\n"
9078 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9079 		},     // +inf to +inf
9080 		{
9081 			"round_to_negative_inf",
9082 			"-0x1.0p32\n",
9083 			-std::ldexp(1.0f, 32),
9084 
9085 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9086 			"%inf = OpIsInf %bool %c\n"
9087 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9088 		},     // round to -inf
9089 		{
9090 			"round_to_inf",
9091 			"0x1.0p16\n",
9092 			std::ldexp(1.0f, 16),
9093 
9094 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9095 			"%inf = OpIsInf %bool %c\n"
9096 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9097 		},     // round to +inf
9098 		{
9099 			"nan",
9100 			"0x1.1p128\n",
9101 			std::numeric_limits<float>::quiet_NaN(),
9102 
9103 			// Test for any NaN value, as NaNs are not preserved
9104 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9105 			"%cond = OpIsNan %bool %direct_quant\n"
9106 		}, // nan
9107 		{
9108 			"negative_nan",
9109 			"-0x1.0001p128\n",
9110 			std::numeric_limits<float>::quiet_NaN(),
9111 
9112 			// Test for any NaN value, as NaNs are not preserved
9113 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9114 			"%cond = OpIsNan %bool %direct_quant\n"
9115 		} // -nan
9116 	};
9117 	const char*		constants			=
9118 		"%test_constant = OpConstant %f32 ";  // The value will be test.constant.
9119 
9120 	StringTemplate	function			(
9121 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9122 		"%param1        = OpFunctionParameter %v4f32\n"
9123 		"%label_testfun = OpLabel\n"
9124 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9125 		"%b             = OpFAdd %f32 %test_constant %a\n"
9126 		"%c             = OpQuantizeToF16 %f32 %b\n"
9127 		"${condition}\n"
9128 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9129 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9130 		"                 OpReturnValue %retval\n"
9131 		"OpFunctionEnd\n"
9132 	);
9133 
9134 	const char*		specDecorations		= "OpDecorate %test_constant SpecId 0\n";
9135 	const char*		specConstants		=
9136 			"%test_constant = OpSpecConstant %f32 0.\n"
9137 			"%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9138 
9139 	StringTemplate	specConstantFunction(
9140 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9141 		"%param1        = OpFunctionParameter %v4f32\n"
9142 		"%label_testfun = OpLabel\n"
9143 		"${condition}\n"
9144 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9145 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9146 		"                 OpReturnValue %retval\n"
9147 		"OpFunctionEnd\n"
9148 	);
9149 
9150 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9151 	{
9152 		map<string, string>								codeSpecialization;
9153 		map<string, string>								fragments;
9154 		codeSpecialization["condition"]					= tests[idx].condition;
9155 		fragments["testfun"]							= function.specialize(codeSpecialization);
9156 		fragments["pre_main"]							= string(constants) + tests[idx].constant + "\n";
9157 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9158 	}
9159 
9160 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9161 	{
9162 		map<string, string>								codeSpecialization;
9163 		map<string, string>								fragments;
9164 		SpecConstants									passConstants;
9165 
9166 		codeSpecialization["condition"]					= tests[idx].condition;
9167 		fragments["testfun"]							= specConstantFunction.specialize(codeSpecialization);
9168 		fragments["decoration"]							= specDecorations;
9169 		fragments["pre_main"]							= specConstants;
9170 
9171 		passConstants.append<float>(tests[idx].valueAsFloat);
9172 
9173 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9174 	}
9175 }
9176 
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9177 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
9178 {
9179 	RGBA inputColors[4] =  {
9180 		RGBA(0,		0,		0,		255),
9181 		RGBA(0,		0,		255,	255),
9182 		RGBA(0,		255,	0,		255),
9183 		RGBA(0,		255,	255,	255)
9184 	};
9185 
9186 	RGBA expectedColors[4] =
9187 	{
9188 		RGBA(255,	 0,		 0,		 255),
9189 		RGBA(255,	 0,		 0,		 255),
9190 		RGBA(255,	 0,		 0,		 255),
9191 		RGBA(255,	 0,		 0,		 255)
9192 	};
9193 
9194 	struct DualFP16Possibility
9195 	{
9196 		const char* name;
9197 		const char* input;
9198 		float		inputAsFloat;
9199 		const char* possibleOutput1;
9200 		const char* possibleOutput2;
9201 	} tests[] = {
9202 		{
9203 			"positive_round_up_or_round_down",
9204 			"0x1.3003p8",
9205 			constructNormalizedFloat(8, 0x300300),
9206 			"0x1.304p8",
9207 			"0x1.3p8"
9208 		},
9209 		{
9210 			"negative_round_up_or_round_down",
9211 			"-0x1.6008p-7",
9212 			-constructNormalizedFloat(-7, 0x600800),
9213 			"-0x1.6p-7",
9214 			"-0x1.604p-7"
9215 		},
9216 		{
9217 			"carry_bit",
9218 			"0x1.01ep2",
9219 			constructNormalizedFloat(2, 0x01e000),
9220 			"0x1.01cp2",
9221 			"0x1.02p2"
9222 		},
9223 		{
9224 			"carry_to_exponent",
9225 			"0x1.ffep1",
9226 			constructNormalizedFloat(1, 0xffe000),
9227 			"0x1.ffcp1",
9228 			"0x1.0p2"
9229 		},
9230 	};
9231 	StringTemplate constants (
9232 		"%input_const = OpConstant %f32 ${input}\n"
9233 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
9234 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
9235 		);
9236 
9237 	StringTemplate specConstants (
9238 		"%input_const = OpSpecConstant %f32 0.\n"
9239 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
9240 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
9241 	);
9242 
9243 	const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
9244 
9245 	const char* function  =
9246 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9247 		"%param1        = OpFunctionParameter %v4f32\n"
9248 		"%label_testfun = OpLabel\n"
9249 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9250 		// For the purposes of this test we assume that 0.f will always get
9251 		// faithfully passed through the pipeline stages.
9252 		"%b             = OpFAdd %f32 %input_const %a\n"
9253 		"%c             = OpQuantizeToF16 %f32 %b\n"
9254 		"%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
9255 		"%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
9256 		"%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
9257 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9258 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9259 		"                 OpReturnValue %retval\n"
9260 		"OpFunctionEnd\n";
9261 
9262 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9263 		map<string, string>									fragments;
9264 		map<string, string>									constantSpecialization;
9265 
9266 		constantSpecialization["input"]						= tests[idx].input;
9267 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
9268 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
9269 		fragments["testfun"]								= function;
9270 		fragments["pre_main"]								= constants.specialize(constantSpecialization);
9271 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9272 	}
9273 
9274 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9275 		map<string, string>									fragments;
9276 		map<string, string>									constantSpecialization;
9277 		SpecConstants										passConstants;
9278 
9279 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
9280 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
9281 		fragments["testfun"]								= function;
9282 		fragments["decoration"]								= specDecorations;
9283 		fragments["pre_main"]								= specConstants.specialize(constantSpecialization);
9284 
9285 		passConstants.append<float>(tests[idx].inputAsFloat);
9286 
9287 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9288 	}
9289 }
9290 
createOpQuantizeTests(tcu::TestContext & testCtx)9291 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9292 {
9293 	de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9294 	createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9295 	createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9296 	return opQuantizeTests.release();
9297 }
9298 
9299 struct ShaderPermutation
9300 {
9301 	deUint8 vertexPermutation;
9302 	deUint8 geometryPermutation;
9303 	deUint8 tesscPermutation;
9304 	deUint8 tessePermutation;
9305 	deUint8 fragmentPermutation;
9306 };
9307 
getShaderPermutation(deUint8 inputValue)9308 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9309 {
9310 	ShaderPermutation	permutation =
9311 	{
9312 		static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9313 		static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9314 		static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9315 		static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9316 		static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
9317 	};
9318 	return permutation;
9319 }
9320 
createModuleTests(tcu::TestContext & testCtx)9321 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9322 {
9323 	RGBA								defaultColors[4];
9324 	RGBA								invertedColors[4];
9325 	de::MovePtr<tcu::TestCaseGroup>		moduleTests			(new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9326 
9327 	getDefaultColors(defaultColors);
9328 	getInvertedDefaultColors(invertedColors);
9329 
9330 	// Combined module tests
9331 	{
9332 		// Shader stages: vertex and fragment
9333 		{
9334 			const ShaderElement combinedPipeline[]	=
9335 			{
9336 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9337 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9338 			};
9339 
9340 			addFunctionCaseWithPrograms<InstanceContext>(
9341 				moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
9342 				createInstanceContext(combinedPipeline, map<string, string>()));
9343 		}
9344 
9345 		// Shader stages: vertex, geometry and fragment
9346 		{
9347 			const ShaderElement combinedPipeline[]	=
9348 			{
9349 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9350 				ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9351 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9352 			};
9353 
9354 			addFunctionCaseWithPrograms<InstanceContext>(
9355 				moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9356 				createInstanceContext(combinedPipeline, map<string, string>()));
9357 		}
9358 
9359 		// Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9360 		{
9361 			const ShaderElement combinedPipeline[]	=
9362 			{
9363 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9364 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9365 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9366 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9367 			};
9368 
9369 			addFunctionCaseWithPrograms<InstanceContext>(
9370 				moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
9371 				createInstanceContext(combinedPipeline, map<string, string>()));
9372 		}
9373 
9374 		// Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9375 		{
9376 			const ShaderElement combinedPipeline[]	=
9377 			{
9378 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9379 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9380 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9381 				ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9382 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9383 			};
9384 
9385 			addFunctionCaseWithPrograms<InstanceContext>(
9386 				moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9387 				createInstanceContext(combinedPipeline, map<string, string>()));
9388 		}
9389 	}
9390 
9391 	const char* numbers[] =
9392 	{
9393 		"1", "2"
9394 	};
9395 
9396 	for (deInt8 idx = 0; idx < 32; ++idx)
9397 	{
9398 		ShaderPermutation			permutation		= getShaderPermutation(idx);
9399 		string						name			= string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9400 		const ShaderElement			pipeline[]		=
9401 		{
9402 			ShaderElement("vert",	string("vert") +	numbers[permutation.vertexPermutation],		VK_SHADER_STAGE_VERTEX_BIT),
9403 			ShaderElement("geom",	string("geom") +	numbers[permutation.geometryPermutation],	VK_SHADER_STAGE_GEOMETRY_BIT),
9404 			ShaderElement("tessc",	string("tessc") +	numbers[permutation.tesscPermutation],		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9405 			ShaderElement("tesse",	string("tesse") +	numbers[permutation.tessePermutation],		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9406 			ShaderElement("frag",	string("frag") +	numbers[permutation.fragmentPermutation],	VK_SHADER_STAGE_FRAGMENT_BIT)
9407 		};
9408 
9409 		// If there are an even number of swaps, then it should be no-op.
9410 		// If there are an odd number, the color should be flipped.
9411 		if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
9412 		{
9413 			addFunctionCaseWithPrograms<InstanceContext>(
9414 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9415 					createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9416 		}
9417 		else
9418 		{
9419 			addFunctionCaseWithPrograms<InstanceContext>(
9420 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9421 					createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9422 		}
9423 	}
9424 	return moduleTests.release();
9425 }
9426 
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9427 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9428 {
9429 	switch (task)
9430 	{
9431 		case SHADER_TASK_NONE:			return "";
9432 		case SHADER_TASK_NORMAL:		return prefix + "_normal";
9433 		case SHADER_TASK_UNUSED_VAR:	return prefix + "_unused_var";
9434 		case SHADER_TASK_UNUSED_FUNC:	return prefix + "_unused_func";
9435 		default:						DE_ASSERT(DE_FALSE);
9436 	}
9437 	// unreachable
9438 	return "";
9439 }
9440 
getShaderTaskIndexName(ShaderTaskIndex index)9441 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9442 {
9443 	switch (index)
9444 	{
9445 	case SHADER_TASK_INDEX_VERTEX:			return "vertex";
9446 	case SHADER_TASK_INDEX_GEOMETRY:		return "geom";
9447 	case SHADER_TASK_INDEX_TESS_CONTROL:	return "tessc";
9448 	case SHADER_TASK_INDEX_TESS_EVAL:		return "tesse";
9449 	case SHADER_TASK_INDEX_FRAGMENT:		return "frag";
9450 	default:								DE_ASSERT(DE_FALSE);
9451 	}
9452 	// unreachable
9453 	return "";
9454 }
9455 
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9456 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9457 {
9458 	std::string testName = location.toString();
9459 
9460 	for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9461 	{
9462 		if (shaderTasks[i] != SHADER_TASK_NONE)
9463 		{
9464 			testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9465 		}
9466 	}
9467 
9468 	return testName;
9469 }
9470 
createUnusedVariableTests(tcu::TestContext & testCtx)9471 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9472 {
9473 	de::MovePtr<tcu::TestCaseGroup>		moduleTests				(new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9474 
9475 	ShaderTaskArray						shaderCombinations[]	=
9476 	{
9477 		// Vertex					Geometry					Tess. Control				Tess. Evaluation			Fragment
9478 		{ SHADER_TASK_UNUSED_VAR,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9479 		{ SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9480 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_UNUSED_VAR	},
9481 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_UNUSED_FUNC	},
9482 		{ SHADER_TASK_NORMAL,		SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9483 		{ SHADER_TASK_NORMAL,		SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9484 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NORMAL,			SHADER_TASK_NORMAL	},
9485 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NORMAL,			SHADER_TASK_NORMAL	},
9486 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NORMAL,			SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NORMAL	},
9487 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NORMAL,			SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NORMAL	}
9488 	};
9489 
9490 	const VariableLocation				testLocations[] =
9491 	{
9492 		// Set		Binding
9493 		{ 0,		5			},
9494 		{ 5,		5			},
9495 	};
9496 
9497 	for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9498 	{
9499 		for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9500 		{
9501 			const ShaderTaskArray&	shaderTasks		= shaderCombinations[combNdx];
9502 			const VariableLocation&	location		= testLocations[locationNdx];
9503 			std::string				testName		= getUnusedVarTestName(shaderTasks, location);
9504 
9505 			addFunctionCaseWithPrograms<UnusedVariableContext>(
9506 				moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9507 				createUnusedVariableContext(shaderTasks, location));
9508 		}
9509 	}
9510 
9511 	return moduleTests.release();
9512 }
9513 
createLoopTests(tcu::TestContext & testCtx)9514 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9515 {
9516 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9517 	RGBA defaultColors[4];
9518 	getDefaultColors(defaultColors);
9519 	map<string, string> fragments;
9520 	fragments["pre_main"] =
9521 		"%c_f32_5 = OpConstant %f32 5.\n";
9522 
9523 	// A loop with a single block. The Continue Target is the loop block
9524 	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9525 	// -- the "continue construct" forms the entire loop.
9526 	fragments["testfun"] =
9527 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9528 		"%param1 = OpFunctionParameter %v4f32\n"
9529 
9530 		"%entry = OpLabel\n"
9531 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9532 		"OpBranch %loop\n"
9533 
9534 		";adds and subtracts 1.0 to %val in alternate iterations\n"
9535 		"%loop = OpLabel\n"
9536 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9537 		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9538 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9539 		"%val = OpFAdd %f32 %val1 %delta\n"
9540 		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9541 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9542 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9543 		"OpLoopMerge %exit %loop None\n"
9544 		"OpBranchConditional %again %loop %exit\n"
9545 
9546 		"%exit = OpLabel\n"
9547 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9548 		"OpReturnValue %result\n"
9549 
9550 		"OpFunctionEnd\n";
9551 
9552 	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9553 
9554 	// Body comprised of multiple basic blocks.
9555 	const StringTemplate multiBlock(
9556 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9557 		"%param1 = OpFunctionParameter %v4f32\n"
9558 
9559 		"%entry = OpLabel\n"
9560 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9561 		"OpBranch %loop\n"
9562 
9563 		";adds and subtracts 1.0 to %val in alternate iterations\n"
9564 		"%loop = OpLabel\n"
9565 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9566 		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9567 		"%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9568 		// There are several possibilities for the Continue Target below.  Each
9569 		// will be specialized into a separate test case.
9570 		"OpLoopMerge %exit ${continue_target} None\n"
9571 		"OpBranch %if\n"
9572 
9573 		"%if = OpLabel\n"
9574 		";delta_next = (delta > 0) ? -1 : 1;\n"
9575 		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9576 		"OpSelectionMerge %gather DontFlatten\n"
9577 		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9578 
9579 		"%odd = OpLabel\n"
9580 		"OpBranch %gather\n"
9581 
9582 		"%even = OpLabel\n"
9583 		"OpBranch %gather\n"
9584 
9585 		"%gather = OpLabel\n"
9586 		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9587 		"%val = OpFAdd %f32 %val1 %delta\n"
9588 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9589 		"OpBranch %cont\n"
9590 
9591 		"%cont = OpLabel\n"
9592 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9593 		"OpBranchConditional %again %loop %exit\n"
9594 
9595 		"%exit = OpLabel\n"
9596 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9597 		"OpReturnValue %result\n"
9598 
9599 		"OpFunctionEnd\n");
9600 
9601 	map<string, string> continue_target;
9602 
9603 	// The Continue Target is the loop block itself.
9604 	continue_target["continue_target"] = "%if";
9605 	fragments["testfun"] = multiBlock.specialize(continue_target);
9606 	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9607 
9608 	// The Continue Target is at the end of the loop.
9609 	continue_target["continue_target"] = "%cont";
9610 	fragments["testfun"] = multiBlock.specialize(continue_target);
9611 	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9612 
9613 	// A loop with continue statement.
9614 	fragments["testfun"] =
9615 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9616 		"%param1 = OpFunctionParameter %v4f32\n"
9617 
9618 		"%entry = OpLabel\n"
9619 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9620 		"OpBranch %loop\n"
9621 
9622 		";adds 4, 3, and 1 to %val0 (skips 2)\n"
9623 		"%loop = OpLabel\n"
9624 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9625 		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9626 		"OpLoopMerge %exit %continue None\n"
9627 		"OpBranch %if\n"
9628 
9629 		"%if = OpLabel\n"
9630 		";skip if %count==2\n"
9631 		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
9632 		"OpBranchConditional %eq2 %continue %body\n"
9633 
9634 		"%body = OpLabel\n"
9635 		"%fcount = OpConvertSToF %f32 %count\n"
9636 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9637 		"OpBranch %continue\n"
9638 
9639 		"%continue = OpLabel\n"
9640 		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
9641 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9642 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9643 		"OpBranchConditional %again %loop %exit\n"
9644 
9645 		"%exit = OpLabel\n"
9646 		"%same = OpFSub %f32 %val %c_f32_8\n"
9647 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9648 		"OpReturnValue %result\n"
9649 		"OpFunctionEnd\n";
9650 	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9651 
9652 	// A loop with break.
9653 	fragments["testfun"] =
9654 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9655 		"%param1 = OpFunctionParameter %v4f32\n"
9656 
9657 		"%entry = OpLabel\n"
9658 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9659 		"%dot = OpDot %f32 %param1 %param1\n"
9660 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9661 		"%zero = OpConvertFToU %u32 %div\n"
9662 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
9663 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9664 		"OpBranch %loop\n"
9665 
9666 		";adds 4 and 3 to %val0 (exits early)\n"
9667 		"%loop = OpLabel\n"
9668 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9669 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9670 		"OpLoopMerge %exit %continue None\n"
9671 		"OpBranch %if\n"
9672 
9673 		"%if = OpLabel\n"
9674 		";end loop if %count==%two\n"
9675 		"%above2 = OpSGreaterThan %bool %count %two\n"
9676 		"OpBranchConditional %above2 %body %exit\n"
9677 
9678 		"%body = OpLabel\n"
9679 		"%fcount = OpConvertSToF %f32 %count\n"
9680 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9681 		"OpBranch %continue\n"
9682 
9683 		"%continue = OpLabel\n"
9684 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9685 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9686 		"OpBranchConditional %again %loop %exit\n"
9687 
9688 		"%exit = OpLabel\n"
9689 		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9690 		"%same = OpFSub %f32 %val_post %c_f32_7\n"
9691 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9692 		"OpReturnValue %result\n"
9693 		"OpFunctionEnd\n";
9694 	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9695 
9696 	// A loop with return.
9697 	fragments["testfun"] =
9698 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9699 		"%param1 = OpFunctionParameter %v4f32\n"
9700 
9701 		"%entry = OpLabel\n"
9702 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9703 		"%dot = OpDot %f32 %param1 %param1\n"
9704 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9705 		"%zero = OpConvertFToU %u32 %div\n"
9706 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
9707 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9708 		"OpBranch %loop\n"
9709 
9710 		";returns early without modifying %param1\n"
9711 		"%loop = OpLabel\n"
9712 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9713 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9714 		"OpLoopMerge %exit %continue None\n"
9715 		"OpBranch %if\n"
9716 
9717 		"%if = OpLabel\n"
9718 		";return if %count==%two\n"
9719 		"%above2 = OpSGreaterThan %bool %count %two\n"
9720 		"OpSelectionMerge %body DontFlatten\n"
9721 		"OpBranchConditional %above2 %body %early_exit\n"
9722 
9723 		"%early_exit = OpLabel\n"
9724 		"OpReturnValue %param1\n"
9725 
9726 		"%body = OpLabel\n"
9727 		"%fcount = OpConvertSToF %f32 %count\n"
9728 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9729 		"OpBranch %continue\n"
9730 
9731 		"%continue = OpLabel\n"
9732 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9733 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9734 		"OpBranchConditional %again %loop %exit\n"
9735 
9736 		"%exit = OpLabel\n"
9737 		";should never get here, so return an incorrect result\n"
9738 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9739 		"OpReturnValue %result\n"
9740 		"OpFunctionEnd\n";
9741 	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9742 
9743 	// Continue inside a switch block to break to enclosing loop's merge block.
9744 	// Matches roughly the following GLSL code:
9745 	// for (; keep_going; keep_going = false)
9746 	// {
9747 	//     switch (int(param1.x))
9748 	//     {
9749 	//         case 0: continue;
9750 	//         case 1: continue;
9751 	//         default: continue;
9752 	//     }
9753 	//     dead code: modify return value to invalid result.
9754 	// }
9755 	fragments["pre_main"] =
9756 		"%fp_bool = OpTypePointer Function %bool\n"
9757 		"%true = OpConstantTrue %bool\n"
9758 		"%false = OpConstantFalse %bool\n";
9759 
9760 	fragments["testfun"] =
9761 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9762 		"%param1 = OpFunctionParameter %v4f32\n"
9763 
9764 		"%entry = OpLabel\n"
9765 		"%keep_going = OpVariable %fp_bool Function\n"
9766 		"%val_ptr = OpVariable %fp_f32 Function\n"
9767 		"%param1_x = OpCompositeExtract %f32 %param1 0\n"
9768 		"OpStore %keep_going %true\n"
9769 		"OpBranch %forloop_begin\n"
9770 
9771 		"%forloop_begin = OpLabel\n"
9772 		"OpLoopMerge %forloop_merge %forloop_continue None\n"
9773 		"OpBranch %forloop\n"
9774 
9775 		"%forloop = OpLabel\n"
9776 		"%for_condition = OpLoad %bool %keep_going\n"
9777 		"OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9778 
9779 		"%forloop_body = OpLabel\n"
9780 		"OpStore %val_ptr %param1_x\n"
9781 		"%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9782 
9783 		"OpSelectionMerge %switch_merge None\n"
9784 		"OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9785 		"%case_0 = OpLabel\n"
9786 		"OpBranch %forloop_continue\n"
9787 		"%case_1 = OpLabel\n"
9788 		"OpBranch %forloop_continue\n"
9789 		"%default = OpLabel\n"
9790 		"OpBranch %forloop_continue\n"
9791 		"%switch_merge = OpLabel\n"
9792 		";should never get here, so change the return value to invalid result\n"
9793 		"OpStore %val_ptr %c_f32_1\n"
9794 		"OpBranch %forloop_continue\n"
9795 
9796 		"%forloop_continue = OpLabel\n"
9797 		"OpStore %keep_going %false\n"
9798 		"OpBranch %forloop_begin\n"
9799 		"%forloop_merge = OpLabel\n"
9800 
9801 		"%val = OpLoad %f32 %val_ptr\n"
9802 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9803 		"OpReturnValue %result\n"
9804 		"OpFunctionEnd\n";
9805 	createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9806 
9807 	return testGroup.release();
9808 }
9809 
9810 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)9811 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9812 {
9813 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9814 	map<string, string> fragments;
9815 
9816 	// A barrier inside a function body.
9817 	fragments["pre_main"] =
9818 		"%Workgroup = OpConstant %i32 2\n"
9819 		"%Invocation = OpConstant %i32 4\n"
9820 		"%MemorySemanticsNone = OpConstant %i32 0\n";
9821 	fragments["testfun"] =
9822 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9823 		"%param1 = OpFunctionParameter %v4f32\n"
9824 		"%label_testfun = OpLabel\n"
9825 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9826 		"OpReturnValue %param1\n"
9827 		"OpFunctionEnd\n";
9828 	addTessCtrlTest(testGroup.get(), "in_function", fragments);
9829 
9830 	// Common setup code for the following tests.
9831 	fragments["pre_main"] =
9832 		"%Workgroup = OpConstant %i32 2\n"
9833 		"%Invocation = OpConstant %i32 4\n"
9834 		"%MemorySemanticsNone = OpConstant %i32 0\n"
9835 		"%c_f32_5 = OpConstant %f32 5.\n";
9836 	const string setupPercentZero =	 // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9837 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9838 		"%param1 = OpFunctionParameter %v4f32\n"
9839 		"%entry = OpLabel\n"
9840 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9841 		"%dot = OpDot %f32 %param1 %param1\n"
9842 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9843 		"%zero = OpConvertFToU %u32 %div\n";
9844 
9845 	// Barriers inside OpSwitch branches.
9846 	fragments["testfun"] =
9847 		setupPercentZero +
9848 		"OpSelectionMerge %switch_exit None\n"
9849 		"OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9850 
9851 		"%case1 = OpLabel\n"
9852 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9853 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9854 		"%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9855 		"OpBranch %switch_exit\n"
9856 
9857 		"%switch_default = OpLabel\n"
9858 		"%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9859 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9860 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9861 		"OpBranch %switch_exit\n"
9862 
9863 		"%case0 = OpLabel\n"
9864 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9865 		"OpBranch %switch_exit\n"
9866 
9867 		"%switch_exit = OpLabel\n"
9868 		"%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9869 		"OpReturnValue %ret\n"
9870 		"OpFunctionEnd\n";
9871 	addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9872 
9873 	// Barriers inside if-then-else.
9874 	fragments["testfun"] =
9875 		setupPercentZero +
9876 		"%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9877 		"OpSelectionMerge %exit DontFlatten\n"
9878 		"OpBranchConditional %eq0 %then %else\n"
9879 
9880 		"%else = OpLabel\n"
9881 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9882 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9883 		"%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9884 		"OpBranch %exit\n"
9885 
9886 		"%then = OpLabel\n"
9887 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9888 		"OpBranch %exit\n"
9889 		"%exit = OpLabel\n"
9890 		"%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9891 		"OpReturnValue %ret\n"
9892 		"OpFunctionEnd\n";
9893 	addTessCtrlTest(testGroup.get(), "in_if", fragments);
9894 
9895 	// A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9896 	// http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9897 	fragments["testfun"] =
9898 		setupPercentZero +
9899 		"%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9900 		"%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9901 		"OpSelectionMerge %exit DontFlatten\n"
9902 		"OpBranchConditional %thread0 %then %else\n"
9903 
9904 		"%else = OpLabel\n"
9905 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9906 		"OpBranch %exit\n"
9907 
9908 		"%then = OpLabel\n"
9909 		"%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9910 		"OpBranch %exit\n"
9911 
9912 		"%exit = OpLabel\n"
9913 		"%val = OpPhi %f32 %val0 %else %val1 %then\n"
9914 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9915 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9916 		"OpReturnValue %ret\n"
9917 		"OpFunctionEnd\n";
9918 	addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9919 
9920 	// A barrier inside a loop.
9921 	fragments["pre_main"] =
9922 		"%Workgroup = OpConstant %i32 2\n"
9923 		"%Invocation = OpConstant %i32 4\n"
9924 		"%MemorySemanticsNone = OpConstant %i32 0\n"
9925 		"%c_f32_10 = OpConstant %f32 10.\n";
9926 	fragments["testfun"] =
9927 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9928 		"%param1 = OpFunctionParameter %v4f32\n"
9929 		"%entry = OpLabel\n"
9930 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9931 		"OpBranch %loop\n"
9932 
9933 		";adds 4, 3, 2, and 1 to %val0\n"
9934 		"%loop = OpLabel\n"
9935 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9936 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9937 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9938 		"%fcount = OpConvertSToF %f32 %count\n"
9939 		"%val = OpFAdd %f32 %val1 %fcount\n"
9940 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9941 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9942 		"OpLoopMerge %exit %loop None\n"
9943 		"OpBranchConditional %again %loop %exit\n"
9944 
9945 		"%exit = OpLabel\n"
9946 		"%same = OpFSub %f32 %val %c_f32_10\n"
9947 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9948 		"OpReturnValue %ret\n"
9949 		"OpFunctionEnd\n";
9950 	addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9951 
9952 	return testGroup.release();
9953 }
9954 
9955 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)9956 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9957 {
9958 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9959 	map<string, string>					fragments;
9960 	RGBA								inputColors[4];
9961 	RGBA								outputColors[4];
9962 
9963 	fragments["pre_main"]				 =
9964 		"%c_f32_3 = OpConstant %f32 3.0\n"
9965 		"%c_f32_n3 = OpConstant %f32 -3.0\n"
9966 		"%c_f32_4 = OpConstant %f32 4.0\n"
9967 		"%c_f32_p75 = OpConstant %f32 0.75\n"
9968 		"%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9969 		"%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9970 		"%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9971 
9972 	// The test does the following.
9973 	// vec4 result = (param1 * 8.0) - 4.0;
9974 	// return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9975 	fragments["testfun"]				 =
9976 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9977 		"%param1 = OpFunctionParameter %v4f32\n"
9978 		"%label_testfun = OpLabel\n"
9979 		"%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9980 		"%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9981 		"%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9982 		"%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9983 		"%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9984 		"%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9985 		"OpReturnValue %xy_0_1\n"
9986 		"OpFunctionEnd\n";
9987 
9988 
9989 	inputColors[0]		= RGBA(16,	16,		0, 255);
9990 	inputColors[1]		= RGBA(232, 232,	0, 255);
9991 	inputColors[2]		= RGBA(232, 16,		0, 255);
9992 	inputColors[3]		= RGBA(16,	232,	0, 255);
9993 
9994 	outputColors[0]		= RGBA(64,	64,		0, 255);
9995 	outputColors[1]		= RGBA(255, 255,	0, 255);
9996 	outputColors[2]		= RGBA(255, 64,		0, 255);
9997 	outputColors[3]		= RGBA(64,	255,	0, 255);
9998 
9999 	createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
10000 	return testGroup.release();
10001 }
10002 
10003 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10004 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10005 {
10006 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
10007 	map<string, string>					fragments;
10008 
10009 	fragments["pre_main"]				 =
10010 		"%c_f32_255 = OpConstant %f32 255.0\n"
10011 		"%c_i32_128 = OpConstant %i32 128\n"
10012 		"%c_i32_255 = OpConstant %i32 255\n"
10013 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10014 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10015 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10016 
10017 	// The test does the following.
10018 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10019 	// ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10020 	// return float(result + 128) / 255.0;
10021 	fragments["testfun"]				 =
10022 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10023 		"%param1 = OpFunctionParameter %v4f32\n"
10024 		"%label_testfun = OpLabel\n"
10025 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10026 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10027 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10028 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10029 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10030 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10031 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10032 		"%x_out = OpSRem %i32 %x_in %y_in\n"
10033 		"%y_out = OpSRem %i32 %y_in %z_in\n"
10034 		"%z_out = OpSRem %i32 %z_in %x_in\n"
10035 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10036 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10037 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10038 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10039 		"OpReturnValue %float_out\n"
10040 		"OpFunctionEnd\n";
10041 
10042 	const struct CaseParams
10043 	{
10044 		const char*		name;
10045 		const char*		failMessageTemplate;	// customized status message
10046 		qpTestResult	failResult;				// override status on failure
10047 		int				operands[4][3];			// four (x, y, z) vectors of operands
10048 		int				results[4][3];			// four (x, y, z) vectors of results
10049 	} cases[] =
10050 	{
10051 		{
10052 			"positive",
10053 			"${reason}",
10054 			QP_TEST_RESULT_FAIL,
10055 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },			// operands
10056 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },			// results
10057 		},
10058 		{
10059 			"all",
10060 			"Inconsistent results, but within specification: ${reason}",
10061 			negFailResult,															// negative operands, not required by the spec
10062 			{ { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },	// operands
10063 			{ { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },	// results
10064 		},
10065 	};
10066 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
10067 
10068 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10069 	{
10070 		const CaseParams&	params			= cases[caseNdx];
10071 		RGBA				inputColors[4];
10072 		RGBA				outputColors[4];
10073 
10074 		for (int i = 0; i < 4; ++i)
10075 		{
10076 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10077 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10078 		}
10079 
10080 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10081 	}
10082 
10083 	return testGroup.release();
10084 }
10085 
10086 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10087 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10088 {
10089 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10090 	map<string, string>					fragments;
10091 
10092 	fragments["pre_main"]				 =
10093 		"%c_f32_255 = OpConstant %f32 255.0\n"
10094 		"%c_i32_128 = OpConstant %i32 128\n"
10095 		"%c_i32_255 = OpConstant %i32 255\n"
10096 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10097 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10098 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10099 
10100 	// The test does the following.
10101 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10102 	// ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10103 	// return float(result + 128) / 255.0;
10104 	fragments["testfun"]				 =
10105 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10106 		"%param1 = OpFunctionParameter %v4f32\n"
10107 		"%label_testfun = OpLabel\n"
10108 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10109 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10110 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10111 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10112 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10113 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10114 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10115 		"%x_out = OpSMod %i32 %x_in %y_in\n"
10116 		"%y_out = OpSMod %i32 %y_in %z_in\n"
10117 		"%z_out = OpSMod %i32 %z_in %x_in\n"
10118 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10119 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10120 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10121 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10122 		"OpReturnValue %float_out\n"
10123 		"OpFunctionEnd\n";
10124 
10125 	const struct CaseParams
10126 	{
10127 		const char*		name;
10128 		const char*		failMessageTemplate;	// customized status message
10129 		qpTestResult	failResult;				// override status on failure
10130 		int				operands[4][3];			// four (x, y, z) vectors of operands
10131 		int				results[4][3];			// four (x, y, z) vectors of results
10132 	} cases[] =
10133 	{
10134 		{
10135 			"positive",
10136 			"${reason}",
10137 			QP_TEST_RESULT_FAIL,
10138 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },				// operands
10139 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },				// results
10140 		},
10141 		{
10142 			"all",
10143 			"Inconsistent results, but within specification: ${reason}",
10144 			negFailResult,																// negative operands, not required by the spec
10145 			{ { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },	// operands
10146 			{ { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },	// results
10147 		},
10148 	};
10149 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
10150 
10151 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10152 	{
10153 		const CaseParams&	params			= cases[caseNdx];
10154 		RGBA				inputColors[4];
10155 		RGBA				outputColors[4];
10156 
10157 		for (int i = 0; i < 4; ++i)
10158 		{
10159 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10160 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10161 		}
10162 
10163 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10164 	}
10165 	return testGroup.release();
10166 }
10167 
// Data types that can appear as the source or destination of a conversion test.
enum ConversionDataType
{
	// Signed integers of 8, 16, 32 and 64 bits.
	DATA_TYPE_SIGNED_8,
	DATA_TYPE_SIGNED_16,
	DATA_TYPE_SIGNED_32,
	DATA_TYPE_SIGNED_64,
	// Unsigned integers of 8, 16, 32 and 64 bits.
	DATA_TYPE_UNSIGNED_8,
	DATA_TYPE_UNSIGNED_16,
	DATA_TYPE_UNSIGNED_32,
	DATA_TYPE_UNSIGNED_64,
	// Floating-point values of 16, 32 and 64 bits.
	DATA_TYPE_FLOAT_16,
	DATA_TYPE_FLOAT_32,
	DATA_TYPE_FLOAT_64,
	// Two-component vectors of signed 16-bit / 32-bit integers.
	DATA_TYPE_VEC2_SIGNED_16,
	DATA_TYPE_VEC2_SIGNED_32
};
10184 
getBitWidthStr(ConversionDataType type)10185 const string getBitWidthStr (ConversionDataType type)
10186 {
10187 	switch (type)
10188 	{
10189 		case DATA_TYPE_SIGNED_8:
10190 		case DATA_TYPE_UNSIGNED_8:
10191 			return "8";
10192 
10193 		case DATA_TYPE_SIGNED_16:
10194 		case DATA_TYPE_UNSIGNED_16:
10195 		case DATA_TYPE_FLOAT_16:
10196 			return "16";
10197 
10198 		case DATA_TYPE_SIGNED_32:
10199 		case DATA_TYPE_UNSIGNED_32:
10200 		case DATA_TYPE_FLOAT_32:
10201 		case DATA_TYPE_VEC2_SIGNED_16:
10202 			return "32";
10203 
10204 		case DATA_TYPE_SIGNED_64:
10205 		case DATA_TYPE_UNSIGNED_64:
10206 		case DATA_TYPE_FLOAT_64:
10207 		case DATA_TYPE_VEC2_SIGNED_32:
10208 			return "64";
10209 
10210 		default:
10211 			DE_ASSERT(false);
10212 	}
10213 	return "";
10214 }
10215 
getByteWidthStr(ConversionDataType type)10216 const string getByteWidthStr (ConversionDataType type)
10217 {
10218 	switch (type)
10219 	{
10220 		case DATA_TYPE_SIGNED_8:
10221 		case DATA_TYPE_UNSIGNED_8:
10222 			return "1";
10223 
10224 		case DATA_TYPE_SIGNED_16:
10225 		case DATA_TYPE_UNSIGNED_16:
10226 		case DATA_TYPE_FLOAT_16:
10227 			return "2";
10228 
10229 		case DATA_TYPE_SIGNED_32:
10230 		case DATA_TYPE_UNSIGNED_32:
10231 		case DATA_TYPE_FLOAT_32:
10232 		case DATA_TYPE_VEC2_SIGNED_16:
10233 			return "4";
10234 
10235 		case DATA_TYPE_SIGNED_64:
10236 		case DATA_TYPE_UNSIGNED_64:
10237 		case DATA_TYPE_FLOAT_64:
10238 		case DATA_TYPE_VEC2_SIGNED_32:
10239 			return "8";
10240 
10241 		default:
10242 			DE_ASSERT(false);
10243 	}
10244 	return "";
10245 }
10246 
isSigned(ConversionDataType type)10247 bool isSigned (ConversionDataType type)
10248 {
10249 	switch (type)
10250 	{
10251 		case DATA_TYPE_SIGNED_8:
10252 		case DATA_TYPE_SIGNED_16:
10253 		case DATA_TYPE_SIGNED_32:
10254 		case DATA_TYPE_SIGNED_64:
10255 		case DATA_TYPE_FLOAT_16:
10256 		case DATA_TYPE_FLOAT_32:
10257 		case DATA_TYPE_FLOAT_64:
10258 		case DATA_TYPE_VEC2_SIGNED_16:
10259 		case DATA_TYPE_VEC2_SIGNED_32:
10260 			return true;
10261 
10262 		case DATA_TYPE_UNSIGNED_8:
10263 		case DATA_TYPE_UNSIGNED_16:
10264 		case DATA_TYPE_UNSIGNED_32:
10265 		case DATA_TYPE_UNSIGNED_64:
10266 			return false;
10267 
10268 		default:
10269 			DE_ASSERT(false);
10270 	}
10271 	return false;
10272 }
10273 
isInt(ConversionDataType type)10274 bool isInt (ConversionDataType type)
10275 {
10276 	switch (type)
10277 	{
10278 		case DATA_TYPE_SIGNED_8:
10279 		case DATA_TYPE_SIGNED_16:
10280 		case DATA_TYPE_SIGNED_32:
10281 		case DATA_TYPE_SIGNED_64:
10282 		case DATA_TYPE_UNSIGNED_8:
10283 		case DATA_TYPE_UNSIGNED_16:
10284 		case DATA_TYPE_UNSIGNED_32:
10285 		case DATA_TYPE_UNSIGNED_64:
10286 			return true;
10287 
10288 		case DATA_TYPE_FLOAT_16:
10289 		case DATA_TYPE_FLOAT_32:
10290 		case DATA_TYPE_FLOAT_64:
10291 		case DATA_TYPE_VEC2_SIGNED_16:
10292 		case DATA_TYPE_VEC2_SIGNED_32:
10293 			return false;
10294 
10295 		default:
10296 			DE_ASSERT(false);
10297 	}
10298 	return false;
10299 }
10300 
isFloat(ConversionDataType type)10301 bool isFloat (ConversionDataType type)
10302 {
10303 	switch (type)
10304 	{
10305 		case DATA_TYPE_SIGNED_8:
10306 		case DATA_TYPE_SIGNED_16:
10307 		case DATA_TYPE_SIGNED_32:
10308 		case DATA_TYPE_SIGNED_64:
10309 		case DATA_TYPE_UNSIGNED_8:
10310 		case DATA_TYPE_UNSIGNED_16:
10311 		case DATA_TYPE_UNSIGNED_32:
10312 		case DATA_TYPE_UNSIGNED_64:
10313 		case DATA_TYPE_VEC2_SIGNED_16:
10314 		case DATA_TYPE_VEC2_SIGNED_32:
10315 			return false;
10316 
10317 		case DATA_TYPE_FLOAT_16:
10318 		case DATA_TYPE_FLOAT_32:
10319 		case DATA_TYPE_FLOAT_64:
10320 			return true;
10321 
10322 		default:
10323 			DE_ASSERT(false);
10324 	}
10325 	return false;
10326 }
10327 
getTypeName(ConversionDataType type)10328 const string getTypeName (ConversionDataType type)
10329 {
10330 	string prefix = isSigned(type) ? "" : "u";
10331 
10332 	if		(isInt(type))						return prefix + "int"	+ getBitWidthStr(type);
10333 	else if (isFloat(type))						return prefix + "float"	+ getBitWidthStr(type);
10334 	else if (type == DATA_TYPE_VEC2_SIGNED_16)	return "i16vec2";
10335 	else if (type == DATA_TYPE_VEC2_SIGNED_32)	return "i32vec2";
10336 	else										DE_ASSERT(false);
10337 
10338 	return "";
10339 }
10340 
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10341 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10342 {
10343 	const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10344 
10345 	return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10346 }
10347 
getAsmTypeName(ConversionDataType type,deUint32 elements=1)10348 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10349 {
10350 	string prefix;
10351 
10352 	if		(isInt(type))						prefix = isSigned(type) ? "i" : "u";
10353 	else if (isFloat(type))						prefix = "f";
10354 	else if (type == DATA_TYPE_VEC2_SIGNED_16)	return "i16vec2";
10355 	else if (type == DATA_TYPE_VEC2_SIGNED_32)	return "v2i32";
10356 	else										DE_ASSERT(false);
10357 	if ((isInt(type) || isFloat(type)) && elements == 2)
10358 	{
10359 		prefix = "v2" + prefix;
10360 	}
10361 
10362 	return prefix + getBitWidthStr(type);
10363 }
10364 
10365 template<typename T>
getSpecializedBuffer(deInt64 number,deUint32 elements=1)10366 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10367 {
10368 	return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10369 }
10370 
getBuffer(ConversionDataType type,deInt64 number,deUint32 elements=1)10371 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10372 {
10373 	switch (type)
10374 	{
10375 		case DATA_TYPE_SIGNED_8:		return getSpecializedBuffer<deInt8>(number, elements);
10376 		case DATA_TYPE_SIGNED_16:		return getSpecializedBuffer<deInt16>(number, elements);
10377 		case DATA_TYPE_SIGNED_32:		return getSpecializedBuffer<deInt32>(number, elements);
10378 		case DATA_TYPE_SIGNED_64:		return getSpecializedBuffer<deInt64>(number, elements);
10379 		case DATA_TYPE_UNSIGNED_8:		return getSpecializedBuffer<deUint8>(number, elements);
10380 		case DATA_TYPE_UNSIGNED_16:		return getSpecializedBuffer<deUint16>(number, elements);
10381 		case DATA_TYPE_UNSIGNED_32:		return getSpecializedBuffer<deUint32>(number, elements);
10382 		case DATA_TYPE_UNSIGNED_64:		return getSpecializedBuffer<deUint64>(number, elements);
10383 		case DATA_TYPE_FLOAT_16:		return getSpecializedBuffer<deUint16>(number, elements);
10384 		case DATA_TYPE_FLOAT_32:		return getSpecializedBuffer<deUint32>(number, elements);
10385 		case DATA_TYPE_FLOAT_64:		return getSpecializedBuffer<deUint64>(number, elements);
10386 		case DATA_TYPE_VEC2_SIGNED_16:	return getSpecializedBuffer<deUint32>(number, elements);
10387 		case DATA_TYPE_VEC2_SIGNED_32:	return getSpecializedBuffer<deUint64>(number, elements);
10388 
10389 		default:						TCU_THROW(InternalError, "Unimplemented type passed");
10390 	}
10391 }
10392 
usesInt8(ConversionDataType from,ConversionDataType to)10393 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10394 {
10395 	return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10396 			from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10397 }
10398 
usesInt16(ConversionDataType from,ConversionDataType to)10399 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10400 {
10401 	return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10402 			from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10403 			from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10404 }
10405 
usesInt32(ConversionDataType from,ConversionDataType to)10406 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10407 {
10408 	return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10409 			from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10410 			from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10411 }
10412 
usesInt64(ConversionDataType from,ConversionDataType to)10413 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10414 {
10415 	return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10416 			from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10417 }
10418 
usesFloat16(ConversionDataType from,ConversionDataType to)10419 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10420 {
10421 	return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10422 }
10423 
usesFloat32(ConversionDataType from,ConversionDataType to)10424 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10425 {
10426 	return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10427 }
10428 
usesFloat64(ConversionDataType from,ConversionDataType to)10429 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10430 {
10431 	return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10432 }
10433 
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10434 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10435 {
10436 	if (usesInt16(from, to) && !usesInt32(from, to))
10437 		vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10438 
10439 	if (usesInt64(from, to))
10440 		vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10441 
10442 	if (usesFloat64(from, to))
10443 		vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10444 
10445 	if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10446 	{
10447 		extensions.push_back("VK_KHR_16bit_storage");
10448 		vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10449 	}
10450 
10451 	if (usesFloat16(from, to) || usesInt8(from, to))
10452 	{
10453 		extensions.push_back("VK_KHR_shader_float16_int8");
10454 
10455 		if (usesFloat16(from, to))
10456 		{
10457 			vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10458 		}
10459 
10460 		if (usesInt8(from, to))
10461 		{
10462 			vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10463 
10464 			extensions.push_back("VK_KHR_8bit_storage");
10465 			vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10466 		}
10467 	}
10468 }
10469 
10470 struct ConvertCase
10471 {
ConvertCasevkt::SpirVAssembly::ConvertCase10472 	ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10473 	: m_fromType		(from)
10474 	, m_toType			(to)
10475 	, m_elements		(1)
10476 	, m_useStorageExt	(useStorageExt)
10477 	, m_name			(getTestName(from, to, suffix))
10478 	{
10479 		string caps;
10480 		string decl;
10481 		string exts;
10482 
10483 		m_asmTypes["inStorageType"]	= getAsmTypeName(from);
10484 		m_asmTypes["outStorageType"] = getAsmTypeName(to);
10485 		m_asmTypes["inCast"] = "OpCopyObject";
10486 		m_asmTypes["outCast"] = "OpCopyObject";
10487 		// If the storage extensions are being avoided, tests instead uses
10488 		// vectors so that they are easily convertible to 32-bit integers.
10489 		// |m_elements| indicates the size of the vector. It modifies how many
10490 		// items added to the buffers and converted in the tests.
10491 		//
10492 		// Currently only supports 1 (default) or 2 elements.
10493 		if (!m_useStorageExt)
10494 		{
10495 			bool in_change = false;
10496 			bool out_change = false;
10497 			if (usesFloat16(from, from) || usesInt16(from, from))
10498 			{
10499 				m_asmTypes["inStorageType"] = "u32";
10500 				m_asmTypes["inCast"] = "OpBitcast";
10501 				m_elements = 2;
10502 				in_change = true;
10503 			}
10504 			if (usesFloat16(to, to) || usesInt16(to, to))
10505 			{
10506 				m_asmTypes["outStorageType"] = "u32";
10507 				m_asmTypes["outCast"] = "OpBitcast";
10508 				m_elements = 2;
10509 				out_change = true;
10510 			}
10511 			if (in_change && !out_change)
10512 			{
10513 				m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10514 			}
10515 			if (!in_change && out_change)
10516 			{
10517 				m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10518 			}
10519 		}
10520 
10521 		// Safety check for implementation.
10522 		if (m_elements < 1 || m_elements > 2)
10523 			TCU_THROW(InternalError, "Unsupported number of elements");
10524 
10525 		m_asmTypes["inputType"]		= getAsmTypeName(from, m_elements);
10526 		m_asmTypes["outputType"]	= getAsmTypeName(to, m_elements);
10527 
10528 		m_inputBuffer = getBuffer(from, number, m_elements);
10529 		if (separateOutput)
10530 			m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10531 		else
10532 			m_outputBuffer = getBuffer(to, number, m_elements);
10533 
10534 		if (usesInt8(from, to))
10535 		{
10536 			bool requiresInt8Capability = true;
10537 			if (instruction == "OpUConvert" || instruction == "OpSConvert")
10538 			{
10539 				// Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10540 				if (usesInt32(from, to))
10541 					requiresInt8Capability = false;
10542 			}
10543 
10544 			caps += "OpCapability StorageBuffer8BitAccess\n";
10545 			if (requiresInt8Capability)
10546 				caps += "OpCapability Int8\n";
10547 
10548 			decl += "%i8         = OpTypeInt 8 1\n"
10549 					"%u8         = OpTypeInt 8 0\n";
10550 
10551 			if (m_elements == 2)
10552 			{
10553 				decl += "%v2i8       = OpTypeVector %i8 2\n"
10554 						"%v2u8       = OpTypeVector %u8 2\n";
10555 			}
10556 			exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10557 		}
10558 
10559 		if (usesInt16(from, to))
10560 		{
10561 			bool requiresInt16Capability = true;
10562 
10563 			if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10564 			{
10565 				// Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10566 				if (usesInt32(from, to) || usesFloat32(from, to))
10567 					requiresInt16Capability = false;
10568 			}
10569 
10570 			decl += "%i16        = OpTypeInt 16 1\n"
10571 					"%u16        = OpTypeInt 16 0\n";
10572 			if (m_elements == 2)
10573 			{
10574 				decl += "%v2i16      = OpTypeVector %i16 2\n"
10575 						"%v2u16      = OpTypeVector %u16 2\n";
10576 			}
10577 			else
10578 			{
10579 				decl += "%i16vec2    = OpTypeVector %i16 2\n";
10580 			}
10581 
10582 			// Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10583 			if (requiresInt16Capability || !m_useStorageExt)
10584 				caps += "OpCapability Int16\n";
10585 		}
10586 
10587 		if (usesFloat16(from, to))
10588 		{
10589 			decl += "%f16        = OpTypeFloat 16\n";
10590 			if (m_elements == 2)
10591 			{
10592 				decl += "%v2f16      = OpTypeVector %f16 2\n";
10593 			}
10594 
10595 			// Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10596 			if (!usesFloat32(from, to) || !m_useStorageExt)
10597 				caps += "OpCapability Float16\n";
10598 		}
10599 
10600 		if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10601 		{
10602 			caps += "OpCapability StorageUniformBufferBlock16\n";
10603 			exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10604 		}
10605 
10606 		if (usesInt64(from, to))
10607 		{
10608 			caps += "OpCapability Int64\n";
10609 			decl += "%i64        = OpTypeInt 64 1\n"
10610 					"%u64        = OpTypeInt 64 0\n";
10611 			if (m_elements == 2)
10612 			{
10613 				decl += "%v2i64      = OpTypeVector %i64 2\n"
10614 						"%v2u64      = OpTypeVector %u64 2\n";
10615 			}
10616 		}
10617 
10618 		if (usesFloat64(from, to))
10619 		{
10620 			caps += "OpCapability Float64\n";
10621 			decl += "%f64        = OpTypeFloat 64\n";
10622 			if (m_elements == 2)
10623 			{
10624 				decl += "%v2f64        = OpTypeVector %f64 2\n";
10625 			}
10626 		}
10627 
10628 		m_asmTypes["datatype_capabilities"]		= caps;
10629 		m_asmTypes["datatype_additional_decl"]	= decl;
10630 		m_asmTypes["datatype_extensions"]		= exts;
10631 	}
10632 
10633 	ConversionDataType		m_fromType;
10634 	ConversionDataType		m_toType;
10635 	deUint32				m_elements;
10636 	bool					m_useStorageExt;
10637 	string					m_name;
10638 	map<string, string>		m_asmTypes;
10639 	BufferSp				m_inputBuffer;
10640 	BufferSp				m_outputBuffer;
10641 };
10642 
getConvertCaseShaderStr(const string & instruction,const ConvertCase & convertCase,bool addVectors=false)10643 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
10644 {
10645 	map<string, string> params = convertCase.m_asmTypes;
10646 
10647 	params["instruction"]	= instruction;
10648 	params["inDecorator"]	= getByteWidthStr(convertCase.m_fromType);
10649 	params["outDecorator"]	= getByteWidthStr(convertCase.m_toType);
10650 
10651 	std::string shader (
10652 		"OpCapability Shader\n"
10653 		"${datatype_capabilities}"
10654 		"${datatype_extensions:opt}"
10655 		"OpMemoryModel Logical GLSL450\n"
10656 		"OpEntryPoint GLCompute %main \"main\"\n"
10657 		"OpExecutionMode %main LocalSize 1 1 1\n"
10658 		"OpSource GLSL 430\n"
10659 		"OpName %main           \"main\"\n"
10660 		// Decorators
10661 		"OpDecorate %indata DescriptorSet 0\n"
10662 		"OpDecorate %indata Binding 0\n"
10663 		"OpDecorate %outdata DescriptorSet 0\n"
10664 		"OpDecorate %outdata Binding 1\n"
10665 		"OpDecorate %in_buf BufferBlock\n"
10666 		"OpDecorate %out_buf BufferBlock\n"
10667 		"OpMemberDecorate %in_buf 0 Offset 0\n"
10668 		"OpMemberDecorate %out_buf 0 Offset 0\n"
10669 		// Base types
10670 		"%void       = OpTypeVoid\n"
10671 		"%voidf      = OpTypeFunction %void\n"
10672 		"%u32        = OpTypeInt 32 0\n"
10673 		"%i32        = OpTypeInt 32 1\n"
10674 		"%f32        = OpTypeFloat 32\n"
10675 		"%v2i32      = OpTypeVector %i32 2\n"
10676 		"${datatype_additional_decl}"
10677 	);
10678 	if (addVectors)
10679 	{
10680 		shader += "%v2u32 = OpTypeVector %u32 2\n"
10681 					"%v2f32 = OpTypeVector %f32 2\n";
10682 	}
10683 	shader +=
10684 		"%uvec3      = OpTypeVector %u32 3\n"
10685 		// Derived types
10686 		"%in_ptr     = OpTypePointer Uniform %${inStorageType}\n"
10687 		"%out_ptr    = OpTypePointer Uniform %${outStorageType}\n"
10688 		"%in_buf     = OpTypeStruct %${inStorageType}\n"
10689 		"%out_buf    = OpTypeStruct %${outStorageType}\n"
10690 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10691 		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
10692 		"%indata     = OpVariable %in_bufptr Uniform\n"
10693 		"%outdata    = OpVariable %out_bufptr Uniform\n"
10694 		// Constants
10695 		"%zero       = OpConstant %i32 0\n"
10696 		// Main function
10697 		"%main       = OpFunction %void None %voidf\n"
10698 		"%label      = OpLabel\n"
10699 		"%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10700 		"%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10701 		"%inval      = OpLoad %${inStorageType} %inloc\n"
10702 		"%in_cast    = ${inCast} %${inputType} %inval\n"
10703 		"%conv       = ${instruction} %${outputType} %in_cast\n"
10704 		"%out_cast   = ${outCast} %${outStorageType} %conv\n"
10705 		"              OpStore %outloc %out_cast\n"
10706 		"              OpReturn\n"
10707 		"              OpFunctionEnd\n"
10708 	;
10709 
10710 	return StringTemplate(shader).specialize(params);
10711 }
10712 
createConvertCases(vector<ConvertCase> & testCases,const string & instruction)10713 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10714 {
10715 	if (instruction == "OpUConvert")
10716 	{
10717 		// Convert unsigned int to unsigned int
10718 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_16,		42));
10719 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_32,		73));
10720 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_64,		121));
10721 
10722 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_8,		33));
10723 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_32,		60653));
10724 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_64,		17991));
10725 
10726 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_64,		904256275));
10727 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_16,		6275));
10728 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_8,		17));
10729 
10730 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_32,		701256243));
10731 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_16,		4741));
10732 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_8,		65));
10733 
10734 		// Zero extension for int->uint
10735 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_16,		56));
10736 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_32,		-47,								true,	209));
10737 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_64,		-5,									true,	251));
10738 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_32,		14669));
10739 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_64,		-3341,								true,	62195));
10740 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_64,		973610259));
10741 
10742 		// Truncate for int->uint
10743 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_8,		-25711,								true,	145));
10744 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_8,		103));
10745 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_8,		-1067742499291926803ll,				true,	237));
10746 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_16,		12382));
10747 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_32,		-972812359,							true,	3322154937u));
10748 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_16,		-1067742499291926803ll,				true,	61165));
10749 	}
10750 	else if (instruction == "OpSConvert")
10751 	{
10752 		// Sign extension int->int
10753 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_16,		-30));
10754 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_32,		55));
10755 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_64,		-3));
10756 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_32,		14669));
10757 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_64,		-3341));
10758 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_64,		973610259));
10759 
10760 		// Truncate for int->int
10761 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_8,			81));
10762 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_8,			-93));
10763 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_8,			3182748172687672ll,					true,	56));
10764 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_16,		12382));
10765 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_32,		-972812359));
10766 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_16,		-1067742499291926803ll,				true,	-4371));
10767 
10768 		// Sign extension for int->uint
10769 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_16,		56));
10770 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_32,		-47,								true,	4294967249u));
10771 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_64,		-5,									true,	18446744073709551611ull));
10772 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_32,		14669));
10773 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_64,		-3341,								true,	18446744073709548275ull));
10774 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_64,		973610259));
10775 
10776 		// Truncate for int->uint
10777 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_8,		-25711,								true,	145));
10778 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_8,		103));
10779 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_8,		-1067742499291926803ll,				true,	237));
10780 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_16,		12382));
10781 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_32,		-972812359,							true,	3322154937u));
10782 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_16,		-1067742499291926803ll,				true,	61165));
10783 
10784 		// Sign extension for uint->int
10785 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_16,		71));
10786 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_32,		201,								true,	-55));
10787 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_64,		188,								true,	-68));
10788 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_32,		14669));
10789 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_64,		62195,								true,	-3341));
10790 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_64,		973610259));
10791 
10792 		// Truncate for uint->int
10793 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_8,			67));
10794 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_8,			133,								true,	-123));
10795 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_8,			836927654193256494ull,				true,	46));
10796 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_16,		12382));
10797 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_32,		18446744072736739257ull,			true,	-972812359));
10798 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_16,		17379001574417624813ull,			true,	-4371));
10799 
10800 		// Convert i16vec2 to i32vec2 and vice versa
10801 		// Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10802 		// The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10803 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_VEC2_SIGNED_16,	DATA_TYPE_VEC2_SIGNED_32,	(33413u << 16)			| 27593,	true,	(4294935173ull << 32)	| 27593));
10804 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_VEC2_SIGNED_32,	DATA_TYPE_VEC2_SIGNED_16,	(4294935173ull << 32)	| 27593,	true,	(33413u << 16)			| 27593));
10805 	}
10806 	else if (instruction == "OpFConvert")
10807 	{
10808 		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10809 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_64,			0x449a4000,							true,	0x4093480000000000));
10810 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_32,			0x4093480000000000,					true,	0x449a4000));
10811 
10812 		// Conversion to/from 32-bit floats are supported by both 16-bit
10813 		// storage and Float16. The tests are duplicated to exercise both
10814 		// cases.
10815 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_16,			0x449a4000,							true,	0x64D2));
10816 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_32,			0x64D2,								true,	0x449a4000));
10817 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_16,			0x449a4000,							true,	0x64D2,					"no_storage",	false));
10818 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_32,			0x64D2,								true,	0x449a4000,				"no_storage",	false));
10819 
10820 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_64,			0x64D2,								true,	0x4093480000000000));
10821 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_16,			0x4093480000000000,					true,	0x64D2));
10822 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_64,			0x64D2,								true,	0x4093480000000000,		"no_storage",	false));
10823 	    testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_16,			0x4093480000000000,					true,	0x64D2,					"no_storage",	false));
10824 
10825 	}
10826 	else if (instruction == "OpConvertFToU")
10827 	{
10828 		// Normal numbers from uint8 range
10829 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x5020,								true,	33,									"33",	false));
10830 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x42280000,							true,	42,									"42"));
10831 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x4067800000000000ull,				true,	188,								"188"));
10832 
10833 		// Maximum uint8 value
10834 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x5BF8,								true,	255,								"max",	false));
10835 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x437F0000,							true,	255,								"max"));
10836 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x406FE00000000000ull,				true,	255,								"max"));
10837 
10838 		// +0
10839 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x0000,								true,	0,									"p0",	false));
10840 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x00000000,							true,	0,									"p0"));
10841 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x0000000000000000ull,				true,	0,									"p0"));
10842 
10843 		// -0
10844 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x8000,								true,	0,									"m0",	false));
10845 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x80000000,							true,	0,									"m0"));
10846 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x8000000000000000ull,				true,	0,									"m0"));
10847 
10848 		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10849 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x64D2,								true,	1234,								"1234",	false));
10850 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x64D2,								true,	1234,								"1234",	false));
10851 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x64D2,								true,	1234,								"1234",	false));
10852 
10853 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10854 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x7BFF,								true,	65504,								"max",	false));
10855 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x7BFF,								true,	65504,								"max",	false));
10856 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x7BFF,								true,	65504,								"max",	false));
10857 
10858 		// +0
10859 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x0000,								true,	0,									"p0",	false));
10860 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x0000,								true,	0,									"p0",	false));
10861 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x0000,								true,	0,									"p0",	false));
10862 
10863 		// -0
10864 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x8000,								true,	0,									"m0",	false));
10865 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x8000,								true,	0,									"m0",	false));
10866 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x8000,								true,	0,									"m0",	false));
10867 
10868 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_16,		0x449a4000,							true,	1234));
10869 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_32,		0x449a4000,							true,	1234));
10870 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_64,		0x449a4000,							true,	1234));
10871 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_16,		0x4093480000000000,					true,	1234));
10872 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_32,		0x4093480000000000,					true,	1234));
10873 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_64,		0x4093480000000000,					true,	1234));
10874 	}
10875 	else if (instruction == "OpConvertUToF")
10876 	{
10877 		// Normal numbers from uint8 range
10878 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_16,			116,								true,	0x5740,								"116",	false));
10879 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_32,			232,								true,	0x43680000,							"232"));
10880 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_64,			164,								true,	0x4064800000000000ull,				"164"));
10881 
10882 		// Maximum uint8 value
10883 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_16,			255,								true,	0x5BF8,								"max",	false));
10884 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_32,			255,								true,	0x437F0000,							"max"));
10885 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_64,			255,								true,	0x406FE00000000000ull,				"max"));
10886 
10887 		// All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
10888 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10889 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10890 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10891 
10892 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10893 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10894 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10895 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10896 
10897 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			4294967296ll,						true,	0x4f800000,							"4294967296",	false));
10898 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_64,			4294967296ll,						true,	0x41f0000000000000,					"4294967296",	false));
10899 
10900 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			0xffffff0000000000,					true,	0x5f7fffff,							"max",	false));
10901 
10902 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10903 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10904 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10905 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10906 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10907 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10908 	}
10909 	else if (instruction == "OpConvertFToS")
10910 	{
10911 		// Normal numbers from int8 range
10912 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0xC980,								true,	-11,								"m11",	false));
10913 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0xC2140000,							true,	-37,								"m37"));
10914 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0xC050800000000000ull,				true,	-66,								"m66"));
10915 
10916 		// Minimum int8 value
10917 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0xD800,								true,	-128,								"min",	false));
10918 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0xC3000000,							true,	-128,								"min"));
10919 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0xC060000000000000ull,				true,	-128,								"min"));
10920 
10921 		// Maximum int8 value
10922 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x57F0,								true,	127,								"max",	false));
10923 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x42FE0000,							true,	127,								"max"));
10924 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x405FC00000000000ull,				true,	127,								"max"));
10925 
10926 		// +0
10927 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x0000,								true,	0,									"p0",	false));
10928 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x00000000,							true,	0,									"p0"));
10929 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x0000000000000000ull,				true,	0,									"p0"));
10930 
10931 		// -0
10932 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x8000,								true,	0,									"m0",	false));
10933 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x80000000,							true,	0,									"m0"));
10934 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x8000000000000000ull,				true,	0,									"m0"));
10935 
10936 		// All hexadecimal values below represent -1234.0 as 32/64-bit IEEE 754 float
10937 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0xE4D2,								true,	-1234,								"m1234",	false));
10938 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0xE4D2,								true,	-1234,								"m1234",	false));
10939 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0xE4D2,								true,	-1234,								"m1234",	false));
10940 
10941 		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10942 		// 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10943 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0xF800,								true,	-32768,								"min",	false));
10944 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0xFBFF,								true,	-65504,								"min",	false));
10945 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0xFBFF,								true,	-65504,								"min",	false));
10946 
10947 		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10948 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10949 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x77FF,								true,	32752,								"max",	false));
10950 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x7BFF,								true,	65504,								"max",	false));
10951 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x7BFF,								true,	65504,								"max",	false));
10952 
10953 		// +0
10954 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x0000,								true,	0,									"p0",	false));
10955 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x0000,								true,	0,									"p0",	false));
10956 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x0000,								true,	0,									"p0",	false));
10957 
10958 		// -0
10959 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x8000,								true,	0,									"m0",	false));
10960 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x8000,								true,	0,									"m0",	false));
10961 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x8000,								true,	0,									"m0",	false));
10962 
10963 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0xc49a4000,							true,	-1234));
10964 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_32,		0xc49a4000,							true,	-1234));
10965 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_64,		0xc49a4000,							true,	-1234));
10966 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_16,		0xc093480000000000,					true,	-1234));
10967 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_32,		0xc093480000000000,					true,	-1234));
10968 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_64,		0xc093480000000000,					true,	-1234));
10969 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0x453b9000,							true,	 3001,								"p3001"));
10970 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0xc53b9000,							true,	-3001,								"m3001"));
10971 	}
10972 	else if (instruction == "OpConvertSToF")
10973 	{
10974 		// Normal numbers from int8 range
10975 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			-12,								true,	0xCA00,								"m21",	false));
10976 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			-21,								true,	0xC1A80000,							"m21"));
10977 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			-99,								true,	0xC058C00000000000ull,				"m99"));
10978 
10979 		// Minimum int8 value
10980 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			-128,								true,	0xD800,								"min",	false));
10981 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			-128,								true,	0xC3000000,							"min"));
10982 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			-128,								true,	0xC060000000000000ull,				"min"));
10983 
10984 		// Maximum int8 value
10985 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			127,								true,	0x57F0,								"max",	false));
10986 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			127,								true,	0x42FE0000,							"max"));
10987 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			127,								true,	0x405FC00000000000ull,				"max"));
10988 
		// All hexadecimal values below represent -1234.0 as 16-bit IEEE 754 float
10990 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10991 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10992 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10993 
10994 		// 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
10995 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			32768,								true,	0x7800,								"p32768",	false));
10996 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			32768,								true,	0x7800,								"p32768",	false));
10997 
10998 		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10999 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"m32768",	false));
11000 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"m32768",	false));
11001 
		// 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
11003 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"min",	false));
11004 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-65504,								true,	0xFBFF,								"min",	false));
11005 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-65504,								true,	0xFBFF,								"min",	false));
11006 
11007 		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
11008 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
11009 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			32752,								true,	0x77FF,								"max",	false));
11010 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
11011 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
11012 
11013 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			4294967296ll,						true,	0x4f800000,							"p4294967296",	false));
11014 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			4294967296ll,						true,	0x41f0000000000000,					"p4294967296",	false));
11015 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-4294967296ll,						true,	0xcf800000,							"m4294967296",	false));
11016 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			-4294967296ll,						true,	0xc1f0000000000000,					"m4294967296",	false));
11017 
11018 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			0x7fffff8000000000,					true,	0x5effffff,							"max",	false));
11019 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-0x7fffff8000000000,				true,	0xdeffffff,							"min",	false));
11020 
11021 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11022 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11023 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11024 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11025 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11026 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11027 	}
11028 	else
11029 		DE_FATAL("Unknown instruction");
11030 }
11031 
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11032 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
11033 {
11034 	map<string, string> params = convertCase.m_asmTypes;
11035 	map<string, string> fragments;
11036 
11037 	params["instruction"] = instruction;
11038 	params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11039 
11040 	const StringTemplate decoration (
11041 		"      OpDecorate %SSBOi DescriptorSet 0\n"
11042 		"      OpDecorate %SSBOo DescriptorSet 0\n"
11043 		"      OpDecorate %SSBOi Binding 0\n"
11044 		"      OpDecorate %SSBOo Binding 1\n"
11045 		"      OpDecorate %s_SSBOi Block\n"
11046 		"      OpDecorate %s_SSBOo Block\n"
11047 		"OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11048 		"OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11049 
11050 	const StringTemplate pre_main (
11051 		"${datatype_additional_decl:opt}"
11052 		"    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11053 		"   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11054 		"   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11055 		"   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11056 		" %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11057 		" %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11058 		"     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11059 		"     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11060 
11061 	const StringTemplate testfun (
11062 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11063 		"%param      = OpFunctionParameter %v4f32\n"
11064 		"%label      = OpLabel\n"
11065 		"%iLoc       = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11066 		"%oLoc       = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11067 		"%valIn      = OpLoad %${inStorageType} %iLoc\n"
11068 		"%valInCast  = ${inCast} %${inputType} %valIn\n"
11069 		"%conv       = ${instruction} %${outputType} %valInCast\n"
11070 		"%valOutCast = ${outCast} %${outStorageType} %conv\n"
11071 		"              OpStore %oLoc %valOutCast\n"
11072 		"              OpReturnValue %param\n"
11073 		"              OpFunctionEnd\n");
11074 
11075 	params["datatype_extensions"] =
11076 		params["datatype_extensions"] +
11077 		"OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11078 
11079 	fragments["capability"]	= params["datatype_capabilities"];
11080 	fragments["extension"]	= params["datatype_extensions"];
11081 	fragments["decoration"]	= decoration.specialize(params);
11082 	fragments["pre_main"]	= pre_main.specialize(params);
11083 	fragments["testfun"]	= testfun.specialize(params);
11084 
11085 	return fragments;
11086 }
11087 
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11088 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
11089 {
11090 	map<string, string> params = convertCase.m_asmTypes;
11091 	map<string, string> fragments;
11092 
11093 	params["instruction"] = instruction;
11094 	params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11095 
11096 	const StringTemplate decoration(
11097 		"      OpDecorate %SSBOi DescriptorSet 0\n"
11098 		"      OpDecorate %SSBOo DescriptorSet 0\n"
11099 		"      OpDecorate %SSBOi Binding 0\n"
11100 		"      OpDecorate %SSBOo Binding 1\n"
11101 		"      OpDecorate %s_SSBOi Block\n"
11102 		"      OpDecorate %s_SSBOo Block\n"
11103 		"OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11104 		"OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11105 
11106 	const StringTemplate pre_main(
11107 		"${datatype_additional_decl:opt}"
11108 		"    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11109 		"   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11110 		"   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11111 		"   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11112 		" %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11113 		" %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11114 		"     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11115 		"     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11116 
11117 	const StringTemplate testfun(
11118 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11119 		"%param     = OpFunctionParameter %v4f32\n"
11120 		"%label     = OpLabel\n"
11121 		"%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11122 		"%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11123 		"%inval      = OpLoad %${inStorageType} %iLoc\n"
11124 		"%in_cast    = ${inCast} %${inputType} %inval\n"
11125 		"%conv       = ${instruction} %${outputType} %in_cast\n"
11126 		"%out_cast   = ${outCast} %${outStorageType} %conv\n"
11127 		"              OpStore %oLoc %out_cast\n"
11128 		"              OpReturnValue %param\n"
11129 		"              OpFunctionEnd\n");
11130 
11131 	params["datatype_extensions"] =
11132 		params["datatype_extensions"] +
11133 		"OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11134 
11135 	fragments["capability"] = params["datatype_capabilities"];
11136 	fragments["extension"] = params["datatype_extensions"];
11137 	fragments["decoration"] = decoration.specialize(params);
11138 	fragments["pre_main"] = pre_main.specialize(params);
11139 	fragments["testfun"] = testfun.specialize(params);
11140 	return fragments;
11141 }
11142 
11143 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11144 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11145 {
11146 	de::MovePtr<tcu::TestCaseGroup>		group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11147 	vector<ConvertCase>					testCases;
11148 	createConvertCases(testCases, instruction);
11149 
11150 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11151 	{
11152 		ComputeShaderSpec spec;
11153 		spec.assembly			= getConvertCaseShaderStr(instruction, *test, true);
11154 		spec.numWorkGroups		= IVec3(1, 1, 1);
11155 		spec.inputs.push_back	(test->m_inputBuffer);
11156 		spec.outputs.push_back	(test->m_outputBuffer);
11157 
11158 		getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11159 
11160 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
11161 	}
11162 	return group.release();
11163 }
11164 
11165 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11166 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11167 {
11168 	de::MovePtr<tcu::TestCaseGroup>		group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11169 	vector<ConvertCase>					testCases;
11170 	createConvertCases(testCases, instruction);
11171 
11172 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11173 	{
11174 		map<string, string>	fragments		= (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11175 		VulkanFeatures		vulkanFeatures;
11176 		GraphicsResources	resources;
11177 		vector<string>		extensions;
11178 		SpecConstants		noSpecConstants;
11179 		PushConstants		noPushConstants;
11180 		GraphicsInterfaces	noInterfaces;
11181 		tcu::RGBA			defaultColors[4];
11182 
11183 		getDefaultColors			(defaultColors);
11184 		resources.inputs.push_back	(Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11185 		resources.outputs.push_back	(Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11186 		extensions.push_back		("VK_KHR_storage_buffer_storage_class");
11187 
11188 		getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
11189 
11190 		vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics	= true;
11191 		vulkanFeatures.coreFeatures.fragmentStoresAndAtomics		= true;
11192 
11193 		createTestsForAllStages(
11194 			test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11195 			noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11196 	}
11197 	return group.release();
11198 }
11199 
11200 // Constant-Creation Instructions: OpConstant, OpConstantComposite
createOpConstantFloat16Tests(tcu::TestContext & testCtx)11201 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11202 {
11203 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11204 	RGBA							inputColors[4];
11205 	RGBA							outputColors[4];
11206 	vector<string>					extensions;
11207 	GraphicsResources				resources;
11208 	VulkanFeatures					features;
11209 
11210 	const char						functionStart[]	 =
11211 		"%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11212 		"%param1                = OpFunctionParameter %v4f32\n"
11213 		"%lbl                   = OpLabel\n";
11214 
11215 	const char						functionEnd[]		=
11216 		"%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
11217 		"                         OpReturnValue %transformed_param_32\n"
11218 		"                         OpFunctionEnd\n";
11219 
11220 	struct NameConstantsCode
11221 	{
11222 		string name;
11223 		string constants;
11224 		string code;
11225 	};
11226 
11227 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11228 			"%f16                  = OpTypeFloat 16\n"                                                 \
11229 			"%c_f16_0              = OpConstant %f16 0.0\n"                                            \
11230 			"%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
11231 			"%c_f16_1              = OpConstant %f16 1.0\n"                                            \
11232 			"%v4f16                = OpTypeVector %f16 4\n"                                            \
11233 			"%fp_f16               = OpTypePointer Function %f16\n"                                    \
11234 			"%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
11235 			"%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11236 			"%a4f16                = OpTypeArray %f16 %c_u32_4\n"                                      \
11237 
11238 	NameConstantsCode				tests[] =
11239 	{
11240 		{
11241 			"vec4",
11242 
11243 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11244 			"%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11245 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11246 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"
11247 		},
11248 		{
11249 			"struct",
11250 
11251 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11252 			"%stype                = OpTypeStruct %v4f16 %f16\n"
11253 			"%fp_stype             = OpTypePointer Function %stype\n"
11254 			"%f16_n_1              = OpConstant %f16 -1.0\n"
11255 			"%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11256 			"%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11257 			"%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
11258 
11259 			"%v                    = OpVariable %fp_stype Function %cval\n"
11260 			"%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11261 			"%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
11262 			"%vec_val              = OpLoad %v4f16 %vec_ptr\n"
11263 			"%f16_val              = OpLoad %f16 %f16_ptr\n"
11264 			"%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11265 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11266 			"%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11267 			"%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11268 		},
11269 		{
11270 			// [1|0|0|0.5] [x] = x + 0.5
11271 			// [0|1|0|0.5] [y] = y + 0.5
11272 			// [0|0|1|0.5] [z] = z + 0.5
11273 			// [0|0|0|1  ] [1] = 1
11274 			"matrix",
11275 
11276 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11277 			"%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
11278 			"%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11279 			"%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11280 			"%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11281 			"%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11282 			"%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11283 
11284 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11285 			"%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
11286 		},
11287 		{
11288 			"array",
11289 
11290 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11291 			"%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11292 			"%fp_a4f16             = OpTypePointer Function %a4f16\n"
11293 			"%f16_n_1              = OpConstant %f16 -1.0\n"
11294 			"%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11295 			"%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11296 
11297 			"%v                    = OpVariable %fp_a4f16 Function %carr\n"
11298 			"%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
11299 			"%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
11300 			"%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
11301 			"%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
11302 			"%f_val                = OpLoad %f16 %f\n"
11303 			"%f1_val               = OpLoad %f16 %f1\n"
11304 			"%f2_val               = OpLoad %f16 %f2\n"
11305 			"%f3_val               = OpLoad %f16 %f3\n"
11306 			"%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
11307 			"%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
11308 			"%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
11309 			"%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11310 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11311 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11312 		},
11313 		{
11314 			//
11315 			// [
11316 			//   {
11317 			//      0.0,
11318 			//      [ 1.0, 1.0, 1.0, 1.0]
11319 			//   },
11320 			//   {
11321 			//      1.0,
11322 			//      [ 0.0, 0.5, 0.0, 0.0]
11323 			//   }, //     ^^^
11324 			//   {
11325 			//      0.0,
11326 			//      [ 1.0, 1.0, 1.0, 1.0]
11327 			//   }
11328 			// ]
11329 			"array_of_struct_of_array",
11330 
11331 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11332 			"%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11333 			"%fp_a4f16             = OpTypePointer Function %a4f16\n"
11334 			"%stype                = OpTypeStruct %f16 %a4f16\n"
11335 			"%a3stype              = OpTypeArray %stype %c_u32_3\n"
11336 			"%fp_a3stype           = OpTypePointer Function %a3stype\n"
11337 			"%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11338 			"%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11339 			"%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11340 			"%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11341 			"%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11342 
11343 			"%v                    = OpVariable %fp_a3stype Function %carr\n"
11344 			"%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11345 			"%f_l                  = OpLoad %f16 %f\n"
11346 			"%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11347 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11348 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11349 		}
11350 	};
11351 
11352 	getHalfColorsFullAlpha(inputColors);
11353 	outputColors[0] = RGBA(255, 255, 255, 255);
11354 	outputColors[1] = RGBA(255, 127, 127, 255);
11355 	outputColors[2] = RGBA(127, 255, 127, 255);
11356 	outputColors[3] = RGBA(127, 127, 255, 255);
11357 
11358 	extensions.push_back("VK_KHR_shader_float16_int8");
11359 	features.extFloat16Int8.shaderFloat16 = true;
11360 
11361 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11362 	{
11363 		map<string, string> fragments;
11364 
11365 		fragments["capability"]	= "OpCapability Float16\n";
11366 		fragments["pre_main"]	= tests[testNdx].constants;
11367 		fragments["testfun"]	= string(functionStart) + tests[testNdx].code + functionEnd;
11368 
11369 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11370 	}
11371 	return opConstantCompositeTests.release();
11372 }
11373 
11374 template<typename T>
11375 void finalizeTestsCreation (T&							specResource,
11376 							const map<string, string>&	fragments,
11377 							tcu::TestContext&			testCtx,
11378 							tcu::TestCaseGroup&			testGroup,
11379 							const std::string&			testName,
11380 							const VulkanFeatures&		vulkanFeatures,
11381 							const vector<string>&		extensions,
11382 							const IVec3&				numWorkGroups,
11383 							const bool					splitRenderArea = false);
11384 
11385 template<>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11386 void finalizeTestsCreation (GraphicsResources&			specResource,
11387 							const map<string, string>&	fragments,
11388 							tcu::TestContext&			,
11389 							tcu::TestCaseGroup&			testGroup,
11390 							const std::string&			testName,
11391 							const VulkanFeatures&		vulkanFeatures,
11392 							const vector<string>&		extensions,
11393 							const IVec3&				,
11394 							const bool					splitRenderArea)
11395 {
11396 	RGBA defaultColors[4];
11397 	getDefaultColors(defaultColors);
11398 
11399 	createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11400 }
11401 
11402 template<>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11403 void finalizeTestsCreation (ComputeShaderSpec&			specResource,
11404 							const map<string, string>&	fragments,
11405 							tcu::TestContext&			testCtx,
11406 							tcu::TestCaseGroup&			testGroup,
11407 							const std::string&			testName,
11408 							const VulkanFeatures&		vulkanFeatures,
11409 							const vector<string>&		extensions,
11410 							const IVec3&				numWorkGroups,
11411 							bool)
11412 {
11413 	specResource.numWorkGroups = numWorkGroups;
11414 	specResource.requestedVulkanFeatures = vulkanFeatures;
11415 	specResource.extensions = extensions;
11416 
11417 	specResource.assembly = makeComputeShaderAssembly(fragments);
11418 
11419 	testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
11420 }
11421 
11422 template<class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11423 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11424 {
11425 	const string						nan					= nanSupported ? "_nan" : "";
11426 	const string						groupName			= "logical" + nan;
11427 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11428 
11429 	de::Random							rnd					(deStringHash(testGroup->getName()));
11430 	const string						spvCapabilities		= string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11431 	const string						spvExtensions		= (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11432 	const string						spvExecutionMode	= nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11433 	const deUint32						numDataPointsScalar	= 16;
11434 	const deUint32						numDataPointsVector	= 14;
11435 	const vector<deFloat16>				float16DataScalar	= getFloat16s(rnd, numDataPointsScalar);
11436 	const vector<deFloat16>				float16DataVector	= getFloat16s(rnd, numDataPointsVector);
11437 	const vector<deFloat16>				float16Data1		= squarize(float16DataScalar, 0);			// Total Size: square(sizeof(float16DataScalar))
11438 	const vector<deFloat16>				float16Data2		= squarize(float16DataScalar, 1);
11439 	const vector<deFloat16>				float16DataVec1		= squarizeVector(float16DataVector, 0);		// Total Size: 2 * (square(square(sizeof(float16DataVector))))
11440 	const vector<deFloat16>				float16DataVec2		= squarizeVector(float16DataVector, 1);
11441 	const vector<deFloat16>				float16OutUnused	(float16Data1.size(), 0);
11442 	const vector<deFloat16>				float16OutVecUnused	(float16DataVec1.size(), 0);
11443 
11444 	struct TestOp
11445 	{
11446 		const char*		opCode;
11447 		VerifyIOFunc	verifyFuncNan;
11448 		VerifyIOFunc	verifyFuncNonNan;
11449 		const deUint32	argCount;
11450 	};
11451 
11452 	const TestOp	testOps[]	=
11453 	{
11454 		{ "OpIsNan"						,	compareFP16Logical<fp16isNan,				true,  false, true>,	compareFP16Logical<fp16isNan,				true,  false, false>,	1	},
11455 		{ "OpIsInf"						,	compareFP16Logical<fp16isInf,				true,  false, true>,	compareFP16Logical<fp16isInf,				true,  false, false>,	1	},
11456 		{ "OpFOrdEqual"					,	compareFP16Logical<fp16isEqual,				false, true,  true>,	compareFP16Logical<fp16isEqual,				false, true,  false>,	2	},
11457 		{ "OpFUnordEqual"				,	compareFP16Logical<fp16isEqual,				false, false, true>,	compareFP16Logical<fp16isEqual,				false, false, false>,	2	},
11458 		{ "OpFOrdNotEqual"				,	compareFP16Logical<fp16isUnequal,			false, true,  true>,	compareFP16Logical<fp16isUnequal,			false, true,  false>,	2	},
11459 		{ "OpFUnordNotEqual"			,	compareFP16Logical<fp16isUnequal,			false, false, true>,	compareFP16Logical<fp16isUnequal,			false, false, false>,	2	},
11460 		{ "OpFOrdLessThan"				,	compareFP16Logical<fp16isLess,				false, true,  true>,	compareFP16Logical<fp16isLess,				false, true,  false>,	2	},
11461 		{ "OpFUnordLessThan"			,	compareFP16Logical<fp16isLess,				false, false, true>,	compareFP16Logical<fp16isLess,				false, false, false>,	2	},
11462 		{ "OpFOrdGreaterThan"			,	compareFP16Logical<fp16isGreater,			false, true,  true>,	compareFP16Logical<fp16isGreater,			false, true,  false>,	2	},
11463 		{ "OpFUnordGreaterThan"			,	compareFP16Logical<fp16isGreater,			false, false, true>,	compareFP16Logical<fp16isGreater,			false, false, false>,	2	},
11464 		{ "OpFOrdLessThanEqual"			,	compareFP16Logical<fp16isLessOrEqual,		false, true,  true>,	compareFP16Logical<fp16isLessOrEqual,		false, true,  false>,	2	},
11465 		{ "OpFUnordLessThanEqual"		,	compareFP16Logical<fp16isLessOrEqual,		false, false, true>,	compareFP16Logical<fp16isLessOrEqual,		false, false, false>,	2	},
11466 		{ "OpFOrdGreaterThanEqual"		,	compareFP16Logical<fp16isGreaterOrEqual,	false, true,  true>,	compareFP16Logical<fp16isGreaterOrEqual,	false, true,  false>,	2	},
11467 		{ "OpFUnordGreaterThanEqual"	,	compareFP16Logical<fp16isGreaterOrEqual,	false, false, true>,	compareFP16Logical<fp16isGreaterOrEqual,	false, false, false>,	2	},
11468 	};
11469 
11470 	{ // scalar cases
11471 		const StringTemplate preMain
11472 		(
11473 			"      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11474 			"     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11475 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11476 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11477 			"            %f16 = OpTypeFloat 16\n"
11478 			"          %v2f16 = OpTypeVector %f16 2\n"
11479 			"        %c_f16_0 = OpConstant %f16 0.0\n"
11480 			"        %c_f16_1 = OpConstant %f16 1.0\n"
11481 			"         %up_u32 = OpTypePointer Uniform %u32\n"
11482 			"         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11483 			"         %SSBO16 = OpTypeStruct %ra_u32\n"
11484 			"      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11485 			"     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11486 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11487 			"      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11488 			"      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11489 			"       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11490 		);
11491 
11492 		const StringTemplate decoration
11493 		(
11494 			"OpDecorate %ra_u32 ArrayStride 4\n"
11495 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
11496 			"OpDecorate %SSBO16 BufferBlock\n"
11497 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
11498 			"OpDecorate %ssbo_src0 Binding 0\n"
11499 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
11500 			"OpDecorate %ssbo_src1 Binding 1\n"
11501 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
11502 			"OpDecorate %ssbo_dst Binding 2\n"
11503 		);
11504 
11505 		const StringTemplate testFun
11506 		(
11507 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11508 			"    %param = OpFunctionParameter %v4f32\n"
11509 
11510 			"    %entry = OpLabel\n"
11511 			"        %i = OpVariable %fp_i32 Function\n"
11512 			"             OpStore %i %c_i32_0\n"
11513 			"             OpBranch %loop\n"
11514 
11515 			"     %loop = OpLabel\n"
11516 			"    %i_cmp = OpLoad %i32 %i\n"
11517 			"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11518 			"             OpLoopMerge %merge %next None\n"
11519 			"             OpBranchConditional %lt %write %merge\n"
11520 
11521 			"    %write = OpLabel\n"
11522 			"      %ndx = OpLoad %i32 %i\n"
11523 
11524 			" %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11525 
11526 			"${op_arg1_calc}"
11527 
11528 			" %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11529 			"  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11530 			"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11531 			"             OpBranch %next\n"
11532 
11533 			"     %next = OpLabel\n"
11534 			"    %i_cur = OpLoad %i32 %i\n"
11535 			"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11536 			"             OpStore %i %i_new\n"
11537 			"             OpBranch %loop\n"
11538 
11539 			"    %merge = OpLabel\n"
11540 			"             OpReturnValue %param\n"
11541 
11542 			"             OpFunctionEnd\n"
11543 		);
11544 
11545 		const StringTemplate arg1Calc
11546 		(
11547 			" %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11548 		);
11549 
11550 		for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11551 		{
11552 			const size_t		iterations		= float16Data1.size();
11553 			const TestOp&		testOp			= testOps[testOpsIdx];
11554 			const string		testName		= de::toLower(string(testOp.opCode)) + "_scalar";
11555 			SpecResource		specResource;
11556 			map<string, string>	specs;
11557 			VulkanFeatures		features;
11558 			map<string, string>	fragments;
11559 			vector<string>		extensions;
11560 
11561 			specs["num_data_points"]	= de::toString(iterations);
11562 			specs["op_code"]			= testOp.opCode;
11563 			specs["op_arg1"]			= (testOp.argCount == 1) ? "" : "%val_src1";
11564 			specs["op_arg1_calc"]		= (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11565 
11566 			fragments["extension"]		= spvExtensions;
11567 			fragments["capability"]		= spvCapabilities;
11568 			fragments["execution_mode"]	= spvExecutionMode;
11569 			fragments["decoration"]		= decoration.specialize(specs);
11570 			fragments["pre_main"]		= preMain.specialize(specs);
11571 			fragments["testfun"]		= testFun.specialize(specs);
11572 			fragments["testfun"]		+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11573 			if (testOp.argCount > 1)
11574 			{
11575 				fragments["testfun"]	+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11576 			}
11577 			fragments["testfun"]		+= StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11578 
11579 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11580 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11581 			specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11582 			specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11583 
11584 			extensions.push_back("VK_KHR_shader_float16_int8");
11585 
11586 			if (nanSupported)
11587 			{
11588 				extensions.push_back("VK_KHR_shader_float_controls");
11589 
11590 				features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11591 			}
11592 
11593 			features.extFloat16Int8.shaderFloat16 = true;
11594 
11595 			finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11596 		}
11597 	}
11598 	{ // vector cases
11599 		const StringTemplate preMain
11600 		(
11601 			"        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11602 			"           %v2bool = OpTypeVector %bool 2\n"
11603 			"              %f16 = OpTypeFloat 16\n"
11604 			"          %c_f16_0 = OpConstant %f16 0.0\n"
11605 			"          %c_f16_1 = OpConstant %f16 1.0\n"
11606 			"            %v2f16 = OpTypeVector %f16 2\n"
11607 			"      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11608 			"      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11609 			"           %up_u32 = OpTypePointer Uniform %u32\n"
11610 			"           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11611 			"           %SSBO16 = OpTypeStruct %ra_u32\n"
11612 			"        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11613 			"     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11614 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11615 			"        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11616 			"        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11617 			"         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11618 		);
11619 
11620 		const StringTemplate decoration
11621 		(
11622 			"OpDecorate %ra_u32 ArrayStride 4\n"
11623 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
11624 			"OpDecorate %SSBO16 BufferBlock\n"
11625 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
11626 			"OpDecorate %ssbo_src0 Binding 0\n"
11627 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
11628 			"OpDecorate %ssbo_src1 Binding 1\n"
11629 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
11630 			"OpDecorate %ssbo_dst Binding 2\n"
11631 		);
11632 
11633 		const StringTemplate testFun
11634 		(
11635 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11636 			"    %param = OpFunctionParameter %v4f32\n"
11637 
11638 			"    %entry = OpLabel\n"
11639 			"        %i = OpVariable %fp_i32 Function\n"
11640 			"             OpStore %i %c_i32_0\n"
11641 			"             OpBranch %loop\n"
11642 
11643 			"     %loop = OpLabel\n"
11644 			"    %i_cmp = OpLoad %i32 %i\n"
11645 			"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11646 			"             OpLoopMerge %merge %next None\n"
11647 			"             OpBranchConditional %lt %write %merge\n"
11648 
11649 			"    %write = OpLabel\n"
11650 			"      %ndx = OpLoad %i32 %i\n"
11651 
11652 			" %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11653 
11654 			"${op_arg1_calc}"
11655 
11656 			" %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11657 			"  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11658 			"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11659 			"             OpBranch %next\n"
11660 
11661 			"     %next = OpLabel\n"
11662 			"    %i_cur = OpLoad %i32 %i\n"
11663 			"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11664 			"             OpStore %i %i_new\n"
11665 			"             OpBranch %loop\n"
11666 
11667 			"    %merge = OpLabel\n"
11668 			"             OpReturnValue %param\n"
11669 
11670 			"             OpFunctionEnd\n"
11671 		);
11672 
11673 		const StringTemplate arg1Calc
11674 		(
11675 			" %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11676 		);
11677 
11678 		for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11679 		{
11680 			const deUint32		itemsPerVec	= 2;
11681 			const size_t		iterations	= float16DataVec1.size() / itemsPerVec;
11682 			const TestOp&		testOp		= testOps[testOpsIdx];
11683 			const string		testName	= de::toLower(string(testOp.opCode)) + "_vector";
11684 			SpecResource		specResource;
11685 			map<string, string>	specs;
11686 			vector<string>		extensions;
11687 			VulkanFeatures		features;
11688 			map<string, string>	fragments;
11689 
11690 			specs["num_data_points"]	= de::toString(iterations);
11691 			specs["op_code"]			= testOp.opCode;
11692 			specs["op_arg1"]			= (testOp.argCount == 1) ? "" : "%val_src1";
11693 			specs["op_arg1_calc"]		= (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11694 
11695 			fragments["extension"]		= spvExtensions;
11696 			fragments["capability"]		= spvCapabilities;
11697 			fragments["execution_mode"]	= spvExecutionMode;
11698 			fragments["decoration"]		= decoration.specialize(specs);
11699 			fragments["pre_main"]		= preMain.specialize(specs);
11700 			fragments["testfun"]		= testFun.specialize(specs);
11701 			fragments["testfun"]		+= StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11702 			if (testOp.argCount > 1)
11703 			{
11704 				fragments["testfun"]	+= StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11705 			}
11706 			fragments["testfun"]		+= StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11707 
11708 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11709 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11710 			specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11711 			specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11712 
11713 			extensions.push_back("VK_KHR_shader_float16_int8");
11714 
11715 			if (nanSupported)
11716 			{
11717 				extensions.push_back("VK_KHR_shader_float_controls");
11718 
11719 				features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11720 			}
11721 
11722 			features.extFloat16Int8.shaderFloat16 = true;
11723 
11724 			finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11725 		}
11726 	}
11727 
11728 	return testGroup.release();
11729 }
11730 
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11731 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11732 {
11733 	if (inputs.size() != 1 || outputAllocs.size() != 1)
11734 		return false;
11735 
11736 	vector<deUint8>	input1Bytes;
11737 
11738 	inputs[0].getBytes(input1Bytes);
11739 
11740 	const deUint16* const	input1AsFP16	= (const deUint16*)&input1Bytes[0];
11741 	const deUint16* const	outputAsFP16	= (const deUint16*)outputAllocs[0]->getHostPtr();
11742 	std::string				error;
11743 
11744 	for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11745 	{
11746 		if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11747 		{
11748 			log << TestLog::Message << error << TestLog::EndMessage;
11749 
11750 			return false;
11751 		}
11752 	}
11753 
11754 	return true;
11755 }
11756 
11757 template<class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)11758 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11759 {
11760 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11761 
11762 	de::Random							rnd					(deStringHash(testGroup->getName()));
11763 	const StringTemplate				capabilities		("OpCapability Float16\n");
11764 	const deUint32						numDataPoints		= 256;
11765 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
11766 	const vector<deFloat16>				float16OutputUnused	(float16InputData.size(), 0);
11767 	map<string, string>					fragments;
11768 
11769 	struct TestType
11770 	{
11771 		const deUint32	typeComponents;
11772 		const char*		typeName;
11773 		const char*		typeDecls;
11774 		const char*		typeStorage;
11775 		const string		loadFunc;
11776 		const string		storeFunc;
11777 	};
11778 
11779 	const TestType	testTypes[]	=
11780 	{
11781 		{
11782 			1,
11783 			"f16",
11784 			"      %v2f16 = OpTypeVector %f16 2\n"
11785 			"%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11786 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11787 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11788 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11789 			"u32_hndp",
11790 			loadScalarF16FromUint,
11791 			storeScalarF16AsUint
11792 		},
11793 		{
11794 			2,
11795 			"v2f16",
11796 			"      %v2f16 = OpTypeVector %f16 2\n"
11797 			"  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11798 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11799 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11800 			"u32_ndp",
11801 			loadV2F16FromUint,
11802 			storeV2F16AsUint
11803 		},
11804 		{
11805 			4,
11806 			"v4f16",
11807 			"      %v2f16 = OpTypeVector %f16 2\n"
11808 			"      %v4f16 = OpTypeVector %f16 4\n"
11809 			"  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11810 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11811 			"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11812 			"ra_u32_2",
11813 			loadV4F16FromUints,
11814 			storeV4F16AsUints
11815 		},
11816 	};
11817 
11818 	const StringTemplate preMain
11819 	(
11820 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11821 		" %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11822 		"     %v2bool = OpTypeVector %bool 2\n"
11823 		"        %f16 = OpTypeFloat 16\n"
11824 		"    %c_f16_0 = OpConstant %f16 0.0\n"
11825 
11826 		"${type_decls}"
11827 
11828 		"  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11829 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11830 		"%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11831 		" %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11832 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11833 		"	  %up_u32 = OpTypePointer Uniform %u32\n"
11834 		"     %SSBO16 = OpTypeStruct %ra_${ts}\n"
11835 		"  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11836 		"   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11837 		"   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11838 	);
11839 
11840 	const StringTemplate decoration
11841 	(
11842 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
11843 		"OpDecorate %ra_u32_hndp ArrayStride 4\n"
11844 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
11845 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11846 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
11847 		"OpDecorate %SSBO16 BufferBlock\n"
11848 		"OpDecorate %ssbo_src DescriptorSet 0\n"
11849 		"OpDecorate %ssbo_src Binding 0\n"
11850 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
11851 		"OpDecorate %ssbo_dst Binding 1\n"
11852 	);
11853 
11854 	const StringTemplate testFun
11855 	(
11856 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11857 		"    %param = OpFunctionParameter %v4f32\n"
11858 		"    %entry = OpLabel\n"
11859 
11860 		"        %i = OpVariable %fp_i32 Function\n"
11861 		"             OpStore %i %c_i32_0\n"
11862 		"             OpBranch %loop\n"
11863 
11864 		"     %loop = OpLabel\n"
11865 		"    %i_cmp = OpLoad %i32 %i\n"
11866 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11867 		"             OpLoopMerge %merge %next None\n"
11868 		"             OpBranchConditional %lt %write %merge\n"
11869 
11870 		"    %write = OpLabel\n"
11871 		"      %ndx = OpLoad %i32 %i\n"
11872 
11873 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11874 		"  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11875 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11876 		"             OpBranch %next\n"
11877 
11878 		"     %next = OpLabel\n"
11879 		"    %i_cur = OpLoad %i32 %i\n"
11880 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11881 		"             OpStore %i %i_new\n"
11882 		"             OpBranch %loop\n"
11883 
11884 		"    %merge = OpLabel\n"
11885 		"             OpReturnValue %param\n"
11886 
11887 		"             OpFunctionEnd\n"
11888 
11889 		" %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11890 		"   %param0 = OpFunctionParameter %${tt}\n"
11891 		" %entry_pf = OpLabel\n"
11892 		"     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11893 		"             OpReturnValue %res0\n"
11894 		"             OpFunctionEnd\n"
11895 	);
11896 
11897 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11898 	{
11899 		const TestType&		testType		= testTypes[testTypeIdx];
11900 		const string		testName		= testType.typeName;
11901 		const deUint32		itemsPerType	= testType.typeComponents;
11902 		const size_t		iterations		= float16InputData.size() / itemsPerType;
11903 		const size_t		typeStride		= itemsPerType * sizeof(deFloat16);
11904 		SpecResource		specResource;
11905 		map<string, string>	specs;
11906 		VulkanFeatures		features;
11907 		vector<string>		extensions;
11908 
11909 		specs["num_data_points"]	= de::toString(iterations);
11910 		specs["tt"]					= testType.typeName;
11911 		specs["ts"]					= testType.typeStorage;
11912 		specs["tt_stride"]			= de::toString(typeStride);
11913 		specs["type_decls"]			= testType.typeDecls;
11914 
11915 		fragments["capability"]		= capabilities.specialize(specs);
11916 		fragments["decoration"]		= decoration.specialize(specs);
11917 		fragments["pre_main"]		= preMain.specialize(specs);
11918 		fragments["testfun"]		= testFun.specialize(specs);
11919 		fragments["testfun"]		+= StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11920 		fragments["testfun"]		+= StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11921 
11922 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11923 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11924 		specResource.verifyIO = compareFP16FunctionSetFunc;
11925 
11926 		extensions.push_back("VK_KHR_shader_float16_int8");
11927 
11928 		features.extFloat16Int8.shaderFloat16 = true;
11929 
11930 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11931 	}
11932 
11933 	return testGroup.release();
11934 }
11935 
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11936 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11937 {
11938 	if (inputs.size() != 2 || outputAllocs.size() != 1)
11939 		return false;
11940 
11941 	vector<deUint8>	input1Bytes;
11942 	vector<deUint8>	input2Bytes;
11943 
11944 	inputs[0].getBytes(input1Bytes);
11945 	inputs[1].getBytes(input2Bytes);
11946 
11947 	DE_ASSERT(input1Bytes.size() > 0);
11948 	DE_ASSERT(input2Bytes.size() > 0);
11949 	DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11950 
11951 	const size_t			iterations		= input2Bytes.size() / sizeof(deUint32);
11952 	const size_t			components		= input1Bytes.size() / (sizeof(deFloat16) * iterations);
11953 	const deFloat16* const	input1AsFP16	= (const deFloat16*)&input1Bytes[0];
11954 	const deUint32* const	inputIndices	= (const deUint32*)&input2Bytes[0];
11955 	const deFloat16* const	outputAsFP16	= (const deFloat16*)outputAllocs[0]->getHostPtr();
11956 	std::string				error;
11957 
11958 	DE_ASSERT(components == 2 || components == 4);
11959 	DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11960 
11961 	for (size_t idx = 0; idx < iterations; ++idx)
11962 	{
11963 		const deUint32	componentNdx	= inputIndices[idx];
11964 
11965 		DE_ASSERT(componentNdx < components);
11966 
11967 		const deFloat16	expected		= input1AsFP16[components * idx + componentNdx];
11968 
11969 		if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11970 		{
11971 			log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11972 
11973 			return false;
11974 		}
11975 	}
11976 
11977 	return true;
11978 }
11979 
11980 template<class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)11981 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11982 {
11983 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11984 
11985 	de::Random							rnd					(deStringHash(testGroup->getName()));
11986 	const deUint32						numDataPoints		= 256;
11987 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
11988 	const vector<deFloat16>				float16OutputUnused	(float16InputData.size(), 0);
11989 
11990 	struct TestType
11991 	{
11992 		const deUint32	typeComponents;
11993 		const size_t	typeStride;
11994 		const char*		typeName;
11995 		const char*		typeDecls;
11996 		const char*		typeStorage;
11997 		const string		loadFunction;
11998 		const string		storeFunction;
11999 	};
12000 
12001 	const TestType	testTypes[]	=
12002 	{
12003 		{
12004 			2,
12005 			2 * sizeof(deFloat16),
12006 			"v2f16",
12007 			"      %v2f16 = OpTypeVector %f16 2\n"
12008 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12009 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12010 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12011 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12012 			"u32",
12013 			loadV2F16FromUint,
12014 			storeScalarF16AsUint
12015 		},
12016 		{
12017 			3,
12018 			4 * sizeof(deFloat16),
12019 			"v3f16",
12020 			"      %v2f16 = OpTypeVector %f16 2\n"
12021 			"      %v3f16 = OpTypeVector %f16 3\n"
12022 			"%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12023 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12024 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12025 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12026 			"ra_u32_2",
12027 			loadV3F16FromUints,
12028 			storeScalarF16AsUint
12029 		},
12030 		{
12031 			4,
12032 			4 * sizeof(deFloat16),
12033 			"v4f16",
12034 			"      %v2f16 = OpTypeVector %f16 2\n"
12035 			"      %v4f16 = OpTypeVector %f16 4\n"
12036 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12037 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12038 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12039 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12040 			"ra_u32_2",
12041 			loadV4F16FromUints,
12042 			storeScalarF16AsUint
12043 		},
12044 	};
12045 
12046 	const StringTemplate preMain
12047 	(
12048 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12049 		" %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12050 		"        %f16 = OpTypeFloat 16\n"
12051 
12052 		"${type_decl}"
12053 
12054 		"     %up_u32 = OpTypePointer Uniform %u32\n"
12055 		"     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12056 		"   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12057 		"%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12058 
12059 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12060 		" %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12061 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12062 		"   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12063 		"%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12064 
12065 		" %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12066 		"   %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12067 		"%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12068 
12069 		"   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12070 		"   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12071 		"   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12072 	);
12073 
12074 	const StringTemplate decoration
12075 	(
12076 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12077 		"OpDecorate %ra_u32_hndp ArrayStride 4\n"
12078 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12079 		"OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12080 		"OpDecorate %SSBO_SRC BufferBlock\n"
12081 		"OpDecorate %ssbo_src DescriptorSet 0\n"
12082 		"OpDecorate %ssbo_src Binding 0\n"
12083 
12084 		"OpDecorate %ra_u32 ArrayStride 4\n"
12085 		"OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12086 		"OpDecorate %SSBO_IDX BufferBlock\n"
12087 		"OpDecorate %ssbo_idx DescriptorSet 0\n"
12088 		"OpDecorate %ssbo_idx Binding 1\n"
12089 
12090 		"OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12091 		"OpDecorate %SSBO_DST BufferBlock\n"
12092 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12093 		"OpDecorate %ssbo_dst Binding 2\n"
12094 	);
12095 
12096 	const StringTemplate testFun
12097 	(
12098 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12099 		"    %param = OpFunctionParameter %v4f32\n"
12100 		"    %entry = OpLabel\n"
12101 
12102 		"        %i = OpVariable %fp_i32 Function\n"
12103 		"             OpStore %i %c_i32_0\n"
12104 
12105 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12106 		"             OpSelectionMerge %end_if None\n"
12107 		"             OpBranchConditional %will_run %run_test %end_if\n"
12108 
12109 		" %run_test = OpLabel\n"
12110 		"             OpBranch %loop\n"
12111 
12112 		"     %loop = OpLabel\n"
12113 		"    %i_cmp = OpLoad %i32 %i\n"
12114 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12115 		"             OpLoopMerge %merge %next None\n"
12116 		"             OpBranchConditional %lt %write %merge\n"
12117 
12118 		"    %write = OpLabel\n"
12119 		"      %ndx = OpLoad %i32 %i\n"
12120 
12121 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12122 
12123 		"  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12124 		"  %val_idx = OpLoad %u32 %src_idx\n"
12125 
12126 		"  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12127 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12128 
12129 		"             OpBranch %next\n"
12130 
12131 		"     %next = OpLabel\n"
12132 		"    %i_cur = OpLoad %i32 %i\n"
12133 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12134 		"             OpStore %i %i_new\n"
12135 		"             OpBranch %loop\n"
12136 
12137 		"    %merge = OpLabel\n"
12138 		"             OpBranch %end_if\n"
12139 		"   %end_if = OpLabel\n"
12140 		"             OpReturnValue %param\n"
12141 
12142 		"             OpFunctionEnd\n"
12143 	);
12144 
12145 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12146 	{
12147 		const TestType&		testType		= testTypes[testTypeIdx];
12148 		const string		testName		= testType.typeName;
12149 		const size_t		itemsPerType	= testType.typeStride / sizeof(deFloat16);
12150 		const size_t		iterations		= float16InputData.size() / itemsPerType;
12151 		SpecResource		specResource;
12152 		map<string, string>	specs;
12153 		VulkanFeatures		features;
12154 		vector<deUint32>	inputDataNdx;
12155 		map<string, string>	fragments;
12156 		vector<string>		extensions;
12157 
12158 		for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12159 			inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12160 
12161 		specs["num_data_points"]	= de::toString(iterations);
12162 		specs["tt"]					= testType.typeName;
12163 		specs["ts"]					= testType.typeStorage;
12164 		specs["tt_stride"]			= de::toString(testType.typeStride);
12165 		specs["type_decl"]			= testType.typeDecls;
12166 
12167 		fragments["capability"]		= "OpCapability Float16\n";
12168 		fragments["decoration"]		= decoration.specialize(specs);
12169 		fragments["pre_main"]		= preMain.specialize(specs);
12170 		fragments["testfun"]		= testFun.specialize(specs);
12171 		fragments["testfun"]		+= StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12172 		fragments["testfun"]		+= StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12173 
12174 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12175 		specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12176 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12177 		specResource.verifyIO = compareFP16VectorExtractFunc;
12178 
12179 		extensions.push_back("VK_KHR_shader_float16_int8");
12180 
12181 		features.extFloat16Int8.shaderFloat16 = true;
12182 
12183 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12184 	}
12185 
12186 	return testGroup.release();
12187 }
12188 
12189 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12190 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12191 {
12192 	if (inputs.size() != 2 || outputAllocs.size() != 1)
12193 		return false;
12194 
12195 	vector<deUint8>	input1Bytes;
12196 	vector<deUint8>	input2Bytes;
12197 
12198 	inputs[0].getBytes(input1Bytes);
12199 	inputs[1].getBytes(input2Bytes);
12200 
12201 	DE_ASSERT(input1Bytes.size() > 0);
12202 	DE_ASSERT(input2Bytes.size() > 0);
12203 	DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12204 
12205 	const size_t			iterations			= input2Bytes.size() / sizeof(deUint32);
12206 	const size_t			componentsStride	= input1Bytes.size() / (sizeof(deFloat16) * iterations);
12207 	const deFloat16* const	input1AsFP16		= (const deFloat16*)&input1Bytes[0];
12208 	const deUint32* const	inputIndices		= (const deUint32*)&input2Bytes[0];
12209 	const deFloat16* const	outputAsFP16		= (const deFloat16*)outputAllocs[0]->getHostPtr();
12210 	const deFloat16			magic				= tcu::Float16(float(REPLACEMENT)).bits();
12211 	std::string				error;
12212 
12213 	DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12214 	DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12215 
12216 	for (size_t idx = 0; idx < iterations; ++idx)
12217 	{
12218 		const deFloat16*	inputVec		= &input1AsFP16[componentsStride * idx];
12219 		const deFloat16*	outputVec		= &outputAsFP16[componentsStride * idx];
12220 		const deUint32		replacedCompNdx	= inputIndices[idx];
12221 
12222 		DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12223 
12224 		for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12225 		{
12226 			const deFloat16	expected	= (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12227 
12228 			if (!compare16BitFloat(expected, outputVec[compNdx], error))
12229 			{
12230 				log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12231 
12232 				return false;
12233 			}
12234 		}
12235 	}
12236 
12237 	return true;
12238 }
12239 
12240 template<class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12241 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12242 {
12243 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12244 
12245 	de::Random							rnd					(deStringHash(testGroup->getName()));
12246 	const deUint32						replacement			= 42;
12247 	const deUint32						numDataPoints		= 256;
12248 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
12249 	const vector<deFloat16>				float16OutputUnused	(float16InputData.size(), 0);
12250 
12251 	struct TestType
12252 	{
12253 		const deUint32	typeComponents;
12254 		const size_t	typeStride;
12255 		const char*		typeName;
12256 		const char*		typeDecls;
12257 		VerifyIOFunc	verifyIOFunc;
12258 		const char*		typeStorage;
12259 		const string		loadFunction;
12260 		const string		storeFunction;
12261 	};
12262 
12263 	const TestType	testTypes[]	=
12264 	{
12265 		{
12266 			2,
12267 			2 * sizeof(deFloat16),
12268 			"v2f16",
12269 			"      %v2f16 = OpTypeVector %f16 2\n"
12270 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12271 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12272 			compareFP16VectorInsertFunc<2, replacement>,
12273 			"u32",
12274 			loadV2F16FromUint,
12275 			storeV2F16AsUint
12276 		},
12277 		{
12278 			3,
12279 			4 * sizeof(deFloat16),
12280 			"v3f16",
12281 			"      %v2f16 = OpTypeVector %f16 2\n"
12282 			"      %v3f16 = OpTypeVector %f16 3\n"
12283 			"%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12284 			"%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12285 			compareFP16VectorInsertFunc<3, replacement>,
12286 			"ra_u32_2",
12287 			loadV3F16FromUints,
12288 			storeV3F16AsUints
12289 		},
12290 		{
12291 			4,
12292 			4 * sizeof(deFloat16),
12293 			"v4f16",
12294 			"      %v2f16 = OpTypeVector %f16 2\n"
12295 			"      %v4f16 = OpTypeVector %f16 4\n"
12296 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12297 			"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12298 			compareFP16VectorInsertFunc<4, replacement>,
12299 			"ra_u32_2",
12300 			loadV4F16FromUints,
12301 			storeV4F16AsUints
12302 		},
12303 	};
12304 
12305 	const StringTemplate preMain
12306 	(
12307 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12308 		"        %f16 = OpTypeFloat 16\n"
12309 		"  %c_f16_ins = OpConstant %f16 ${replacement}\n"
12310 
12311 		"${type_decl}"
12312 
12313 		"     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12314 		"	  %up_u32 = OpTypePointer Uniform %u32\n"
12315 		"   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12316 		"%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12317 
12318 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12319 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12320 		"   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12321 		"%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12322 
12323 		"   %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12324 		"%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12325 
12326 		"   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12327 		"   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12328 		"   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12329 	);
12330 
12331 	const StringTemplate decoration
12332 	(
12333 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12334 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12335 		"OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12336 		"OpDecorate %SSBO_SRC BufferBlock\n"
12337 		"OpDecorate %ssbo_src DescriptorSet 0\n"
12338 		"OpDecorate %ssbo_src Binding 0\n"
12339 
12340 		"OpDecorate %ra_u32 ArrayStride 4\n"
12341 		"OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12342 		"OpDecorate %SSBO_IDX BufferBlock\n"
12343 		"OpDecorate %ssbo_idx DescriptorSet 0\n"
12344 		"OpDecorate %ssbo_idx Binding 1\n"
12345 
12346 		"OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12347 		"OpDecorate %SSBO_DST BufferBlock\n"
12348 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12349 		"OpDecorate %ssbo_dst Binding 2\n"
12350 	);
12351 
12352 	const StringTemplate testFun
12353 	(
12354 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12355 		"    %param = OpFunctionParameter %v4f32\n"
12356 		"    %entry = OpLabel\n"
12357 
12358 		"        %i = OpVariable %fp_i32 Function\n"
12359 		"             OpStore %i %c_i32_0\n"
12360 
12361 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12362 		"             OpSelectionMerge %end_if None\n"
12363 		"             OpBranchConditional %will_run %run_test %end_if\n"
12364 
12365 		" %run_test = OpLabel\n"
12366 		"             OpBranch %loop\n"
12367 
12368 		"     %loop = OpLabel\n"
12369 		"    %i_cmp = OpLoad %i32 %i\n"
12370 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12371 		"             OpLoopMerge %merge %next None\n"
12372 		"             OpBranchConditional %lt %write %merge\n"
12373 
12374 		"    %write = OpLabel\n"
12375 		"      %ndx = OpLoad %i32 %i\n"
12376 
12377 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12378 
12379 		"  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12380 		"  %val_idx = OpLoad %u32 %src_idx\n"
12381 
12382 		"  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12383 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12384 
12385 		"             OpBranch %next\n"
12386 
12387 		"     %next = OpLabel\n"
12388 		"    %i_cur = OpLoad %i32 %i\n"
12389 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12390 		"             OpStore %i %i_new\n"
12391 		"             OpBranch %loop\n"
12392 
12393 		"    %merge = OpLabel\n"
12394 		"             OpBranch %end_if\n"
12395 		"   %end_if = OpLabel\n"
12396 		"             OpReturnValue %param\n"
12397 
12398 		"             OpFunctionEnd\n"
12399 	);
12400 
12401 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12402 	{
12403 		const TestType&		testType		= testTypes[testTypeIdx];
12404 		const string		testName		= testType.typeName;
12405 		const size_t		itemsPerType	= testType.typeStride / sizeof(deFloat16);
12406 		const size_t		iterations		= float16InputData.size() / itemsPerType;
12407 		SpecResource		specResource;
12408 		map<string, string>	specs;
12409 		VulkanFeatures		features;
12410 		vector<deUint32>	inputDataNdx;
12411 		map<string, string>	fragments;
12412 		vector<string>		extensions;
12413 
12414 		for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12415 			inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12416 
12417 		specs["num_data_points"]	= de::toString(iterations);
12418 		specs["tt"]					= testType.typeName;
12419 		specs["ts"]					= testType.typeStorage;
12420 		specs["tt_stride"]			= de::toString(testType.typeStride);
12421 		specs["type_decl"]			= testType.typeDecls;
12422 		specs["replacement"]		= de::toString(replacement);
12423 
12424 		fragments["capability"]		= "OpCapability Float16\n";
12425 		fragments["decoration"]		= decoration.specialize(specs);
12426 		fragments["pre_main"]		= preMain.specialize(specs);
12427 		fragments["testfun"]		= testFun.specialize(specs);
12428 		fragments["testfun"]		+= StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12429 		fragments["testfun"]		+= StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12430 
12431 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12432 		specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12433 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12434 		specResource.verifyIO = testType.verifyIOFunc;
12435 
12436 		extensions.push_back("VK_KHR_shader_float16_int8");
12437 
12438 		features.extFloat16Int8.shaderFloat16 = true;
12439 
12440 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12441 	}
12442 
12443 	return testGroup.release();
12444 }
12445 
// Returns the value expected in one component of an OpVectorShuffle result.
//
// The source selector is derived from the iteration number: with
// N = vec1Len + vec2Len + 1 possible selectors, iteration % (N * N) encodes the
// selectors of components 0 and 1 as (quotient, remainder) of a division by N.
// Components 2 and 3 always use selectors 0 and 1 respectively.
//
// \param iteration		Index of the data point being checked.
// \param componentNdx	Destination component, 0..3; anything else throws InternalError.
// \param input1Vec		First source vector (selectors below vec1Len).
// \param input2Vec		Second source vector (selectors from vec1Len upwards).
// \param vec1Len		Component count of the first source vector.
// \param vec2Len		Component count of the second source vector.
// \param validate		Set to false when the selector is the extra, out-of-range one
//						and there is no expected value to compare; true otherwise.
// \return				Expected component value, or 0 when validate is false.
inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
{
	const size_t	sourceCompCount	= vec1Len + vec2Len;
	const size_t	selectorCount	= sourceCompCount + 1;
	const size_t	caseNdx			= iteration % (selectorCount * selectorCount);
	size_t			selector		= 0;

	if (componentNdx == 0)
		selector = caseNdx / selectorCount;
	else if (componentNdx == 1)
		selector = caseNdx % selectorCount;
	else if (componentNdx == 2)
		selector = 0;
	else if (componentNdx == 3)
		selector = 1;
	else
	{
		TCU_THROW(InternalError, "Impossible");
	}

	validate = (selector < sourceCompCount);

	if (!validate)
		return 0;

	if (selector < vec1Len)
		return input1Vec[selector];

	return input2Vec[selector - vec1Len];
}
12472 
12473 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12474 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12475 {
12476 	DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12477 	DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12478 	DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12479 
12480 	if (inputs.size() != 2 || outputAllocs.size() != 1)
12481 		return false;
12482 
12483 	vector<deUint8>	input1Bytes;
12484 	vector<deUint8>	input2Bytes;
12485 
12486 	inputs[0].getBytes(input1Bytes);
12487 	inputs[1].getBytes(input2Bytes);
12488 
12489 	DE_ASSERT(input1Bytes.size() > 0);
12490 	DE_ASSERT(input2Bytes.size() > 0);
12491 	DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12492 
12493 	const size_t			componentsStrideDst		= (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12494 	const size_t			componentsStrideSrc0	= (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12495 	const size_t			componentsStrideSrc1	= (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12496 	const size_t			iterations				= input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12497 	const deFloat16* const	input1AsFP16			= (const deFloat16*)&input1Bytes[0];
12498 	const deFloat16* const	input2AsFP16			= (const deFloat16*)&input2Bytes[0];
12499 	const deFloat16* const	outputAsFP16			= (const deFloat16*)outputAllocs[0]->getHostPtr();
12500 	std::string				error;
12501 
12502 	DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12503 	DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12504 
12505 	for (size_t idx = 0; idx < iterations; ++idx)
12506 	{
12507 		const deFloat16*	input1Vec	= &input1AsFP16[componentsStrideSrc0 * idx];
12508 		const deFloat16*	input2Vec	= &input2AsFP16[componentsStrideSrc1 * idx];
12509 		const deFloat16*	outputVec	= &outputAsFP16[componentsStrideDst * idx];
12510 
12511 		for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12512 		{
12513 			bool		validate	= true;
12514 			deFloat16	expected	= getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
12515 
12516 			if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12517 			{
12518 				log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12519 
12520 				return false;
12521 			}
12522 		}
12523 	}
12524 
12525 	return true;
12526 }
12527 
getFloat16VectorShuffleVerifyIOFunc(deUint32 dstComponentsCount,deUint32 src0ComponentsCount,deUint32 src1ComponentsCount)12528 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
12529 {
12530 	DE_ASSERT(dstComponentsCount <= 4);
12531 	DE_ASSERT(src0ComponentsCount <= 4);
12532 	DE_ASSERT(src1ComponentsCount <= 4);
12533 	deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12534 
12535 	switch (funcCode)
12536 	{
12537 		case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12538 		case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12539 		case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12540 		case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12541 		case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12542 		case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12543 		case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12544 		case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12545 		case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12546 		case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12547 		case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12548 		case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12549 		case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12550 		case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12551 		case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12552 		case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12553 		case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12554 		case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12555 		case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12556 		case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12557 		case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12558 		case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12559 		case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12560 		case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12561 		case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12562 		case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12563 		case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12564 		default: TCU_THROW(InternalError, "Invalid number of components specified.");
12565 	}
12566 }
12567 
12568 template<class SpecResource>
createFloat16VectorShuffleSet(tcu::TestContext & testCtx)12569 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12570 {
12571 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12572 	const int							testSpecificSeed	= deStringHash(testGroup->getName());
12573 	const int							seed				= testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12574 	de::Random							rnd					(seed);
12575 	const deUint32						numDataPoints		= 128;
12576 	map<string, string>					fragments;
12577 
12578 	struct TestType
12579 	{
12580 		const deUint32	typeComponents;
12581 		const char*		typeName;
12582 		const string	loadFunction;
12583 		const string	storeFunction;
12584 	};
12585 
12586 	const TestType	testTypes[]	=
12587 	{
12588 		{
12589 			2,
12590 			"v2f16",
12591 			loadV2F16FromUint,
12592 			storeV2F16AsUint
12593 		},
12594 		{
12595 			3,
12596 			"v3f16",
12597 			loadV3F16FromUints,
12598 			storeV3F16AsUints
12599 		},
12600 		{
12601 			4,
12602 			"v4f16",
12603 			loadV4F16FromUints,
12604 			storeV4F16AsUints
12605 		},
12606 	};
12607 
12608 	const StringTemplate preMain
12609 	(
12610 		"    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12611 		"     %c_i32_cc = OpConstant %i32 ${case_count}\n"
12612 		"          %f16 = OpTypeFloat 16\n"
12613 		"        %v2f16 = OpTypeVector %f16 2\n"
12614 		"        %v3f16 = OpTypeVector %f16 3\n"
12615 		"        %v4f16 = OpTypeVector %f16 4\n"
12616 
12617 		"     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12618 		"     %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12619 		"     %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12620 		"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12621 		"%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12622 		"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12623 
12624 		"     %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12625 		"   %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12626 		"  %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12627 		"       %up_u32 = OpTypePointer Uniform %u32\n"
12628 		"   %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12629 		"   %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12630 		"   %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12631 
12632 		"%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12633 		"%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12634 		"%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12635 
12636 		"        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12637 
12638 		"    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12639 		"    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12640 		"     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12641 	);
12642 
12643 	const StringTemplate decoration
12644 	(
12645 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12646 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
12647 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12648 
12649 		"OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12650 		"OpDecorate %SSBO_v2f16 BufferBlock\n"
12651 
12652 		"OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12653 		"OpDecorate %SSBO_v3f16 BufferBlock\n"
12654 
12655 		"OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12656 		"OpDecorate %SSBO_v4f16 BufferBlock\n"
12657 
12658 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
12659 		"OpDecorate %ssbo_src0 Binding 0\n"
12660 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
12661 		"OpDecorate %ssbo_src1 Binding 1\n"
12662 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12663 		"OpDecorate %ssbo_dst Binding 2\n"
12664 	);
12665 
12666 	const StringTemplate testFun
12667 	(
12668 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12669 		"    %param = OpFunctionParameter %v4f32\n"
12670 		"    %entry = OpLabel\n"
12671 
12672 		"        %i = OpVariable %fp_i32 Function\n"
12673 		"             OpStore %i %c_i32_0\n"
12674 
12675 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12676 		"             OpSelectionMerge %end_if None\n"
12677 		"             OpBranchConditional %will_run %run_test %end_if\n"
12678 
12679 		" %run_test = OpLabel\n"
12680 		"             OpBranch %loop\n"
12681 
12682 		"     %loop = OpLabel\n"
12683 		"    %i_cmp = OpLoad %i32 %i\n"
12684 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12685 		"             OpLoopMerge %merge %next None\n"
12686 		"             OpBranchConditional %lt %write %merge\n"
12687 
12688 		"    %write = OpLabel\n"
12689 		"      %ndx = OpLoad %i32 %i\n"
12690 		" %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12691 		" %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12692 		"  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12693 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12694 		"             OpBranch %next\n"
12695 
12696 		"     %next = OpLabel\n"
12697 		"    %i_cur = OpLoad %i32 %i\n"
12698 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12699 		"             OpStore %i %i_new\n"
12700 		"             OpBranch %loop\n"
12701 
12702 		"    %merge = OpLabel\n"
12703 		"             OpBranch %end_if\n"
12704 		"   %end_if = OpLabel\n"
12705 		"             OpReturnValue %param\n"
12706 		"             OpFunctionEnd\n"
12707 		"\n"
12708 
12709 		"   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12710 		"%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12711 		"%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12712 		"%sw_paramn = OpFunctionParameter %i32\n"
12713 		" %sw_entry = OpLabel\n"
12714 		"   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12715 		"             OpSelectionMerge %switch_e None\n"
12716 		"             OpSwitch %modulo %default ${case_list}\n"
12717 		"${case_bodies}"
12718 		"%default   = OpLabel\n"
12719 		"             OpUnreachable\n" // Unreachable default case for switch statement
12720 		"%switch_e  = OpLabel\n"
12721 		"             OpUnreachable\n" // Unreachable merge block for switch statement
12722 		"             OpFunctionEnd\n"
12723 	);
12724 
12725 	const StringTemplate testCaseBody
12726 	(
12727 		"%case_${case_ndx}    = OpLabel\n"
12728 		"%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12729 		"             OpReturnValue %val_dst_${case_ndx}\n"
12730 	);
12731 
12732 	for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12733 	{
12734 		const TestType&	dstType			= testTypes[dstTypeIdx];
12735 
12736 		for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12737 		{
12738 			const TestType&	src0Type	= testTypes[comp0Idx];
12739 
12740 			for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12741 			{
12742 				const TestType&			src1Type			= testTypes[comp1Idx];
12743 				const deUint32			input0Stride		= (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12744 				const deUint32			input1Stride		= (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12745 				const deUint32			outputStride		= (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12746 				const vector<deFloat16>	float16Input0Data	= getFloat16s(rnd, input0Stride * numDataPoints);
12747 				const vector<deFloat16>	float16Input1Data	= getFloat16s(rnd, input1Stride * numDataPoints);
12748 				const vector<deFloat16>	float16OutputUnused	(outputStride * numDataPoints, 0);
12749 				const string			testName			= de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12750 				deUint32				caseCount			= 0;
12751 				SpecResource			specResource;
12752 				map<string, string>		specs;
12753 				vector<string>			extensions;
12754 				VulkanFeatures			features;
12755 				string					caseBodies;
12756 				string					caseList;
12757 
12758 				// Generate case
12759 				{
12760 					vector<string>	componentList;
12761 
12762 					// Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12763 					{
12764 						deUint32		caseNo		= 0;
12765 
12766 						for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12767 							componentList.push_back(de::toString(caseNo++));
12768 						for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12769 							componentList.push_back(de::toString(caseNo++));
12770 						componentList.push_back("0xFFFFFFFF");
12771 					}
12772 
12773 					for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12774 					{
12775 						for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12776 						{
12777 							map<string, string>	specCase;
12778 							string				shuffle		= componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12779 
12780 							for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12781 								shuffle += " " + de::toString(compIdx - 2);
12782 
12783 							specCase["case_ndx"]	= de::toString(caseCount);
12784 							specCase["shuffle"]		= shuffle;
12785 							specCase["tt_dst"]		= dstType.typeName;
12786 
12787 							caseBodies	+= testCaseBody.specialize(specCase);
12788 							caseList	+= de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12789 
12790 							caseCount++;
12791 						}
12792 					}
12793 				}
12794 
12795 				specs["num_data_points"]	= de::toString(numDataPoints);
12796 				specs["tt_dst"]				= dstType.typeName;
12797 				specs["tt_src0"]			= src0Type.typeName;
12798 				specs["tt_src1"]			= src1Type.typeName;
12799 				specs["case_bodies"]		= caseBodies;
12800 				specs["case_list"]			= caseList;
12801 				specs["case_count"]			= de::toString(caseCount);
12802 
12803 				fragments["capability"]		= "OpCapability Float16\n";
12804 				fragments["decoration"]		= decoration.specialize(specs);
12805 				fragments["pre_main"]		= preMain.specialize(specs);
12806 				fragments["testfun"]		= testFun.specialize(specs);
12807 				fragments["testfun"]		+= StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12808 				fragments["testfun"]		+= StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12809 				fragments["testfun"]		+= StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12810 
12811 				specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12812 				specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12813 				specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputUnused)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12814 				specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12815 
12816 				extensions.push_back("VK_KHR_shader_float16_int8");
12817 
12818 				features.extFloat16Int8.shaderFloat16 = true;
12819 
12820 				finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12821 			}
12822 		}
12823 	}
12824 
12825 	return testGroup.release();
12826 }
12827 
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12828 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12829 {
12830 	if (inputs.size() != 1 || outputAllocs.size() != 1)
12831 		return false;
12832 
12833 	vector<deUint8>	input1Bytes;
12834 
12835 	inputs[0].getBytes(input1Bytes);
12836 
12837 	DE_ASSERT(input1Bytes.size() > 0);
12838 	DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12839 
12840 	const size_t			iterations		= input1Bytes.size() / sizeof(deFloat16);
12841 	const deFloat16* const	input1AsFP16	= (const deFloat16*)&input1Bytes[0];
12842 	const deFloat16* const	outputAsFP16	= (const deFloat16*)outputAllocs[0]->getHostPtr();
12843 	const deFloat16			exceptionValue	= tcu::Float16(-1.0).bits();
12844 	std::string				error;
12845 
12846 	for (size_t idx = 0; idx < iterations; ++idx)
12847 	{
12848 		if (input1AsFP16[idx] == exceptionValue)
12849 			continue;
12850 
12851 		if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12852 		{
12853 			log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12854 
12855 			return false;
12856 		}
12857 	}
12858 
12859 	return true;
12860 }
12861 
12862 template<class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)12863 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12864 {
12865 	de::MovePtr<tcu::TestCaseGroup>		testGroup				(new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12866 	const deUint32						numElements				= 8;
12867 	const string						testName				= "struct";
12868 	const deUint32						structItemsCount		= 88;
12869 	const deUint32						exceptionIndices[]		= { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12870 	const deFloat16						exceptionValue			= tcu::Float16(-1.0).bits();
12871 	const deUint32						fieldModifier			= 2;
12872 	const deUint32						fieldModifiedMulIndex	= 60;
12873 	const deUint32						fieldModifiedAddIndex	= 66;
12874 
12875 	const StringTemplate preMain
12876 	(
12877 		"    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12878 		"          %f16 = OpTypeFloat 16\n"
12879 		"        %v2f16 = OpTypeVector %f16 2\n"
12880 		"        %v3f16 = OpTypeVector %f16 3\n"
12881 		"        %v4f16 = OpTypeVector %f16 4\n"
12882 		"    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12883 
12884 		"${consts}"
12885 
12886 		"     %c_f16_n1 = OpConstant %f16 -1.0\n"
12887 		"   %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12888 		"      %c_u32_5 = OpConstant %u32 5\n"
12889 		"      %c_u32_6 = OpConstant %u32 6\n"
12890 		"      %c_u32_7 = OpConstant %u32 7\n"
12891 		"      %c_u32_8 = OpConstant %u32 8\n"
12892 		"      %c_u32_9 = OpConstant %u32 9\n"
12893 		"     %c_u32_10 = OpConstant %u32 10\n"
12894 		"     %c_u32_11 = OpConstant %u32 11\n"
12895 		"     %c_u32_12 = OpConstant %u32 12\n"
12896 		"     %c_u32_13 = OpConstant %u32 13\n"
12897 		"     %c_u32_14 = OpConstant %u32 14\n"
12898 		"     %c_u32_15 = OpConstant %u32 15\n"
12899 		"     %c_u32_16 = OpConstant %u32 16\n"
12900 		"     %c_u32_17 = OpConstant %u32 17\n"
12901 		"     %c_u32_18 = OpConstant %u32 18\n"
12902 		"     %c_u32_19 = OpConstant %u32 19\n"
12903 		"     %c_u32_20 = OpConstant %u32 20\n"
12904 		"     %c_u32_21 = OpConstant %u32 21\n"
12905 		"     %c_u32_22 = OpConstant %u32 22\n"
12906 		"     %c_u32_23 = OpConstant %u32 23\n"
12907 		"     %c_u32_24 = OpConstant %u32 24\n"
12908 		"     %c_u32_25 = OpConstant %u32 25\n"
12909 		"     %c_u32_26 = OpConstant %u32 26\n"
12910 		"     %c_u32_27 = OpConstant %u32 27\n"
12911 		"     %c_u32_28 = OpConstant %u32 28\n"
12912 		"     %c_u32_29 = OpConstant %u32 29\n"
12913 		"     %c_u32_30 = OpConstant %u32 30\n"
12914 		"     %c_u32_31 = OpConstant %u32 31\n"
12915 		"     %c_u32_33 = OpConstant %u32 33\n"
12916 		"     %c_u32_34 = OpConstant %u32 34\n"
12917 		"     %c_u32_35 = OpConstant %u32 35\n"
12918 		"     %c_u32_36 = OpConstant %u32 36\n"
12919 		"     %c_u32_37 = OpConstant %u32 37\n"
12920 		"     %c_u32_38 = OpConstant %u32 38\n"
12921 		"     %c_u32_39 = OpConstant %u32 39\n"
12922 		"     %c_u32_40 = OpConstant %u32 40\n"
12923 		"     %c_u32_41 = OpConstant %u32 41\n"
12924 		"     %c_u32_44 = OpConstant %u32 44\n"
12925 
12926 		" %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12927 		" %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12928 		" %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12929 		" %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12930 		" %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12931 		" %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12932 		" %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12933 		" %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12934 
12935 		"       %up_u32 = OpTypePointer Uniform %u32\n"
12936 		"    %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12937 		"    %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12938 		"      %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12939 		"   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12940 
12941 		"     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12942 	);
12943 
12944 	const StringTemplate decoration
12945 	(
12946 		"OpDecorate %SSBO_st BufferBlock\n"
12947 		"OpDecorate %ra_u32_44 ArrayStride 4\n"
12948 		"OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12949 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12950 		"OpDecorate %ssbo_dst Binding 1\n"
12951 
12952 		"OpMemberDecorate %SSBO_st 0 Offset 0\n"
12953 
12954 		"OpDecorate %v2f16arr3 ArrayStride 4\n"
12955 		"OpMemberDecorate %struct16 0 Offset 0\n"
12956 		"OpMemberDecorate %struct16 1 Offset 4\n"
12957 		"OpDecorate %struct16arr3 ArrayStride 16\n"
12958 		"OpDecorate %f16arr3 ArrayStride 2\n"
12959 		"OpDecorate %v2f16arr5 ArrayStride 4\n"
12960 		"OpDecorate %v3f16arr5 ArrayStride 8\n"
12961 		"OpDecorate %v4f16arr3 ArrayStride 8\n"
12962 
12963 		"OpMemberDecorate %st_test 0 Offset 0\n"
12964 		"OpMemberDecorate %st_test 1 Offset 4\n"
12965 		"OpMemberDecorate %st_test 2 Offset 8\n"
12966 		"OpMemberDecorate %st_test 3 Offset 16\n"
12967 		"OpMemberDecorate %st_test 4 Offset 24\n"
12968 		"OpMemberDecorate %st_test 5 Offset 32\n"
12969 		"OpMemberDecorate %st_test 6 Offset 80\n"
12970 		"OpMemberDecorate %st_test 7 Offset 100\n"
12971 		"OpMemberDecorate %st_test 8 Offset 104\n"
12972 		"OpMemberDecorate %st_test 9 Offset 144\n"
12973 	);
12974 
12975 	const StringTemplate testFun
12976 	(
12977 		" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12978 		"     %param = OpFunctionParameter %v4f32\n"
12979 		"     %entry = OpLabel\n"
12980 
12981 		"         %i = OpVariable %fp_i32 Function\n"
12982 		"              OpStore %i %c_i32_0\n"
12983 
12984 		"  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12985 		"              OpSelectionMerge %end_if None\n"
12986 		"              OpBranchConditional %will_run %run_test %end_if\n"
12987 
12988 		"  %run_test = OpLabel\n"
12989 		"              OpBranch %loop\n"
12990 
12991 		"      %loop = OpLabel\n"
12992 		"     %i_cmp = OpLoad %i32 %i\n"
12993 		"        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12994 		"              OpLoopMerge %merge %next None\n"
12995 		"              OpBranchConditional %lt %write %merge\n"
12996 
12997 		"     %write = OpLabel\n"
12998 		"       %ndx = OpLoad %i32 %i\n"
12999 
13000 		"      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
13001 		"      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
13002 		"      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
13003 
13004 		"      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
13005 
13006 		"%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
13007 		"%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
13008 		"%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
13009 		"  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
13010 		"    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
13011 
13012 		"%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
13013 		"%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
13014 		"%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13015 		"  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13016 		"    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13017 
13018 		"%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13019 		"%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13020 		"%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13021 		"  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13022 		"    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13023 
13024 		"      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13025 
13026 		"    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13027 		"    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13028 		"    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13029 		"    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13030 		"    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13031 		"      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13032 
13033 		"      %fndx = OpConvertSToF %f16 %ndx\n"
13034 		"  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13035 		"  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13036 
13037 		"   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13038 		"   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13039 		"    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13040 		"    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13041 		"    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13042 		"    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13043 		"    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13044 		"      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13045 
13046 		"    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13047 		"    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13048 		"    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13049 		"      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13050 
13051 		"    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13052 
13053 		// Storage section: all elements that are not directly accessed should
13054 		// have the value of -1.0. This means for f16 and v3f16 stores the v2f16
13055 		// is constructed with one element from a constant -1.0.
13056 		// half offset 0
13057 		"      %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13058 		"     %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13059 		"      %bc_0 = OpBitcast %u32 %vec_0\n"
13060 		"     %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13061 		"              OpStore %gep_0 %bc_0\n"
13062 
13063 		// <2 x half> offset 4
13064 		"      %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13065 		"      %bc_1 = OpBitcast %u32 %ex_1\n"
13066 		"     %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13067 		"              OpStore %gep_1 %bc_1\n"
13068 
13069 		// <3 x half> offset 8
13070 		"      %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13071 		"    %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13072 		"    %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13073 		"    %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13074 		"    %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13075 		"   %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13076 		"   %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13077 		"              OpStore %gep_2_0 %bc_2_0\n"
13078 		"              OpStore %gep_2_1 %bc_2_1\n"
13079 
13080 		// <4 x half> offset 16
13081 		"      %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13082 		"    %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13083 		"    %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13084 		"    %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13085 		"    %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13086 		"   %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13087 		"   %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13088 		"              OpStore %gep_3_0 %bc_3_0\n"
13089 		"              OpStore %gep_3_1 %bc_3_1\n"
13090 
13091 		// [3 x half] offset 24
13092 		"    %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13093 		"    %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13094 		"    %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13095 		"   %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13096 		"   %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13097 		"    %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13098 		"    %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13099 		"   %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13100 		"   %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13101 		"              OpStore %gep_4_0 %bc_4_0\n"
13102 		"              OpStore %gep_4_1 %bc_4_1\n"
13103 
13104 		// [3 x {half, [3 x <2 x half>]}] offset 32
13105 		"    %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13106 		"    %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13107 		"    %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13108 		"  %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13109 		"  %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13110 		"  %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13111 		"%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13112 		"%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13113 		"%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13114 		"%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13115 		"%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13116 		"%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13117 		"%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13118 		"%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13119 		"%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13120 		" %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13121 		" %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13122 		" %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13123 		"  %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13124 		"  %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13125 		"  %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13126 		"%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13127 		"%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13128 		"%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13129 		"%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13130 		"%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13131 		"%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13132 		"%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13133 		"%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13134 		"%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13135 		"  %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13136 		"%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13137 		"%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13138 		"%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13139 		"  %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13140 		"%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13141 		"%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13142 		"%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13143 		"  %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13144 		"%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13145 		"%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13146 		"%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13147 		"              OpStore %gep_5_0_0 %bc_5_0_0\n"
13148 		"              OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13149 		"              OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13150 		"              OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13151 		"              OpStore %gep_5_1_0 %bc_5_1_0\n"
13152 		"              OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13153 		"              OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13154 		"              OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13155 		"              OpStore %gep_5_2_0 %bc_5_2_0\n"
13156 		"              OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13157 		"              OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13158 		"              OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13159 
13160 		// [5 x <2 x half>] offset 80
13161 		"    %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13162 		"    %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13163 		"    %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13164 		"    %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13165 		"    %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13166 		"    %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13167 		"    %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13168 		"    %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13169 		"    %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13170 		"    %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13171 		"   %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13172 		"   %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13173 		"   %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13174 		"   %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13175 		"   %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13176 		"              OpStore %gep_6_0 %bc_6_0\n"
13177 		"              OpStore %gep_6_1 %bc_6_1\n"
13178 		"              OpStore %gep_6_2 %bc_6_2\n"
13179 		"              OpStore %gep_6_3 %bc_6_3\n"
13180 		"              OpStore %gep_6_4 %bc_6_4\n"
13181 
13182 		// half offset 100
13183 		"      %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13184 		"     %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13185 		"      %bc_7 = OpBitcast %u32 %vec_7\n"
13186 		"     %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13187 		"              OpStore %gep_7 %bc_7\n"
13188 
13189 		// [5 x <3 x half>] offset 104
13190 		"    %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13191 		"    %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13192 		"    %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13193 		"    %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13194 		"    %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13195 		" %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13196 		" %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13197 		" %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13198 		" %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13199 		" %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13200 		" %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13201 		" %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13202 		" %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13203 		" %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13204 		" %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13205 		"  %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13206 		"  %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13207 		"  %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13208 		"  %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13209 		"  %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13210 		"  %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13211 		"  %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13212 		"  %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13213 		"  %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13214 		"  %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13215 		" %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13216 		" %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13217 		" %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13218 		" %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13219 		" %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13220 		" %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13221 		" %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13222 		" %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13223 		" %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13224 		" %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13225 		"              OpStore %gep_8_0_0 %bc_8_0_0\n"
13226 		"              OpStore %gep_8_0_1 %bc_8_0_1\n"
13227 		"              OpStore %gep_8_1_0 %bc_8_1_0\n"
13228 		"              OpStore %gep_8_1_1 %bc_8_1_1\n"
13229 		"              OpStore %gep_8_2_0 %bc_8_2_0\n"
13230 		"              OpStore %gep_8_2_1 %bc_8_2_1\n"
13231 		"              OpStore %gep_8_3_0 %bc_8_3_0\n"
13232 		"              OpStore %gep_8_3_1 %bc_8_3_1\n"
13233 		"              OpStore %gep_8_4_0 %bc_8_4_0\n"
13234 		"              OpStore %gep_8_4_1 %bc_8_4_1\n"
13235 
13236 		// [3 x <4 x half>] offset 144
13237 		"    %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13238 		"    %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13239 		"    %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13240 		" %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13241 		" %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13242 		" %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13243 		" %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13244 		" %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13245 		" %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13246 		"  %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13247 		"  %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13248 		"  %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13249 		"  %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13250 		"  %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13251 		"  %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13252 		" %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13253 		" %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13254 		" %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13255 		" %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13256 		" %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13257 		" %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13258 		"              OpStore %gep_9_0_0 %bc_9_0_0\n"
13259 		"              OpStore %gep_9_0_1 %bc_9_0_1\n"
13260 		"              OpStore %gep_9_1_0 %bc_9_1_0\n"
13261 		"              OpStore %gep_9_1_1 %bc_9_1_1\n"
13262 		"              OpStore %gep_9_2_0 %bc_9_2_0\n"
13263 		"              OpStore %gep_9_2_1 %bc_9_2_1\n"
13264 
13265 		"              OpBranch %next\n"
13266 
13267 		"      %next = OpLabel\n"
13268 		"     %i_cur = OpLoad %i32 %i\n"
13269 		"     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13270 		"              OpStore %i %i_new\n"
13271 		"              OpBranch %loop\n"
13272 
13273 		"     %merge = OpLabel\n"
13274 		"              OpBranch %end_if\n"
13275 		"    %end_if = OpLabel\n"
13276 		"              OpReturnValue %param\n"
13277 		"              OpFunctionEnd\n"
13278 	);
13279 
13280 	{
13281 		SpecResource		specResource;
13282 		map<string, string>	specs;
13283 		VulkanFeatures		features;
13284 		map<string, string>	fragments;
13285 		vector<string>		extensions;
13286 		vector<deFloat16>	expectedOutput;
13287 		string				consts;
13288 
13289 		for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13290 		{
13291 			vector<deFloat16>	expectedIterationOutput;
13292 
13293 			for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13294 				expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13295 
13296 			for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13297 				expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13298 
13299 			expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13300 			expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13301 
13302 			expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13303 		}
13304 
13305 		for (deUint32 i = 0; i < structItemsCount; ++i)
13306 			consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 "  + de::toString(i) + "\n";
13307 
13308 		specs["num_elements"]		= de::toString(numElements);
13309 		specs["struct_item_size"]	= de::toString(structItemsCount * sizeof(deFloat16));
13310 		specs["field_modifier"]		= de::toString(fieldModifier);
13311 		specs["consts"]				= consts;
13312 
13313 		fragments["capability"]		= "OpCapability Float16\n";
13314 		fragments["decoration"]		= decoration.specialize(specs);
13315 		fragments["pre_main"]		= preMain.specialize(specs);
13316 		fragments["testfun"]		= testFun.specialize(specs);
13317 
13318 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13319 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13320 		specResource.verifyIO = compareFP16CompositeFunc;
13321 
13322 		extensions.push_back("VK_KHR_shader_float16_int8");
13323 
13324 		features.extFloat16Int8.shaderFloat16 = true;
13325 
13326 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13327 	}
13328 
13329 	return testGroup.release();
13330 }
13331 
13332 template<class SpecResource>
13333 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13334 {
13335 	de::MovePtr<tcu::TestCaseGroup>		testGroup		(new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13336 	const deFloat16						exceptionValue	= tcu::Float16(-1.0).bits();
13337 	const string						opName			(op);
13338 	const deUint32						opIndex			= (opName == "OpCompositeInsert") ? 0
13339 														: (opName == "OpCompositeExtract") ? 1
13340 														: std::numeric_limits<deUint32>::max();
13341 
13342 	const StringTemplate preMain
13343 	(
13344 		"   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13345 		"  %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13346 		"  %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13347 		"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13348 		" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13349 		"         %f16 = OpTypeFloat 16\n"
13350 		"       %v2f16 = OpTypeVector %f16 2\n"
13351 		"       %v3f16 = OpTypeVector %f16 3\n"
13352 		"       %v4f16 = OpTypeVector %f16 4\n"
13353 		"    %c_f16_na = OpConstant %f16 -1.0\n"
13354 		"  %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13355 		"     %c_u32_5 = OpConstant %u32 5\n"
13356 		"     %c_i32_5 = OpConstant %i32 5\n"
13357 		"     %c_i32_6 = OpConstant %i32 6\n"
13358 		"     %c_i32_7 = OpConstant %i32 7\n"
13359 		"     %c_i32_8 = OpConstant %i32 8\n"
13360 		"     %c_i32_9 = OpConstant %i32 9\n"
13361 		"    %c_i32_10 = OpConstant %i32 10\n"
13362 		"    %c_i32_11 = OpConstant %i32 11\n"
13363 
13364 		"%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
13365 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
13366 		"%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
13367 		"%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
13368 		"%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
13369 		"%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
13370 		"%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13371 		"%st_test      = OpTypeStruct %${field_type}\n"
13372 
13373 		"      %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13374 		"       %ra_st = OpTypeArray %u32 %c_i32_size\n"
13375 		"      %up_u32 = OpTypePointer Uniform %u32\n"
13376 		"     %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13377 		"%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13378 		"         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13379 		"    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13380 		"       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13381 		"  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13382 
13383 		"${op_premain_decls}"
13384 
13385 		" %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13386 		" %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13387 
13388 		"    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13389 		"    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13390 	);
13391 
13392 	const StringTemplate decoration
13393 	(
13394 		"OpDecorate %SSBO_src BufferBlock\n"
13395 		"OpDecorate %SSBO_dst BufferBlock\n"
13396 		"OpDecorate %ra_f16 ArrayStride 4\n"
13397 		"OpDecorate %ra_st ArrayStride 4\n"
13398 		"OpDecorate %ssbo_src DescriptorSet 0\n"
13399 		"OpDecorate %ssbo_src Binding 0\n"
13400 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
13401 		"OpDecorate %ssbo_dst Binding 1\n"
13402 
13403 		"OpMemberDecorate %SSBO_src 0 Offset 0\n"
13404 		"OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13405 
13406 		"OpDecorate %v2f16arr3 ArrayStride 4\n"
13407 		"OpMemberDecorate %struct16 0 Offset 0\n"
13408 		"OpMemberDecorate %struct16 1 Offset 4\n"
13409 		"OpDecorate %struct16arr3 ArrayStride 16\n"
13410 		"OpDecorate %f16arr3 ArrayStride 2\n"
13411 		"OpDecorate %v2f16arr5 ArrayStride 4\n"
13412 		"OpDecorate %v3f16arr5 ArrayStride 8\n"
13413 		"OpDecorate %v4f16arr3 ArrayStride 8\n"
13414 
13415 		"OpMemberDecorate %st_test 0 Offset 0\n"
13416 	);
13417 
13418 	const StringTemplate testFun
13419 	(
13420 		" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13421 		"     %param = OpFunctionParameter %v4f32\n"
13422 		"     %entry = OpLabel\n"
13423 
13424 		"         %i = OpVariable %fp_i32 Function\n"
13425 		"              OpStore %i %c_i32_0\n"
13426 
13427 		"  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13428 		"              OpSelectionMerge %end_if None\n"
13429 		"              OpBranchConditional %will_run %run_test %end_if\n"
13430 
13431 		"  %run_test = OpLabel\n"
13432 		"              OpBranch %loop\n"
13433 
13434 		"      %loop = OpLabel\n"
13435 		"     %i_cmp = OpLoad %i32 %i\n"
13436 		"        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13437 		"              OpLoopMerge %merge %next None\n"
13438 		"              OpBranchConditional %lt %write %merge\n"
13439 
13440 		"     %write = OpLabel\n"
13441 		"       %ndx = OpLoad %i32 %i\n"
13442 
13443 		"${op_sw_fun_call}"
13444 
13445 		"    %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13446 		"              OpBranch %next\n"
13447 
13448 		"      %next = OpLabel\n"
13449 		"     %i_cur = OpLoad %i32 %i\n"
13450 		"     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13451 		"              OpStore %i %i_new\n"
13452 		"              OpBranch %loop\n"
13453 
13454 		"     %merge = OpLabel\n"
13455 		"              OpBranch %end_if\n"
13456 		"    %end_if = OpLabel\n"
13457 		"              OpReturnValue %param\n"
13458 		"              OpFunctionEnd\n"
13459 
13460 		"${op_sw_fun_header}"
13461 		" %sw_param = OpFunctionParameter %st_test\n"
13462 		"%sw_paramn = OpFunctionParameter %i32\n"
13463 		" %sw_entry = OpLabel\n"
13464 		"             OpSelectionMerge %switch_e None\n"
13465 		"             OpSwitch %sw_paramn %default ${case_list}\n"
13466 
13467 		"${case_bodies}"
13468 
13469 		"%default   = OpLabel\n"
13470 		"             OpReturnValue ${op_case_default_value}\n"
13471 		"%switch_e  = OpLabel\n"
13472 		"             OpUnreachable\n" // Unreachable merge block for switch statement
13473 		"             OpFunctionEnd\n"
13474 	);
13475 
13476 	const StringTemplate testCaseBody
13477 	(
13478 		"%case_${case_ndx}    = OpLabel\n"
13479 		"%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13480 		"             OpReturnValue %val_ret_${case_ndx}\n"
13481 	);
13482 
13483 	const string loadF16
13484 	(
13485 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13486 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13487 		"  %ld_${var}_entry = OpLabel\n"
13488 		"   %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13489 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13490 		"                     OpReturnValue %ld_${var}_st_test\n"
13491 		"                     OpFunctionEnd\n" +
13492 		loadScalarF16FromUint
13493 	);
13494 
13495 	const string loadV2F16
13496 	(
13497 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13498 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13499 		"  %ld_${var}_entry = OpLabel\n"
13500 		"   %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13501 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13502 		"                     OpReturnValue %ld_${var}_st_test\n"
13503 		"                     OpFunctionEnd\n" +
13504 		loadV2F16FromUint
13505 	);
13506 
13507 	const string loadV3F16
13508 	(
13509 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13510 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13511 		"  %ld_${var}_entry = OpLabel\n"
13512 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13513 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13514 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13515 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13516 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13517 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13518 		"    %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13519 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13520 		"                     OpReturnValue %ld_${var}_st_test\n"
13521 		"                     OpFunctionEnd\n"
13522 	);
13523 
13524 	const string loadV4F16
13525 	(
13526 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13527 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13528 		"  %ld_${var}_entry = OpLabel\n"
13529 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13530 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13531 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13532 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13533 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13534 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13535 		"    %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13536 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13537 		"                     OpReturnValue %ld_${var}_st_test\n"
13538 		"                     OpFunctionEnd\n"
13539 	);
13540 
13541 	const string loadF16Arr3
13542 	(
13543 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13544 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13545 		"  %ld_${var}_entry = OpLabel\n"
13546 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13547 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13548 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13549 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13550 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13551 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13552 		"   %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13553 		"   %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13554 		"   %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13555 		"   %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13556 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13557 		"                     OpReturnValue %ld_${var}_st_test\n"
13558 		"                     OpFunctionEnd\n"
13559 	);
13560 
13561 	const string loadV2F16Arr5
13562 	(
13563 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13564 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13565 		"  %ld_${var}_label = OpLabel\n"
13566 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13567 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13568 		"  %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13569 		"  %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13570 		"  %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13571 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13572 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13573 		"   %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13574 		"   %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13575 		"   %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13576 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13577 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13578 		"   %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13579 		"   %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13580 		"   %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13581 		"   %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13582 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13583 		"                     OpReturnValue %ld_${var}_st_test\n"
13584 		"                     OpFunctionEnd\n"
13585 	);
13586 
13587 	const string loadV3F16Arr5
13588 	(
		// SPIR-V assembly for the function %ld_${var} (type %st_test_i32_fn, returns %st_test).
		// It reads ten consecutive u32 words (indices 0..9 of member 0 of %${var}), bitcasts each
		// word to a v2f16 and shuffles every pair of v2f16 values into one v3f16 using components
		// 0 1 2, i.e. the upper half of each odd word is dropped. The five v3f16 values are packed
		// into %v3f16arr5 and wrapped into %st_test. The i32 parameter is declared but not used.
13589 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13590 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13591 		"  %ld_${var}_entry = OpLabel\n"
13592 		"%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13593 		"%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13594 		"%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13595 		"%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13596 		"%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13597 		"%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13598 		"%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13599 		"%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13600 		"%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13601 		"%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13602 		" %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13603 		" %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13604 		" %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13605 		" %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13606 		" %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13607 		" %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13608 		" %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13609 		" %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13610 		" %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13611 		" %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13612 		" %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13613 		" %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13614 		" %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13615 		" %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13616 		" %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13617 		" %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13618 		" %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13619 		" %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13620 		" %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13621 		" %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13622 		"  %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13623 		"  %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13624 		"  %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13625 		"  %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13626 		"  %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13627 		"   %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13628 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13629 		"                     OpReturnValue %ld_${var}_st_test\n"
13630 		"                     OpFunctionEnd\n"
13631 	);
13632 
13633 	const string loadV4F16Arr3
13634 	(
		// SPIR-V assembly for the function %ld_${var} (type %st_test_i32_fn, returns %st_test).
		// It reads six consecutive u32 words (indices 0..5 of member 0 of %${var}), bitcasts each
		// word to a v2f16 and joins every pair into one v4f16 (shuffle components 0 1 2 3).
		// The three v4f16 values form %v4f16arr3, which is wrapped into %st_test.
		// The i32 parameter is declared but not used.
13635 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13636 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13637 		"  %ld_${var}_entry = OpLabel\n"
13638 		"%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13639 		"%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13640 		"%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13641 		"%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13642 		"%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13643 		"%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13644 		" %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13645 		" %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13646 		" %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13647 		" %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13648 		" %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13649 		" %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13650 		" %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13651 		" %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13652 		" %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13653 		" %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13654 		" %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13655 		" %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13656 		"  %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13657 		"  %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13658 		"  %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13659 		"   %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13660 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13661 		"                     OpReturnValue %ld_${var}_st_test\n"
13662 		"                     OpFunctionEnd\n"
13663 	);
13664 
13665 	const string loadStruct16Arr3
13666 	(
		// SPIR-V assembly for the function %ld_${var} (type %st_test_i32_fn, returns %st_test).
		// It reads twelve consecutive u32 words (indices 0..11 of member 0 of %${var}); each
		// group of four words describes one %struct16 element:
		//   word 4*i         -> bitcast to v2f16, component 0 becomes the f16 member
		//                       (component 1 is ignored),
		//   words 4*i+1..+3  -> bitcast to three v2f16 values forming the %v2f16arr3 member.
		// The three structs form %struct16arr3, which is wrapped into %st_test.
		// The i32 parameter is declared but not used.
13667 		"          %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13668 		"    %ld_${var}_param = OpFunctionParameter %i32\n"
13669 		"    %ld_${var}_entry = OpLabel\n"
13670 		"%ld_${var}_gep_0_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13671 		"%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13672 		"%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13673 		"%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13674 		"%ld_${var}_gep_1_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13675 		"%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13676 		"%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13677 		"%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13678 		"%ld_${var}_gep_2_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13679 		"%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13680 		"%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13681 		"%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13682 		" %ld_${var}_ld_0_0   = OpLoad %u32 %ld_${var}_gep_0_0\n"
13683 		" %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13684 		" %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13685 		" %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13686 		" %ld_${var}_ld_1_0   = OpLoad %u32 %ld_${var}_gep_1_0\n"
13687 		" %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13688 		" %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13689 		" %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13690 		" %ld_${var}_ld_2_0   = OpLoad %u32 %ld_${var}_gep_2_0\n"
13691 		" %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13692 		" %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13693 		" %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13694 		" %ld_${var}_bc_0_0   = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13695 		" %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13696 		" %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13697 		" %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13698 		" %ld_${var}_bc_1_0   = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13699 		" %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13700 		" %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13701 		" %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13702 		" %ld_${var}_bc_2_0   = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13703 		" %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13704 		" %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13705 		" %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13706 		"    %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13707 		"    %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13708 		"    %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13709 		"     %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13710 		"     %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13711 		"     %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13712 		"     %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13713 		"     %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13714 		"     %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13715 		"     %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13716 		"  %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13717 		"                       OpReturnValue %ld_${var}_st_test\n"
13718 		"                      OpFunctionEnd\n"
13719 	);
13720 
13721 	const string storeF16
13722 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts member 0 (an f16) from the %st_test argument and forwards it, together
		// with the i32 argument, to %st_fn_${var}.
		// The text of storeScalarF16AsUint (defined outside this view) is appended after the
		// function; presumably it provides %st_fn_${var} - confirm against its definition.
13723 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13724 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13725 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13726 		" %st_${var}_entry = OpLabel\n"
13727 		"    %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13728 		"  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13729 		"                    OpReturn\n"
13730 		"                    OpFunctionEnd\n" +
13731 		storeScalarF16AsUint
13732 	);
13733 
13734 	const string storeV2F16
13735 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts member 0 (a v2f16) from the %st_test argument and forwards it, together
		// with the i32 argument, to %st_fn_${var}.
		// The text of storeV2F16AsUint (defined outside this view) is appended after the
		// function; presumably it provides %st_fn_${var} - confirm against its definition.
13736 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13737 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13738 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13739 		" %st_${var}_entry = OpLabel\n"
13740 		"    %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13741 		"  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13742 		"                    OpReturn\n"
13743 		"                    OpFunctionEnd\n" +
13744 		storeV2F16AsUint
13745 	);
13746 
13747 	const string storeV3F16
13748 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts member 0 (a v3f16) from the %st_test argument and splits it into two
		// v2f16 values by shuffling with %c_v2f16_n1: components (0,1) are x,y of the vector;
		// components (2,3) are z of the vector followed by the first component of %c_v2f16_n1,
		// which acts as filler for the fourth 16-bit slot.
		// Both halves are bitcast to u32 and stored to words 0 and 1 of member 0 of %${var}.
		// The i32 parameter is declared but not used.
13749 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13750 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13751 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13752 		" %st_${var}_entry = OpLabel\n"
13753 		"    %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13754 		" %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13755 		" %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13756 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13757 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13758 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13759 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13760 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13761 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13762 		"                    OpReturn\n"
13763 		"                    OpFunctionEnd\n"
13764 	);
13765 
13766 	const string storeV4F16
13767 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts member 0 (a v4f16) from the %st_test argument and splits it into two
		// v2f16 halves, components (0,1) and (2,3). All selected components come from the
		// first shuffle operand; %c_v2f16_n1 is only present as the required second operand.
		// Both halves are bitcast to u32 and stored to words 0 and 1 of member 0 of %${var}.
		// The i32 parameter is declared but not used.
13768 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13769 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13770 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13771 		" %st_${var}_entry = OpLabel\n"
13772 		"    %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13773 		" %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13774 		" %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13775 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13776 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13777 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13778 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13779 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13780 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13781 		"                    OpReturn\n"
13782 		"                    OpFunctionEnd\n"
13783 	);
13784 
13785 	const string storeF16Arr3
13786 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts the three f16 elements of member 0 of the %st_test argument and packs
		// them into two v2f16 values: (element 0, element 1) and (element 2, %c_f16_na),
		// where %c_f16_na fills the fourth 16-bit slot.
		// Both values are bitcast to u32 and stored to words 0 and 1 of member 0 of %${var}.
		// The i32 parameter is declared but not used.
13787 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13788 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13789 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13790 		" %st_${var}_entry = OpLabel\n"
13791 		"  %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13792 		"  %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13793 		"  %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13794 		" %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13795 		" %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13796 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13797 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13798 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13799 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13800 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13801 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13802 		"                    OpReturn\n"
13803 		"                    OpFunctionEnd\n"
13804 	);
13805 
13806 	const string storeV2F16Arr5
13807 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts the five v2f16 elements of member 0 of the %st_test argument, bitcasts
		// each one to u32 and stores them to words 0..4 of member 0 of %${var}.
		// The i32 parameter is declared but not used.
13808 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13809 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13810 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13811 		" %st_${var}_entry = OpLabel\n"
13812 		"  %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13813 		"  %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13814 		"  %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13815 		"  %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13816 		"  %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13817 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13818 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13819 		"  %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13820 		"  %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13821 		"  %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13822 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13823 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13824 		" %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13825 		" %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13826 		" %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13827 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13828 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13829 		"                    OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13830 		"                    OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13831 		"                    OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13832 		"                    OpReturn\n"
13833 		"                    OpFunctionEnd\n"
13834 	);
13835 
13836 	const string storeV3F16Arr5
13837 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts the five v3f16 elements of member 0 of the %st_test argument. Each
		// element is split into two v2f16 values by shuffling with %c_v2f16_n1: (x,y) and
		// (z, first component of %c_v2f16_n1), the latter filling the fourth 16-bit slot.
		// The ten v2f16 values are bitcast to u32 and stored, in element order, to words 0..9
		// of member 0 of %${var}. The i32 parameter is declared but not used.
13838 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13839 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13840 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13841 		" %st_${var}_entry = OpLabel\n"
13842 		"  %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13843 		"  %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13844 		"  %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13845 		"  %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13846 		"  %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13847 		"%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13848 		"%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13849 		"%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13850 		"%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13851 		"%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13852 		"%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13853 		"%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13854 		"%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13855 		"%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13856 		"%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13857 		"%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13858 		"%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13859 		"%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13860 		"%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13861 		"%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13862 		"%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13863 		"%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13864 		"%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13865 		"%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13866 		"%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13867 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13868 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13869 		" %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13870 		" %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13871 		" %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13872 		" %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13873 		" %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13874 		" %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13875 		" %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13876 		" %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13877 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13878 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13879 		"                    OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13880 		"                    OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13881 		"                    OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13882 		"                    OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13883 		"                    OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13884 		"                    OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13885 		"                    OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13886 		"                    OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13887 		"                    OpReturn\n"
13888 		"                    OpFunctionEnd\n"
13889 	);
13890 
13891 	const string storeV4F16Arr3
13892 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts the three v4f16 elements of member 0 of the %st_test argument and splits
		// each one into its (0,1) and (2,3) halves. The element is passed as both shuffle
		// operands; only components of the first operand are selected.
		// The six v2f16 halves are bitcast to u32 and stored, in element order, to words 0..5
		// of member 0 of %${var}. The i32 parameter is declared but not used.
13893 		"        %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13894 		" %st_${var}_param1 = OpFunctionParameter %st_test\n"
13895 		" %st_${var}_param2 = OpFunctionParameter %i32\n"
13896 		"  %st_${var}_entry = OpLabel\n"
13897 		"   %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13898 		"   %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13899 		"   %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13900 		"%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13901 		"%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
13902 		"%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13903 		"%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13904 		"%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13905 		"%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
13906 		" %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
13907 		" %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13908 		" %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13909 		" %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13910 		" %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13911 		" %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
13912 		"%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13913 		"%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13914 		"%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13915 		"%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13916 		"%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13917 		"%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13918 		"                     OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13919 		"                     OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13920 		"                     OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13921 		"                     OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13922 		"                     OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13923 		"                     OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
13924 		"                     OpReturn\n"
13925 		"                     OpFunctionEnd\n"
13926 	);
13927 
13928 	const string storeStruct16Arr3
13929 	(
		// SPIR-V assembly for the function %st_${var} (type %void_st_test_i32_fn).
		// It extracts the three %struct16 elements of member 0 of the %st_test argument and
		// writes each element as four u32 words of member 0 of %${var}:
		//   word 4*i         <- v2f16(f16 member, %c_f16_na), %c_f16_na filling the unused half,
		//   words 4*i+1..+3  <- the three v2f16 values of the array member.
		// The i32 parameter is declared but not used.
13930 		"          %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13931 		"   %st_${var}_param1 = OpFunctionParameter %st_test\n"
13932 		"   %st_${var}_param2 = OpFunctionParameter %i32\n"
13933 		"    %st_${var}_entry = OpLabel\n"
13934 		"     %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
13935 		"     %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13936 		"     %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
13937 		"   %st_${var}_el_0   = OpCompositeExtract   %f16 %st_${var}_st_0 0\n"
13938 		"   %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
13939 		"   %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13940 		"   %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13941 		"   %st_${var}_el_1   = OpCompositeExtract   %f16 %st_${var}_st_1 0\n"
13942 		"   %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13943 		"   %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13944 		"   %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13945 		"   %st_${var}_el_2   = OpCompositeExtract   %f16 %st_${var}_st_2 0\n"
13946 		"   %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13947 		"   %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13948 		"   %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
13949 		"     %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
13950 		"     %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13951 		"     %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
13952 		"   %st_${var}_bc_0   = OpBitcast %u32 %st_${var}_v2_0\n"
13953 		"   %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13954 		"   %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13955 		"   %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13956 		"   %st_${var}_bc_1   = OpBitcast %u32 %st_${var}_v2_1\n"
13957 		"   %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13958 		"   %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13959 		"   %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13960 		"   %st_${var}_bc_2   = OpBitcast %u32 %st_${var}_v2_2\n"
13961 		"   %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13962 		"   %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13963 		"   %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
13964 		"%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13965 		"%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13966 		"%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13967 		"%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13968 		"%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13969 		"%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13970 		"%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13971 		"%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13972 		"%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13973 		"%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13974 		"%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13975 		"%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13976 		"                       OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
13977 		"                       OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13978 		"                       OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13979 		"                       OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13980 		"                       OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13981 		"                       OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13982 		"                       OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13983 		"                       OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13984 		"                       OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13985 		"                       OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13986 		"                       OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13987 		"                       OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
13988 		"                       OpReturn\n"
13989 		"                       OpFunctionEnd\n"
13990 	);
13991 
13992 	struct OpParts
13993 	{
		// Opcode-specific SPIR-V text fragments. One OpParts is selected by opIndex and its
		// members are copied into the template specialization maps by the generation loop below.
13994 		const char*	premainDecls;		// Stored as specs["op_premain_decls"].
13995 		const char*	swFunCall;			// Stored as specs["op_sw_fun_call"].
13996 		const char*	swFunHeader;		// Stored as specs["op_sw_fun_header"].
13997 		const char*	caseDefaultValue;	// Stored as specs["op_case_default_value"].
13998 		const char*	argsPartial;		// Stored as specCase["op_args_part"] for every generated switch case.
13999 	};
14000 
14001 	OpParts								opPartsArray[]			=
14002 	{
		// Table indexed by opIndex. Each entry lists, in order: premainDecls, swFunCall,
		// swFunHeader, caseDefaultValue, argsPartial.
		// In the first entry the switch function takes an f16 value plus an %st_test and returns
		// %st_test; its source buffer holds f16 data (%ra_f16) and its destination buffer holds
		// structures (%ra_st). In the second entry it takes an %st_test and returns an f16, and
		// the buffer element types are swapped.
14003 		// OpCompositeInsert
14004 		{
14005 			"       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
14006 			"    %SSBO_src = OpTypeStruct %ra_f16\n"
14007 			"    %SSBO_dst = OpTypeStruct %ra_st\n",
14008 
14009 			"   %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
14010 			"   %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
14011 			"   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
14012 
14013 			"   %sw_fun = OpFunction %st_test None %fun_t\n"
14014 			"%sw_paramv = OpFunctionParameter %f16\n",
14015 
14016 			"%sw_param",
14017 
14018 			"%st_test %sw_paramv %sw_param",
14019 		},
14020 		// OpCompositeExtract
14021 		{
14022 			"       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14023 			"    %SSBO_src = OpTypeStruct %ra_st\n"
14024 			"    %SSBO_dst = OpTypeStruct %ra_f16\n",
14025 
14026 			"   %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14027 			"   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14028 
14029 			"   %sw_fun = OpFunction %f16 None %fun_t\n",
14030 
14031 			"%c_f16_na",
14032 
14033 			"%f16 %sw_param",
14034 		},
14035 	};
14036 
	// opIndex selects an entry of opPartsArray, so it must be a valid index into it.
14037 	DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14038 
14039 	const char*	accessPathF16[] =
14040 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}. A DE_NULL slot has no access path: the generation loop below feeds
		// exceptionValue as its input and emits no switch case for it.
14041 		"0",			// %f16
14042 		DE_NULL,
14043 	};
14044 	const char*	accessPathV2F16[] =
14045 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, then the vector component.
14046 		"0 0",			// %v2f16
14047 		"0 1",
14048 	};
14049 	const char*	accessPathV3F16[] =
14050 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, then the vector component. The DE_NULL slot has no
		// access path: the generation loop below feeds exceptionValue as its input and emits no
		// switch case for it.
14051 		"0 0",			// %v3f16
14052 		"0 1",
14053 		"0 2",
14054 		DE_NULL,
14055 	};
14056 	const char*	accessPathV4F16[] =
14057 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, then the vector component.
14058 		"0 0",			// %v4f16
14059 		"0 1",
14060 		"0 2",
14061 		"0 3",
14062 	};
14063 	const char*	accessPathF16Arr3[] =
14064 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, then the array element. The DE_NULL slot has no
		// access path: the generation loop below feeds exceptionValue as its input and emits no
		// switch case for it.
14065 		"0 0",			// %f16arr3
14066 		"0 1",
14067 		"0 2",
14068 		DE_NULL,
14069 	};
14070 	const char*	accessPathStruct16Arr3[] =
14071 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}. Every array element occupies eight slots: "0 i 0" is the f16 member of
		// element i, followed by a DE_NULL slot, followed by "0 i 1 j k", which is component k of
		// entry j of the v2f16 array member. DE_NULL slots have no access path: the generation
		// loop below feeds exceptionValue as their input and emits no switch case for them.
14072 		"0 0 0",		// %struct16arr3
14073 		DE_NULL,
14074 		"0 0 1 0 0",
14075 		"0 0 1 0 1",
14076 		"0 0 1 1 0",
14077 		"0 0 1 1 1",
14078 		"0 0 1 2 0",
14079 		"0 0 1 2 1",
14080 		"0 1 0",
14081 		DE_NULL,
14082 		"0 1 1 0 0",
14083 		"0 1 1 0 1",
14084 		"0 1 1 1 0",
14085 		"0 1 1 1 1",
14086 		"0 1 1 2 0",
14087 		"0 1 1 2 1",
14088 		"0 2 0",
14089 		DE_NULL,
14090 		"0 2 1 0 0",
14091 		"0 2 1 0 1",
14092 		"0 2 1 1 0",
14093 		"0 2 1 1 1",
14094 		"0 2 1 2 0",
14095 		"0 2 1 2 1",
14096 	};
14097 	const char*	accessPathV2F16Arr5[] =
14098 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, array element, vector component.
14099 		"0 0 0",		// %v2f16arr5
14100 		"0 0 1",
14101 		"0 1 0",
14102 		"0 1 1",
14103 		"0 2 0",
14104 		"0 2 1",
14105 		"0 3 0",
14106 		"0 3 1",
14107 		"0 4 0",
14108 		"0 4 1",
14109 	};
14110 	const char*	accessPathV3F16Arr5[] =
14111 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, array element, vector component. Every array element
		// occupies four slots, the last of which is DE_NULL. DE_NULL slots have no access path:
		// the generation loop below feeds exceptionValue as their input and emits no switch case
		// for them.
14112 		"0 0 0",		// %v3f16arr5
14113 		"0 0 1",
14114 		"0 0 2",
14115 		DE_NULL,
14116 		"0 1 0",
14117 		"0 1 1",
14118 		"0 1 2",
14119 		DE_NULL,
14120 		"0 2 0",
14121 		"0 2 1",
14122 		"0 2 2",
14123 		DE_NULL,
14124 		"0 3 0",
14125 		"0 3 1",
14126 		"0 3 2",
14127 		DE_NULL,
14128 		"0 4 0",
14129 		"0 4 1",
14130 		"0 4 2",
14131 		DE_NULL,
14132 	};
14133 	const char*	accessPathV4F16Arr3[] =
14134 	{
		// One entry per 16-bit slot; each string is the composite index list used as
		// ${access_path}: struct member 0, array element, vector component. DE_NULL slots have
		// no access path: the generation loop below feeds exceptionValue as their input and emits
		// no switch case for them.
		// NOTE(review): the four trailing DE_NULL slots extend the table from 12 to 16 entries;
		// the reason is not visible in this part of the file - confirm before changing.
14135 		"0 0 0",		// %v4f16arr3
14136 		"0 0 1",
14137 		"0 0 2",
14138 		"0 0 3",
14139 		"0 1 0",
14140 		"0 1 1",
14141 		"0 1 2",
14142 		"0 1 3",
14143 		"0 2 0",
14144 		"0 2 1",
14145 		"0 2 2",
14146 		"0 2 3",
14147 		DE_NULL,
14148 		DE_NULL,
14149 		DE_NULL,
14150 		DE_NULL,
14151 	};
14152 
14153 	struct TypeTestParameters
14154 	{
		// Description of one tested field type; the generation loop below iterates over a table
		// of these.
14155 		const char*		name;				// Test name; also stored as specs["field_type"].
14156 		size_t			accessPathLength;	// Number of entries in accessPath (used as structItemsCount).
14157 		const char**	accessPath;			// Per-slot access paths; entries may be DE_NULL.
14158 		const string	loadFunction;		// SPIR-V load function text; its use is outside this view.
14159 		const string	storeFunction;		// SPIR-V store function text; its use is outside this view.
14160 	};
14161 
14162 	const TypeTestParameters typeTestParameters[] =
14163 	{
		// One row per tested field type: name, number of access-path slots, access-path table,
		// load function text, store function text.
14164 		{	"f16",			DE_LENGTH_OF_ARRAY(accessPathF16),			accessPathF16,			loadF16,			storeF16		 },
14165 		{	"v2f16",		DE_LENGTH_OF_ARRAY(accessPathV2F16),		accessPathV2F16,		loadV2F16,			storeV2F16		 },
14166 		{	"v3f16",		DE_LENGTH_OF_ARRAY(accessPathV3F16),		accessPathV3F16,		loadV3F16,			storeV3F16		 },
14167 		{	"v4f16",		DE_LENGTH_OF_ARRAY(accessPathV4F16),		accessPathV4F16,		loadV4F16,			storeV4F16		  },
14168 		{	"f16arr3",		DE_LENGTH_OF_ARRAY(accessPathF16Arr3),		accessPathF16Arr3,		loadF16Arr3,		storeF16Arr3	  },
14169 		{	"v2f16arr5",	DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5),	accessPathV2F16Arr5,	loadV2F16Arr5,		storeV2F16Arr5	  },
14170 		{	"v3f16arr5",	DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5),	accessPathV3F16Arr5,	loadV3F16Arr5,		storeV3F16Arr5	  },
14171 		{	"v4f16arr3",	DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3),	accessPathV4F16Arr3,	loadV4F16Arr3,		storeV4F16Arr3	  },
14172 		{	"struct16arr3",	DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3),	accessPathStruct16Arr3,	loadStruct16Arr3,	storeStruct16Arr3},
14173 	};
14174 
14175 	for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14176 	{
14177 		const OpParts		opParts				= opPartsArray[opIndex];
14178 		const string		testName			= typeTestParameters[typeTestNdx].name;
14179 		const size_t		structItemsCount	= typeTestParameters[typeTestNdx].accessPathLength;
14180 		const char**		accessPath			= typeTestParameters[typeTestNdx].accessPath;
14181 		SpecResource		specResource;
14182 		map<string, string>	specs;
14183 		VulkanFeatures		features;
14184 		map<string, string>	fragments;
14185 		vector<string>		extensions;
14186 		vector<deFloat16>	inputFP16;
14187 		vector<deFloat16>	unusedFP16Output;
14188 
14189 		// Generate values for input
14190 		inputFP16.reserve(structItemsCount);
14191 		for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14192 			inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
14193 
14194 		unusedFP16Output.resize(structItemsCount);
14195 
14196 		// Generate cases for OpSwitch
14197 		{
14198 			string	caseBodies;
14199 			string	caseList;
14200 
14201 			for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14202 				if (accessPath[caseNdx] != DE_NULL)
14203 				{
14204 					map<string, string>	specCase;
14205 
14206 					specCase["case_ndx"]		= de::toString(caseNdx);
14207 					specCase["access_path"]		= accessPath[caseNdx];
14208 					specCase["op_args_part"]	= opParts.argsPartial;
14209 					specCase["op_name"]			= opName;
14210 
14211 					caseBodies	+= testCaseBody.specialize(specCase);
14212 					caseList	+= de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14213 				}
14214 
14215 			specs["case_bodies"]	= caseBodies;
14216 			specs["case_list"]		= caseList;
14217 		}
14218 
14219 		specs["num_elements"]			= de::toString(structItemsCount);
14220 		specs["field_type"]				= typeTestParameters[typeTestNdx].name;
14221 		specs["struct_item_size"]		= de::toString(structItemsCount * sizeof(deFloat16));
14222 		specs["struct_u32s"]			= de::toString(structItemsCount / 2);
14223 		specs["op_premain_decls"]		= opParts.premainDecls;
14224 		specs["op_sw_fun_call"]			= opParts.swFunCall;
14225 		specs["op_sw_fun_header"]		= opParts.swFunHeader;
14226 		specs["op_case_default_value"]	= opParts.caseDefaultValue;
14227 		if (opIndex == 0) {
14228 			specs["st_call"]			= "st_ssbo_dst";
14229 			specs["st_ndx"]				= "c_i32_0";
14230 		} else {
14231 			specs["st_call"]			= "st_fn_ssbo_dst";
14232 			specs["st_ndx"]				= "ndx";
14233 		}
14234 
14235 		fragments["capability"]		= "OpCapability Float16\n";
14236 		fragments["decoration"]		= decoration.specialize(specs);
14237 		fragments["pre_main"]		= preMain.specialize(specs);
14238 		fragments["testfun"]		= testFun.specialize(specs);
14239 		if (opIndex == 0) {
14240 			fragments["testfun"]		+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14241 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14242 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14243 		} else {
14244 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14245 			fragments["testfun"]		+= StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14246 		}
14247 
14248 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14249 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(unusedFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14250 		specResource.verifyIO = compareFP16CompositeFunc;
14251 
14252 		extensions.push_back("VK_KHR_shader_float16_int8");
14253 
14254 		features.extFloat16Int8.shaderFloat16 = true;
14255 
14256 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
14257 	}
14258 
14259 	return testGroup.release();
14260 }
14261 
14262 struct fp16PerComponent
14263 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14264 	fp16PerComponent()
14265 		: flavor(0)
14266 		, floatFormat16	(-14, 15, 10, true)
14267 		, outCompCount(0)
14268 		, argCompCount(3, 0)
14269 	{
14270 	}
14271 
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14272 	bool			callOncePerComponent	()									{ return true; }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14273 	deUint32		getComponentValidity	()									{ return static_cast<deUint32>(-1); }
14274 
getULPsvkt::SpirVAssembly::fp16PerComponent14275 	virtual double	getULPs					(vector<const deFloat16*>&)			{ return 1.0; }
getMinvkt::SpirVAssembly::fp16PerComponent14276 	virtual double	getMin					(double value, double ulps)			{ return value - floatFormat16.ulp(deAbs(value), ulps); }
getMaxvkt::SpirVAssembly::fp16PerComponent14277 	virtual double	getMax					(double value, double ulps)			{ return value + floatFormat16.ulp(deAbs(value), ulps); }
14278 
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14279 	virtual size_t	getFlavorCount			()									{ return flavorNames.empty() ? 1 : flavorNames.size(); }
setFlavorvkt::SpirVAssembly::fp16PerComponent14280 	virtual void	setFlavor				(size_t flavorNo)					{ DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
getFlavorvkt::SpirVAssembly::fp16PerComponent14281 	virtual size_t	getFlavor				()									{ return flavor; }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14282 	virtual string	getCurrentFlavorName	()									{ return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14283 
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14284 	virtual void	setOutCompCount			(size_t compCount)					{ outCompCount = compCount; }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14285 	virtual size_t	getOutCompCount			()									{ return outCompCount; }
14286 
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14287 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)	{ argCompCount[argNo] = compCount; }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14288 	virtual size_t	getArgCompCount			(size_t argNo)						{ return argCompCount[argNo]; }
14289 
14290 protected:
14291 	size_t				flavor;
14292 	tcu::FloatFormat	floatFormat16;
14293 	size_t				outCompCount;
14294 	vector<size_t>		argCompCount;
14295 	vector<string>		flavorNames;
14296 };
14297 
14298 struct fp16OpFNegate : public fp16PerComponent
14299 {
14300 	template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14301 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14302 	{
14303 		const fp16type	x		(*in[0]);
14304 		const double	d		(x.asDouble());
14305 		const double	result	(0.0 - d);
14306 
14307 		out[0] = fp16type(result).bits();
14308 		min[0] = getMin(result, getULPs(in));
14309 		max[0] = getMax(result, getULPs(in));
14310 
14311 		return true;
14312 	}
14313 };
14314 
14315 struct fp16Round : public fp16PerComponent
14316 {
fp16Roundvkt::SpirVAssembly::fp16Round14317 	fp16Round() : fp16PerComponent()
14318 	{
14319 		flavorNames.push_back("Floor(x+0.5)");
14320 		flavorNames.push_back("Floor(x-0.5)");
14321 		flavorNames.push_back("RoundEven");
14322 	}
14323 
14324 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Round14325 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14326 	{
14327 		const fp16type	x		(*in[0]);
14328 		const double	d		(x.asDouble());
14329 		double			result	(0.0);
14330 
14331 		switch (flavor)
14332 		{
14333 			case 0:		result = deRound(d);		break;
14334 			case 1:		result = deFloor(d - 0.5);	break;
14335 			case 2:		result = deRoundEven(d);	break;
14336 			default:	TCU_THROW(InternalError, "Invalid flavor specified");
14337 		}
14338 
14339 		out[0] = fp16type(result).bits();
14340 		min[0] = getMin(result, getULPs(in));
14341 		max[0] = getMax(result, getULPs(in));
14342 
14343 		return true;
14344 	}
14345 };
14346 
14347 struct fp16RoundEven : public fp16PerComponent
14348 {
14349 	template<class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14350 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14351 	{
14352 		const fp16type	x		(*in[0]);
14353 		const double	d		(x.asDouble());
14354 		const double	result	(deRoundEven(d));
14355 
14356 		out[0] = fp16type(result).bits();
14357 		min[0] = getMin(result, getULPs(in));
14358 		max[0] = getMax(result, getULPs(in));
14359 
14360 		return true;
14361 	}
14362 };
14363 
14364 struct fp16Trunc : public fp16PerComponent
14365 {
14366 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14367 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14368 	{
14369 		const fp16type	x		(*in[0]);
14370 		const double	d		(x.asDouble());
14371 		const double	result	(deTrunc(d));
14372 
14373 		out[0] = fp16type(result).bits();
14374 		min[0] = getMin(result, getULPs(in));
14375 		max[0] = getMax(result, getULPs(in));
14376 
14377 		return true;
14378 	}
14379 };
14380 
14381 struct fp16FAbs : public fp16PerComponent
14382 {
14383 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14384 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14385 	{
14386 		const fp16type	x		(*in[0]);
14387 		const double	d		(x.asDouble());
14388 		const double	result	(deAbs(d));
14389 
14390 		out[0] = fp16type(result).bits();
14391 		min[0] = getMin(result, getULPs(in));
14392 		max[0] = getMax(result, getULPs(in));
14393 
14394 		return true;
14395 	}
14396 };
14397 
14398 struct fp16FSign : public fp16PerComponent
14399 {
14400 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FSign14401 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14402 	{
14403 		const fp16type	x		(*in[0]);
14404 		const double	d		(x.asDouble());
14405 		const double	result	(deSign(d));
14406 
14407 		if (x.isNaN())
14408 			return false;
14409 
14410 		out[0] = fp16type(result).bits();
14411 		min[0] = getMin(result, getULPs(in));
14412 		max[0] = getMax(result, getULPs(in));
14413 
14414 		return true;
14415 	}
14416 };
14417 
14418 struct fp16Floor : public fp16PerComponent
14419 {
14420 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Floor14421 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14422 	{
14423 		const fp16type	x		(*in[0]);
14424 		const double	d		(x.asDouble());
14425 		const double	result	(deFloor(d));
14426 
14427 		out[0] = fp16type(result).bits();
14428 		min[0] = getMin(result, getULPs(in));
14429 		max[0] = getMax(result, getULPs(in));
14430 
14431 		return true;
14432 	}
14433 };
14434 
14435 struct fp16Ceil : public fp16PerComponent
14436 {
14437 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14438 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14439 	{
14440 		const fp16type	x		(*in[0]);
14441 		const double	d		(x.asDouble());
14442 		const double	result	(deCeil(d));
14443 
14444 		out[0] = fp16type(result).bits();
14445 		min[0] = getMin(result, getULPs(in));
14446 		max[0] = getMax(result, getULPs(in));
14447 
14448 		return true;
14449 	}
14450 };
14451 
14452 struct fp16Fract : public fp16PerComponent
14453 {
14454 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Fract14455 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14456 	{
14457 		const fp16type	x		(*in[0]);
14458 		const double	d		(x.asDouble());
14459 		const double	result	(deFrac(d));
14460 
14461 		out[0] = fp16type(result).bits();
14462 		min[0] = getMin(result, getULPs(in));
14463 		max[0] = getMax(result, getULPs(in));
14464 
14465 		return true;
14466 	}
14467 };
14468 
14469 struct fp16Radians : public fp16PerComponent
14470 {
getULPsvkt::SpirVAssembly::fp16Radians14471 	virtual double getULPs (vector<const deFloat16*>& in)
14472 	{
14473 		DE_UNREF(in);
14474 
14475 		return 2.5;
14476 	}
14477 
14478 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Radians14479 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14480 	{
14481 		const fp16type	x		(*in[0]);
14482 		const float		d		(x.asFloat());
14483 		const float		result	(deFloatRadians(d));
14484 
14485 		out[0] = fp16type(result).bits();
14486 		min[0] = getMin(result, getULPs(in));
14487 		max[0] = getMax(result, getULPs(in));
14488 
14489 		return true;
14490 	}
14491 };
14492 
14493 struct fp16Degrees : public fp16PerComponent
14494 {
getULPsvkt::SpirVAssembly::fp16Degrees14495 	virtual double getULPs (vector<const deFloat16*>& in)
14496 	{
14497 		DE_UNREF(in);
14498 
14499 		return 2.5;
14500 	}
14501 
14502 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14503 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14504 	{
14505 		const fp16type	x		(*in[0]);
14506 		const float		d		(x.asFloat());
14507 		const float		result	(deFloatDegrees(d));
14508 
14509 		out[0] = fp16type(result).bits();
14510 		min[0] = getMin(result, getULPs(in));
14511 		max[0] = getMax(result, getULPs(in));
14512 
14513 		return true;
14514 	}
14515 };
14516 
14517 struct fp16Sin : public fp16PerComponent
14518 {
14519 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sin14520 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14521 	{
14522 		const fp16type	x			(*in[0]);
14523 		const double	d			(x.asDouble());
14524 		const double	result		(deSin(d));
14525 		const double	unspecUlp	(16.0);
14526 		const double	err			(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14527 
14528 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14529 			return false;
14530 
14531 		out[0] = fp16type(result).bits();
14532 		min[0] = result - err;
14533 		max[0] = result + err;
14534 
14535 		return true;
14536 	}
14537 };
14538 
14539 struct fp16Cos : public fp16PerComponent
14540 {
14541 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cos14542 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14543 	{
14544 		const fp16type	x			(*in[0]);
14545 		const double	d			(x.asDouble());
14546 		const double	result		(deCos(d));
14547 		const double	unspecUlp	(16.0);
14548 		const double	err			(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14549 
14550 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14551 			return false;
14552 
14553 		out[0] = fp16type(result).bits();
14554 		min[0] = result - err;
14555 		max[0] = result + err;
14556 
14557 		return true;
14558 	}
14559 };
14560 
14561 struct fp16Tan : public fp16PerComponent
14562 {
14563 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Tan14564 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14565 	{
14566 		const fp16type	x		(*in[0]);
14567 		const double	d		(x.asDouble());
14568 		const double	result	(deTan(d));
14569 
14570 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14571 			return false;
14572 
14573 		out[0] = fp16type(result).bits();
14574 		{
14575 			const double	err			= deLdExp(1.0, -7);
14576 			const double	s1			= deSin(d) + err;
14577 			const double	s2			= deSin(d) - err;
14578 			const double	c1			= deCos(d) + err;
14579 			const double	c2			= deCos(d) - err;
14580 			const double	edgeVals[]	= {s1/c1, s1/c2, s2/c1, s2/c2};
14581 			double			edgeLeft	= out[0];
14582 			double			edgeRight	= out[0];
14583 
14584 			if (deSign(c1 * c2) < 0.0)
14585 			{
14586 				edgeLeft	= -std::numeric_limits<double>::infinity();
14587 				edgeRight	= +std::numeric_limits<double>::infinity();
14588 			}
14589 			else
14590 			{
14591 				edgeLeft	= *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14592 				edgeRight	= *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14593 			}
14594 
14595 			min[0] = edgeLeft;
14596 			max[0] = edgeRight;
14597 		}
14598 
14599 		return true;
14600 	}
14601 };
14602 
14603 struct fp16Asin : public fp16PerComponent
14604 {
14605 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Asin14606 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14607 	{
14608 		const fp16type	x		(*in[0]);
14609 		const double	d		(x.asDouble());
14610 		const double	result	(deAsin(d));
14611 		const double	error	(deAtan2(d, sqrt(1.0 - d * d)));
14612 
14613 		if (!x.isNaN() && deAbs(d) > 1.0)
14614 			return false;
14615 
14616 		out[0] = fp16type(result).bits();
14617 		min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14618 		max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14619 
14620 		return true;
14621 	}
14622 };
14623 
14624 struct fp16Acos : public fp16PerComponent
14625 {
14626 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Acos14627 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14628 	{
14629 		const fp16type	x		(*in[0]);
14630 		const double	d		(x.asDouble());
14631 		const double	result	(deAcos(d));
14632 		const double	error	(deAtan2(sqrt(1.0 - d * d), d));
14633 
14634 		if (!x.isNaN() && deAbs(d) > 1.0)
14635 			return false;
14636 
14637 		out[0] = fp16type(result).bits();
14638 		min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14639 		max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14640 
14641 		return true;
14642 	}
14643 };
14644 
14645 struct fp16Atan : public fp16PerComponent
14646 {
getULPsvkt::SpirVAssembly::fp16Atan14647 	virtual double getULPs(vector<const deFloat16*>& in)
14648 	{
14649 		DE_UNREF(in);
14650 
14651 		return 2 * 5.0; // This is not a precision test. Value is not from spec
14652 	}
14653 
14654 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan14655 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14656 	{
14657 		const fp16type	x		(*in[0]);
14658 		const double	d		(x.asDouble());
14659 		const double	result	(deAtanOver(d));
14660 
14661 		out[0] = fp16type(result).bits();
14662 		min[0] = getMin(result, getULPs(in));
14663 		max[0] = getMax(result, getULPs(in));
14664 
14665 		return true;
14666 	}
14667 };
14668 
14669 struct fp16Sinh : public fp16PerComponent
14670 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh14671 	fp16Sinh() : fp16PerComponent()
14672 	{
14673 		flavorNames.push_back("Double");
14674 		flavorNames.push_back("ExpFP16");
14675 	}
14676 
14677 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sinh14678 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14679 	{
14680 		const fp16type	x		(*in[0]);
14681 		const double	d		(x.asDouble());
14682 		const double	ulps	(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14683 		double			result	(0.0);
14684 		double			error	(0.0);
14685 
14686 		if (getFlavor() == 0)
14687 		{
14688 			result	= deSinh(d);
14689 			error	= floatFormat16.ulp(deAbs(result), ulps);
14690 		}
14691 		else if (getFlavor() == 1)
14692 		{
14693 			const fp16type	epx	(deExp(d));
14694 			const fp16type	enx	(deExp(-d));
14695 			const fp16type	esx	(epx.asDouble() - enx.asDouble());
14696 			const fp16type	sx2	(esx.asDouble() / 2.0);
14697 
14698 			result	= sx2.asDouble();
14699 			error	= deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14700 		}
14701 		else
14702 		{
14703 			TCU_THROW(InternalError, "Unknown flavor");
14704 		}
14705 
14706 		out[0] = fp16type(result).bits();
14707 		min[0] = result - error;
14708 		max[0] = result + error;
14709 
14710 		return true;
14711 	}
14712 };
14713 
14714 struct fp16Cosh : public fp16PerComponent
14715 {
fp16Coshvkt::SpirVAssembly::fp16Cosh14716 	fp16Cosh() : fp16PerComponent()
14717 	{
14718 		flavorNames.push_back("Double");
14719 		flavorNames.push_back("ExpFP16");
14720 	}
14721 
14722 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cosh14723 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14724 	{
14725 		const fp16type	x		(*in[0]);
14726 		const double	d		(x.asDouble());
14727 		const double	ulps	(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14728 		double			result	(0.0);
14729 
14730 		if (getFlavor() == 0)
14731 		{
14732 			result = deCosh(d);
14733 		}
14734 		else if (getFlavor() == 1)
14735 		{
14736 			const fp16type	epx	(deExp(d));
14737 			const fp16type	enx	(deExp(-d));
14738 			const fp16type	esx	(epx.asDouble() + enx.asDouble());
14739 			const fp16type	sx2	(esx.asDouble() / 2.0);
14740 
14741 			result = sx2.asDouble();
14742 		}
14743 		else
14744 		{
14745 			TCU_THROW(InternalError, "Unknown flavor");
14746 		}
14747 
14748 		out[0] = fp16type(result).bits();
14749 		min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14750 		max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14751 
14752 		return true;
14753 	}
14754 };
14755 
14756 struct fp16Tanh : public fp16PerComponent
14757 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh14758 	fp16Tanh() : fp16PerComponent()
14759 	{
14760 		flavorNames.push_back("Tanh");
14761 		flavorNames.push_back("SinhCosh");
14762 		flavorNames.push_back("SinhCoshFP16");
14763 		flavorNames.push_back("PolyFP16");
14764 	}
14765 
getULPsvkt::SpirVAssembly::fp16Tanh14766 	virtual double getULPs (vector<const deFloat16*>& in)
14767 	{
14768 		const tcu::Float16	x	(*in[0]);
14769 		const double		d	(x.asDouble());
14770 
14771 		return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14772 	}
14773 
14774 	template<class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh14775 	inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14776 	{
14777 		const fp16type	esx	(espx.asDouble() - esnx.asDouble());
14778 		const fp16type	sx2	(esx.asDouble() / 2.0);
14779 		const fp16type	ecx	(ecpx.asDouble() + ecnx.asDouble());
14780 		const fp16type	cx2	(ecx.asDouble() / 2.0);
14781 		const fp16type	tg	(sx2.asDouble() / cx2.asDouble());
14782 		const double	rez	(tg.asDouble());
14783 
14784 		return rez;
14785 	}
14786 
14787 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Tanh14788 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14789 	{
14790 		const fp16type	x		(*in[0]);
14791 		const double	d		(x.asDouble());
14792 		double			result	(0.0);
14793 
14794 		if (getFlavor() == 0)
14795 		{
14796 			result	= deTanh(d);
14797 			min[0]	= getMin(result, getULPs(in));
14798 			max[0]	= getMax(result, getULPs(in));
14799 		}
14800 		else if (getFlavor() == 1)
14801 		{
14802 			result	= deSinh(d) / deCosh(d);
14803 			min[0]	= getMin(result, getULPs(in));
14804 			max[0]	= getMax(result, getULPs(in));
14805 		}
14806 		else if (getFlavor() == 2)
14807 		{
14808 			const fp16type	s	(deSinh(d));
14809 			const fp16type	c	(deCosh(d));
14810 
14811 			result	= s.asDouble() / c.asDouble();
14812 			min[0]	= getMin(result, getULPs(in));
14813 			max[0]	= getMax(result, getULPs(in));
14814 		}
14815 		else if (getFlavor() == 3)
14816 		{
14817 			const double	ulps	(getULPs(in));
14818 			const double	epxm	(deExp( d));
14819 			const double	enxm	(deExp(-d));
14820 			const double	epxmerr	= floatFormat16.ulp(epxm, ulps);
14821 			const double	enxmerr	= floatFormat16.ulp(enxm, ulps);
14822 			const fp16type	epx[]	= { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14823 			const fp16type	enx[]	= { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14824 			const fp16type	epxm16	(epxm);
14825 			const fp16type	enxm16	(enxm);
14826 			vector<double>	tgs;
14827 
14828 			for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14829 			for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14830 			for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14831 			for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14832 			{
14833 				const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14834 
14835 				tgs.push_back(tgh);
14836 			}
14837 
14838 			result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14839 			min[0] = *std::min_element(tgs.begin(), tgs.end());
14840 			max[0] = *std::max_element(tgs.begin(), tgs.end());
14841 		}
14842 		else
14843 		{
14844 			TCU_THROW(InternalError, "Unknown flavor");
14845 		}
14846 
14847 		out[0] = fp16type(result).bits();
14848 
14849 		return true;
14850 	}
14851 };
14852 
14853 struct fp16Asinh : public fp16PerComponent
14854 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh14855 	fp16Asinh() : fp16PerComponent()
14856 	{
14857 		flavorNames.push_back("Double");
14858 		flavorNames.push_back("PolyFP16Wiki");
14859 		flavorNames.push_back("PolyFP16Abs");
14860 	}
14861 
getULPsvkt::SpirVAssembly::fp16Asinh14862 	virtual double getULPs (vector<const deFloat16*>& in)
14863 	{
14864 		DE_UNREF(in);
14865 
14866 		return 256.0; // This is not a precision test. Value is not from spec
14867 	}
14868 
14869 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Asinh14870 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14871 	{
14872 		const fp16type	x		(*in[0]);
14873 		const double	d		(x.asDouble());
14874 		double			result	(0.0);
14875 
14876 		if (getFlavor() == 0)
14877 		{
14878 			result = deAsinh(d);
14879 		}
14880 		else if (getFlavor() == 1)
14881 		{
14882 			const fp16type	x2		(d * d);
14883 			const fp16type	x2p1	(x2.asDouble() + 1.0);
14884 			const fp16type	sq		(deSqrt(x2p1.asDouble()));
14885 			const fp16type	sxsq	(d + sq.asDouble());
14886 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14887 
14888 			if (lsxsq.isInf())
14889 				return false;
14890 
14891 			result = lsxsq.asDouble();
14892 		}
14893 		else if (getFlavor() == 2)
14894 		{
14895 			const fp16type	x2		(d * d);
14896 			const fp16type	x2p1	(x2.asDouble() + 1.0);
14897 			const fp16type	sq		(deSqrt(x2p1.asDouble()));
14898 			const fp16type	sxsq	(deAbs(d) + sq.asDouble());
14899 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14900 
14901 			result = deSign(d) * lsxsq.asDouble();
14902 		}
14903 		else
14904 		{
14905 			TCU_THROW(InternalError, "Unknown flavor");
14906 		}
14907 
14908 		out[0] = fp16type(result).bits();
14909 		min[0] = getMin(result, getULPs(in));
14910 		max[0] = getMax(result, getULPs(in));
14911 
14912 		return true;
14913 	}
14914 };
14915 
14916 struct fp16Acosh : public fp16PerComponent
14917 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh14918 	fp16Acosh() : fp16PerComponent()
14919 	{
14920 		flavorNames.push_back("Double");
14921 		flavorNames.push_back("PolyFP16");
14922 	}
14923 
getULPsvkt::SpirVAssembly::fp16Acosh14924 	virtual double getULPs (vector<const deFloat16*>& in)
14925 	{
14926 		DE_UNREF(in);
14927 
14928 		return 16.0; // This is not a precision test. Value is not from spec
14929 	}
14930 
14931 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Acosh14932 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14933 	{
14934 		const fp16type	x		(*in[0]);
14935 		const double	d		(x.asDouble());
14936 		double			result	(0.0);
14937 
14938 		if (!x.isNaN() && d < 1.0)
14939 			return false;
14940 
14941 		if (getFlavor() == 0)
14942 		{
14943 			result = deAcosh(d);
14944 		}
14945 		else if (getFlavor() == 1)
14946 		{
14947 			const fp16type	x2		(d * d);
14948 			const fp16type	x2m1	(x2.asDouble() - 1.0);
14949 			const fp16type	sq		(deSqrt(x2m1.asDouble()));
14950 			const fp16type	sxsq	(d + sq.asDouble());
14951 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14952 
14953 			result = lsxsq.asDouble();
14954 		}
14955 		else
14956 		{
14957 			TCU_THROW(InternalError, "Unknown flavor");
14958 		}
14959 
14960 		out[0] = fp16type(result).bits();
14961 		min[0] = getMin(result, getULPs(in));
14962 		max[0] = getMax(result, getULPs(in));
14963 
14964 		return true;
14965 	}
14966 };
14967 
14968 struct fp16Atanh : public fp16PerComponent
14969 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh14970 	fp16Atanh() : fp16PerComponent()
14971 	{
14972 		flavorNames.push_back("Double");
14973 		flavorNames.push_back("PolyFP16");
14974 	}
14975 
14976 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atanh14977 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14978 	{
14979 		const fp16type	x		(*in[0]);
14980 		const double	d		(x.asDouble());
14981 		double			result	(0.0);
14982 
14983 		if (deAbs(d) >= 1.0)
14984 			return false;
14985 
14986 		if (getFlavor() == 0)
14987 		{
14988 			const double	ulps	(16.0);	// This is not a precision test. Value is not from spec
14989 
14990 			result = deAtanh(d);
14991 			min[0] = getMin(result, ulps);
14992 			max[0] = getMax(result, ulps);
14993 		}
14994 		else if (getFlavor() == 1)
14995 		{
14996 			const fp16type	x1a		(1.0 + d);
14997 			const fp16type	x1b		(1.0 - d);
14998 			const fp16type	x1d		(x1a.asDouble() / x1b.asDouble());
14999 			const fp16type	lx1d	(deLog(x1d.asDouble()));
15000 			const fp16type	lx1d2	(0.5 * lx1d.asDouble());
15001 			const double	error	(2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
15002 
15003 			result = lx1d2.asDouble();
15004 			min[0] = result - error;
15005 			max[0] = result + error;
15006 		}
15007 		else
15008 		{
15009 			TCU_THROW(InternalError, "Unknown flavor");
15010 		}
15011 
15012 		out[0] = fp16type(result).bits();
15013 
15014 		return true;
15015 	}
15016 };
15017 
15018 struct fp16Exp : public fp16PerComponent
15019 {
15020 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp15021 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15022 	{
15023 		const fp16type	x		(*in[0]);
15024 		const double	d		(x.asDouble());
15025 		const double	ulps	(10.0 * (1.0 + 2.0 * deAbs(d)));
15026 		const double	result	(deExp(d));
15027 
15028 		out[0] = fp16type(result).bits();
15029 		min[0] = getMin(result, ulps);
15030 		max[0] = getMax(result, ulps);
15031 
15032 		return true;
15033 	}
15034 };
15035 
15036 struct fp16Log : public fp16PerComponent
15037 {
15038 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Log15039 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15040 	{
15041 		const fp16type	x		(*in[0]);
15042 		const double	d		(x.asDouble());
15043 		const double	result	(deLog(d));
15044 		const double	error	(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15045 
15046 		if (d <= 0.0)
15047 			return false;
15048 
15049 		out[0] = fp16type(result).bits();
15050 		min[0] = result - error;
15051 		max[0] = result + error;
15052 
15053 		return true;
15054 	}
15055 };
15056 
15057 struct fp16Exp2 : public fp16PerComponent
15058 {
15059 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp215060 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15061 	{
15062 		const fp16type	x		(*in[0]);
15063 		const double	d		(x.asDouble());
15064 		const double	result	(deExp2(d));
15065 		const double	ulps	(1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15066 
15067 		out[0] = fp16type(result).bits();
15068 		min[0] = getMin(result, ulps);
15069 		max[0] = getMax(result, ulps);
15070 
15071 		return true;
15072 	}
15073 };
15074 
15075 struct fp16Log2 : public fp16PerComponent
15076 {
15077 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Log215078 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15079 	{
15080 		const fp16type	x		(*in[0]);
15081 		const double	d		(x.asDouble());
15082 		const double	result	(deLog2(d));
15083 		const double	error	(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15084 
15085 		if (d <= 0.0)
15086 			return false;
15087 
15088 		out[0] = fp16type(result).bits();
15089 		min[0] = result - error;
15090 		max[0] = result + error;
15091 
15092 		return true;
15093 	}
15094 };
15095 
15096 struct fp16Sqrt : public fp16PerComponent
15097 {
getULPsvkt::SpirVAssembly::fp16Sqrt15098 	virtual double getULPs (vector<const deFloat16*>& in)
15099 	{
15100 		DE_UNREF(in);
15101 
15102 		return 6.0;
15103 	}
15104 
15105 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15106 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15107 	{
15108 		const fp16type	x		(*in[0]);
15109 		const double	d		(x.asDouble());
15110 		const double	result	(deSqrt(d));
15111 
15112 		if (!x.isNaN() && d < 0.0)
15113 			return false;
15114 
15115 		out[0] = fp16type(result).bits();
15116 		min[0] = getMin(result, getULPs(in));
15117 		max[0] = getMax(result, getULPs(in));
15118 
15119 		return true;
15120 	}
15121 };
15122 
15123 struct fp16InverseSqrt : public fp16PerComponent
15124 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15125 	virtual double getULPs (vector<const deFloat16*>& in)
15126 	{
15127 		DE_UNREF(in);
15128 
15129 		return 2.0;
15130 	}
15131 
15132 	template<class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15133 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15134 	{
15135 		const fp16type	x		(*in[0]);
15136 		const double	d		(x.asDouble());
15137 		const double	result	(1.0/deSqrt(d));
15138 
15139 		if (!x.isNaN() && d <= 0.0)
15140 			return false;
15141 
15142 		out[0] = fp16type(result).bits();
15143 		min[0] = getMin(result, getULPs(in));
15144 		max[0] = getMax(result, getULPs(in));
15145 
15146 		return true;
15147 	}
15148 };
15149 
15150 struct fp16ModfFrac : public fp16PerComponent
15151 {
15152 	template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15153 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15154 	{
15155 		const fp16type	x		(*in[0]);
15156 		const double	d		(x.asDouble());
15157 		double			i		(0.0);
15158 		const double	result	(deModf(d, &i));
15159 
15160 		if (x.isInf() || x.isNaN())
15161 			return false;
15162 
15163 		out[0] = fp16type(result).bits();
15164 		min[0] = getMin(result, getULPs(in));
15165 		max[0] = getMax(result, getULPs(in));
15166 
15167 		return true;
15168 	}
15169 };
15170 
15171 struct fp16ModfInt : public fp16PerComponent
15172 {
15173 	template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15174 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15175 	{
15176 		const fp16type	x		(*in[0]);
15177 		const double	d		(x.asDouble());
15178 		double			i		(0.0);
15179 		const double	unused	(deModf(d, &i));
15180 		const double	result	(i);
15181 
15182 		DE_UNREF(unused);
15183 
15184 		if (x.isInf() || x.isNaN())
15185 			return false;
15186 
15187 		out[0] = fp16type(result).bits();
15188 		min[0] = getMin(result, getULPs(in));
15189 		max[0] = getMax(result, getULPs(in));
15190 
15191 		return true;
15192 	}
15193 };
15194 
15195 struct fp16FrexpS : public fp16PerComponent
15196 {
15197 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15198 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15199 	{
15200 		const fp16type	x		(*in[0]);
15201 		const double	d		(x.asDouble());
15202 		int				e		(0);
15203 		const double	result	(deFrExp(d, &e));
15204 
15205 		if (x.isNaN() || x.isInf())
15206 			return false;
15207 
15208 		out[0] = fp16type(result).bits();
15209 		min[0] = getMin(result, getULPs(in));
15210 		max[0] = getMax(result, getULPs(in));
15211 
15212 		return true;
15213 	}
15214 };
15215 
15216 struct fp16FrexpE : public fp16PerComponent
15217 {
15218 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15219 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15220 	{
15221 		const fp16type	x		(*in[0]);
15222 		const double	d		(x.asDouble());
15223 		int				e		(0);
15224 		const double	unused	(deFrExp(d, &e));
15225 		const double	result	(static_cast<double>(e));
15226 
15227 		DE_UNREF(unused);
15228 
15229 		if (x.isNaN() || x.isInf())
15230 			return false;
15231 
15232 		out[0] = fp16type(result).bits();
15233 		min[0] = getMin(result, getULPs(in));
15234 		max[0] = getMax(result, getULPs(in));
15235 
15236 		return true;
15237 	}
15238 };
15239 
15240 struct fp16OpFAdd : public fp16PerComponent
15241 {
15242 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15243 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15244 	{
15245 		const fp16type	x		(*in[0]);
15246 		const fp16type	y		(*in[1]);
15247 		const double	xd		(x.asDouble());
15248 		const double	yd		(y.asDouble());
15249 		const double	result	(xd + yd);
15250 
15251 		out[0] = fp16type(result).bits();
15252 		min[0] = getMin(result, getULPs(in));
15253 		max[0] = getMax(result, getULPs(in));
15254 
15255 		return true;
15256 	}
15257 };
15258 
15259 struct fp16OpFSub : public fp16PerComponent
15260 {
15261 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15262 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15263 	{
15264 		const fp16type	x		(*in[0]);
15265 		const fp16type	y		(*in[1]);
15266 		const double	xd		(x.asDouble());
15267 		const double	yd		(y.asDouble());
15268 		const double	result	(xd - yd);
15269 
15270 		out[0] = fp16type(result).bits();
15271 		min[0] = getMin(result, getULPs(in));
15272 		max[0] = getMax(result, getULPs(in));
15273 
15274 		return true;
15275 	}
15276 };
15277 
15278 struct fp16OpFMul : public fp16PerComponent
15279 {
15280 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15281 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15282 	{
15283 		const fp16type	x		(*in[0]);
15284 		const fp16type	y		(*in[1]);
15285 		const double	xd		(x.asDouble());
15286 		const double	yd		(y.asDouble());
15287 		const double	result	(xd * yd);
15288 
15289 		out[0] = fp16type(result).bits();
15290 		min[0] = getMin(result, getULPs(in));
15291 		max[0] = getMax(result, getULPs(in));
15292 
15293 		return true;
15294 	}
15295 };
15296 
15297 struct fp16OpFDiv : public fp16PerComponent
15298 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15299 	fp16OpFDiv() : fp16PerComponent()
15300 	{
15301 		flavorNames.push_back("DirectDiv");
15302 		flavorNames.push_back("InverseDiv");
15303 	}
15304 
15305 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15306 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15307 	{
15308 		const fp16type	x			(*in[0]);
15309 		const fp16type	y			(*in[1]);
15310 		const double	xd			(x.asDouble());
15311 		const double	yd			(y.asDouble());
15312 		const double	unspecUlp	(16.0);
15313 		const double	ulpCnt		(de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15314 		double			result		(0.0);
15315 
15316 		if (y.isZero())
15317 			return false;
15318 
15319 		if (getFlavor() == 0)
15320 		{
15321 			result = (xd / yd);
15322 		}
15323 		else if (getFlavor() == 1)
15324 		{
15325 			const double	invyd	(1.0 / yd);
15326 			const fp16type	invy	(invyd);
15327 
15328 			result = (xd * invy.asDouble());
15329 		}
15330 		else
15331 		{
15332 			TCU_THROW(InternalError, "Unknown flavor");
15333 		}
15334 
15335 		out[0] = fp16type(result).bits();
15336 		min[0] = getMin(result, ulpCnt);
15337 		max[0] = getMax(result, ulpCnt);
15338 
15339 		return true;
15340 	}
15341 };
15342 
15343 struct fp16Atan2 : public fp16PerComponent
15344 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215345 	fp16Atan2() : fp16PerComponent()
15346 	{
15347 		flavorNames.push_back("DoubleCalc");
15348 		flavorNames.push_back("DoubleCalc_PI");
15349 	}
15350 
getULPsvkt::SpirVAssembly::fp16Atan215351 	virtual double getULPs(vector<const deFloat16*>& in)
15352 	{
15353 		DE_UNREF(in);
15354 
15355 		return 2 * 5.0; // This is not a precision test. Value is not from spec
15356 	}
15357 
15358 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan215359 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15360 	{
15361 		const fp16type	x		(*in[0]);
15362 		const fp16type	y		(*in[1]);
15363 		const double	xd		(x.asDouble());
15364 		const double	yd		(y.asDouble());
15365 		double			result	(0.0);
15366 
15367 		if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15368 			return false;
15369 
15370 		if (getFlavor() == 0)
15371 		{
15372 			result	= deAtan2(xd, yd);
15373 		}
15374 		else if (getFlavor() == 1)
15375 		{
15376 			const double	ulps	(2.0 * 5.0); // This is not a precision test. Value is not from spec
15377 			const double	eps		(floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15378 
15379 			result	= deAtan2(xd, yd);
15380 
15381 			if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15382 				result	= -result;
15383 		}
15384 		else
15385 		{
15386 			TCU_THROW(InternalError, "Unknown flavor");
15387 		}
15388 
15389 		out[0] = fp16type(result).bits();
15390 		min[0] = getMin(result, getULPs(in));
15391 		max[0] = getMax(result, getULPs(in));
15392 
15393 		return true;
15394 	}
15395 };
15396 
15397 struct fp16Pow : public fp16PerComponent
15398 {
fp16Powvkt::SpirVAssembly::fp16Pow15399 	fp16Pow() : fp16PerComponent()
15400 	{
15401 		flavorNames.push_back("Pow");
15402 		flavorNames.push_back("PowLog2");
15403 		flavorNames.push_back("PowLog2FP16");
15404 	}
15405 
15406 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Pow15407 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15408 	{
15409 		const fp16type	x		(*in[0]);
15410 		const fp16type	y		(*in[1]);
15411 		const double	xd		(x.asDouble());
15412 		const double	yd		(y.asDouble());
15413 		const double	logxeps	(de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15414 		const double	ulps1	(1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15415 		const double	ulps2	(1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15416 		const double	ulps	(deMax(deAbs(ulps1), deAbs(ulps2)));
15417 		double			result	(0.0);
15418 
15419 		if (xd < 0.0)
15420 			return false;
15421 
15422 		if (x.isZero() && yd <= 0.0)
15423 			return false;
15424 
15425 		if (getFlavor() == 0)
15426 		{
15427 			result = dePow(xd, yd);
15428 		}
15429 		else if (getFlavor() == 1)
15430 		{
15431 			const double	l2d	(deLog2(xd));
15432 			const double	e2d	(deExp2(yd * l2d));
15433 
15434 			result = e2d;
15435 		}
15436 		else if (getFlavor() == 2)
15437 		{
15438 			const double	l2d	(deLog2(xd));
15439 			const fp16type	l2	(l2d);
15440 			const double	e2d	(deExp2(yd * l2.asDouble()));
15441 			const fp16type	e2	(e2d);
15442 
15443 			result = e2.asDouble();
15444 		}
15445 		else
15446 		{
15447 			TCU_THROW(InternalError, "Unknown flavor");
15448 		}
15449 
15450 		out[0] = fp16type(result).bits();
15451 		min[0] = getMin(result, ulps);
15452 		max[0] = getMax(result, ulps);
15453 
15454 		return true;
15455 	}
15456 };
15457 
15458 struct fp16FMin : public fp16PerComponent
15459 {
15460 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMin15461 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15462 	{
15463 		const fp16type	x		(*in[0]);
15464 		const fp16type	y		(*in[1]);
15465 		const double	xd		(x.asDouble());
15466 		const double	yd		(y.asDouble());
15467 		const double	result	(deMin(xd, yd));
15468 
15469 		if (x.isNaN() || y.isNaN())
15470 			return false;
15471 
15472 		out[0] = fp16type(result).bits();
15473 		min[0] = getMin(result, getULPs(in));
15474 		max[0] = getMax(result, getULPs(in));
15475 
15476 		return true;
15477 	}
15478 };
15479 
15480 struct fp16FMax : public fp16PerComponent
15481 {
15482 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMax15483 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15484 	{
15485 		const fp16type	x		(*in[0]);
15486 		const fp16type	y		(*in[1]);
15487 		const double	xd		(x.asDouble());
15488 		const double	yd		(y.asDouble());
15489 		const double	result	(deMax(xd, yd));
15490 
15491 		if (x.isNaN() || y.isNaN())
15492 			return false;
15493 
15494 		out[0] = fp16type(result).bits();
15495 		min[0] = getMin(result, getULPs(in));
15496 		max[0] = getMax(result, getULPs(in));
15497 
15498 		return true;
15499 	}
15500 };
15501 
15502 struct fp16Step : public fp16PerComponent
15503 {
15504 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Step15505 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15506 	{
15507 		const fp16type	edge	(*in[0]);
15508 		const fp16type	x		(*in[1]);
15509 		const double	edged	(edge.asDouble());
15510 		const double	xd		(x.asDouble());
15511 		const double	result	(deStep(edged, xd));
15512 
15513 		out[0] = fp16type(result).bits();
15514 		min[0] = getMin(result, getULPs(in));
15515 		max[0] = getMax(result, getULPs(in));
15516 
15517 		return true;
15518 	}
15519 };
15520 
15521 struct fp16Ldexp : public fp16PerComponent
15522 {
15523 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15524 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15525 	{
15526 		const fp16type	x		(*in[0]);
15527 		const fp16type	y		(*in[1]);
15528 		const double	xd		(x.asDouble());
15529 		const int		yd		(static_cast<int>(deTrunc(y.asDouble())));
15530 		const double	result	(deLdExp(xd, yd));
15531 
15532 		if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15533 			return false;
15534 
15535 		// Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15536 		if (fp16type(result).isInf())
15537 			return false;
15538 
15539 		out[0] = fp16type(result).bits();
15540 		min[0] = getMin(result, getULPs(in));
15541 		max[0] = getMax(result, getULPs(in));
15542 
15543 		return true;
15544 	}
15545 };
15546 
15547 struct fp16FClamp : public fp16PerComponent
15548 {
15549 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15550 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15551 	{
15552 		const fp16type	x		(*in[0]);
15553 		const fp16type	minVal	(*in[1]);
15554 		const fp16type	maxVal	(*in[2]);
15555 		const double	xd		(x.asDouble());
15556 		const double	minVald	(minVal.asDouble());
15557 		const double	maxVald	(maxVal.asDouble());
15558 		const double	result	(deClamp(xd, minVald, maxVald));
15559 
15560 		if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15561 			return false;
15562 
15563 		out[0] = fp16type(result).bits();
15564 		min[0] = getMin(result, getULPs(in));
15565 		max[0] = getMax(result, getULPs(in));
15566 
15567 		return true;
15568 	}
15569 };
15570 
15571 struct fp16FMix : public fp16PerComponent
15572 {
fp16FMixvkt::SpirVAssembly::fp16FMix15573 	fp16FMix() : fp16PerComponent()
15574 	{
15575 		flavorNames.push_back("DoubleCalc");
15576 		flavorNames.push_back("EmulatingFP16");
15577 		flavorNames.push_back("EmulatingFP16YminusX");
15578 	}
15579 
15580 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMix15581 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15582 	{
15583 		const fp16type	x		(*in[0]);
15584 		const fp16type	y		(*in[1]);
15585 		const fp16type	a		(*in[2]);
15586 		const double	ulps	(8.0); // This is not a precision test. Value is not from spec
15587 		double			result	(0.0);
15588 
15589 		if (getFlavor() == 0)
15590 		{
15591 			const double	xd		(x.asDouble());
15592 			const double	yd		(y.asDouble());
15593 			const double	ad		(a.asDouble());
15594 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15595 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15596 			const double	eps		(xeps + yeps);
15597 
15598 			result = deMix(xd, yd, ad);
15599 			min[0] = result - eps;
15600 			max[0] = result + eps;
15601 		}
15602 		else if (getFlavor() == 1)
15603 		{
15604 			const double	xd		(x.asDouble());
15605 			const double	yd		(y.asDouble());
15606 			const double	ad		(a.asDouble());
15607 			const fp16type	am		(1.0 - ad);
15608 			const double	amd		(am.asDouble());
15609 			const fp16type	xam		(xd * amd);
15610 			const double	xamd	(xam.asDouble());
15611 			const fp16type	ya		(yd * ad);
15612 			const double	yad		(ya.asDouble());
15613 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15614 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15615 			const double	eps		(xeps + yeps);
15616 
15617 			result = xamd + yad;
15618 			min[0] = result - eps;
15619 			max[0] = result + eps;
15620 		}
15621 		else if (getFlavor() == 2)
15622 		{
15623 			const double	xd		(x.asDouble());
15624 			const double	yd		(y.asDouble());
15625 			const double	ad		(a.asDouble());
15626 			const fp16type	ymx		(yd - xd);
15627 			const double	ymxd	(ymx.asDouble());
15628 			const fp16type	ymxa	(ymxd * ad);
15629 			const double	ymxad	(ymxa.asDouble());
15630 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15631 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15632 			const double	eps		(xeps + yeps);
15633 
15634 			result = xd + ymxad;
15635 			min[0] = result - eps;
15636 			max[0] = result + eps;
15637 		}
15638 		else
15639 		{
15640 			TCU_THROW(InternalError, "Unknown flavor");
15641 		}
15642 
15643 		out[0] = fp16type(result).bits();
15644 
15645 		return true;
15646 	}
15647 };
15648 
15649 struct fp16SmoothStep : public fp16PerComponent
15650 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep15651 	fp16SmoothStep() : fp16PerComponent()
15652 	{
15653 		flavorNames.push_back("FloatCalc");
15654 		flavorNames.push_back("EmulatingFP16");
15655 		flavorNames.push_back("EmulatingFP16WClamp");
15656 	}
15657 
getULPsvkt::SpirVAssembly::fp16SmoothStep15658 	virtual double getULPs(vector<const deFloat16*>& in)
15659 	{
15660 		DE_UNREF(in);
15661 
15662 		return 4.0; // This is not a precision test. Value is not from spec
15663 	}
15664 
15665 	template<class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep15666 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15667 	{
15668 		const fp16type	edge0	(*in[0]);
15669 		const fp16type	edge1	(*in[1]);
15670 		const fp16type	x		(*in[2]);
15671 		double			result	(0.0);
15672 
15673 		if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15674 			return false;
15675 
15676 		if (edge0.isInf() || edge1.isInf() || x.isInf())
15677 			return false;
15678 
15679 		if (getFlavor() == 0)
15680 		{
15681 			const float	edge0d	(edge0.asFloat());
15682 			const float	edge1d	(edge1.asFloat());
15683 			const float	xd		(x.asFloat());
15684 			const float	sstep	(deFloatSmoothStep(edge0d, edge1d, xd));
15685 
15686 			result = sstep;
15687 		}
15688 		else if (getFlavor() == 1)
15689 		{
15690 			const double	edge0d	(edge0.asDouble());
15691 			const double	edge1d	(edge1.asDouble());
15692 			const double	xd		(x.asDouble());
15693 
15694 			if (xd <= edge0d)
15695 				result = 0.0;
15696 			else if (xd >= edge1d)
15697 				result = 1.0;
15698 			else
15699 			{
15700 				const fp16type	a	(xd - edge0d);
15701 				const fp16type	b	(edge1d - edge0d);
15702 				const fp16type	t	(a.asDouble() / b.asDouble());
15703 				const fp16type	t2	(2.0 * t.asDouble());
15704 				const fp16type	t3	(3.0 - t2.asDouble());
15705 				const fp16type	t4	(t.asDouble() * t3.asDouble());
15706 				const fp16type	t5	(t.asDouble() * t4.asDouble());
15707 
15708 				result = t5.asDouble();
15709 			}
15710 		}
15711 		else if (getFlavor() == 2)
15712 		{
15713 			const double	edge0d	(edge0.asDouble());
15714 			const double	edge1d	(edge1.asDouble());
15715 			const double	xd		(x.asDouble());
15716 			const fp16type	a	(xd - edge0d);
15717 			const fp16type	b	(edge1d - edge0d);
15718 			const fp16type	bi	(1.0 / b.asDouble());
15719 			const fp16type	t0	(a.asDouble() * bi.asDouble());
15720 			const double	tc	(deClamp(t0.asDouble(), 0.0, 1.0));
15721 			const fp16type	t	(tc);
15722 			const fp16type	t2	(2.0 * t.asDouble());
15723 			const fp16type	t3	(3.0 - t2.asDouble());
15724 			const fp16type	t4	(t.asDouble() * t3.asDouble());
15725 			const fp16type	t5	(t.asDouble() * t4.asDouble());
15726 
15727 			result = t5.asDouble();
15728 		}
15729 		else
15730 		{
15731 			TCU_THROW(InternalError, "Unknown flavor");
15732 		}
15733 
15734 		out[0] = fp16type(result).bits();
15735 		min[0] = getMin(result, getULPs(in));
15736 		max[0] = getMax(result, getULPs(in));
15737 
15738 		return true;
15739 	}
15740 };
15741 
15742 struct fp16Fma : public fp16PerComponent
15743 {
fp16Fmavkt::SpirVAssembly::fp16Fma15744 	fp16Fma()
15745 	{
15746 		flavorNames.push_back("DoubleCalc");
15747 		flavorNames.push_back("EmulatingFP16");
15748 	}
15749 
getULPsvkt::SpirVAssembly::fp16Fma15750 	virtual double getULPs(vector<const deFloat16*>& in)
15751 	{
15752 		DE_UNREF(in);
15753 
15754 		return 16.0;
15755 	}
15756 
15757 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Fma15758 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15759 	{
15760 		DE_ASSERT(in.size() == 3);
15761 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15762 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15763 		DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15764 		DE_ASSERT(getOutCompCount() > 0);
15765 
15766 		const fp16type	a		(*in[0]);
15767 		const fp16type	b		(*in[1]);
15768 		const fp16type	c		(*in[2]);
15769 		double			result	(0.0);
15770 
15771 		if (getFlavor() == 0)
15772 		{
15773 			const double	ad	(a.asDouble());
15774 			const double	bd	(b.asDouble());
15775 			const double	cd	(c.asDouble());
15776 
15777 			result	= deMadd(ad, bd, cd);
15778 		}
15779 		else if (getFlavor() == 1)
15780 		{
15781 			const double	ad	(a.asDouble());
15782 			const double	bd	(b.asDouble());
15783 			const double	cd	(c.asDouble());
15784 			const fp16type	ab	(ad * bd);
15785 			const fp16type	r	(ab.asDouble() + cd);
15786 
15787 			result	= r.asDouble();
15788 		}
15789 		else
15790 		{
15791 			TCU_THROW(InternalError, "Unknown flavor");
15792 		}
15793 
15794 		out[0] = fp16type(result).bits();
15795 		min[0] = getMin(result, getULPs(in));
15796 		max[0] = getMax(result, getULPs(in));
15797 
15798 		return true;
15799 	}
15800 };
15801 
15802 
15803 struct fp16AllComponents : public fp16PerComponent
15804 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents15805 	bool		callOncePerComponent	()	{ return false; }
15806 };
15807 
15808 struct fp16Length : public fp16AllComponents
15809 {
fp16Lengthvkt::SpirVAssembly::fp16Length15810 	fp16Length() : fp16AllComponents()
15811 	{
15812 		flavorNames.push_back("EmulatingFP16");
15813 		flavorNames.push_back("DoubleCalc");
15814 	}
15815 
getULPsvkt::SpirVAssembly::fp16Length15816 	virtual double getULPs(vector<const deFloat16*>& in)
15817 	{
15818 		DE_UNREF(in);
15819 
15820 		return 4.0;
15821 	}
15822 
15823 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Length15824 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15825 	{
15826 		DE_ASSERT(getOutCompCount() == 1);
15827 		DE_ASSERT(in.size() == 1);
15828 
15829 		double	result	(0.0);
15830 
15831 		if (getFlavor() == 0)
15832 		{
15833 			fp16type	r	(0.0);
15834 
15835 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15836 			{
15837 				const fp16type	x	(in[0][componentNdx]);
15838 				const fp16type	q	(x.asDouble() * x.asDouble());
15839 
15840 				r = fp16type(r.asDouble() + q.asDouble());
15841 			}
15842 
15843 			result = deSqrt(r.asDouble());
15844 
15845 			out[0] = fp16type(result).bits();
15846 		}
15847 		else if (getFlavor() == 1)
15848 		{
15849 			double	r	(0.0);
15850 
15851 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15852 			{
15853 				const fp16type	x	(in[0][componentNdx]);
15854 				const double	q	(x.asDouble() * x.asDouble());
15855 
15856 				r += q;
15857 			}
15858 
15859 			result = deSqrt(r);
15860 
15861 			out[0] = fp16type(result).bits();
15862 		}
15863 		else
15864 		{
15865 			TCU_THROW(InternalError, "Unknown flavor");
15866 		}
15867 
15868 		min[0] = getMin(result, getULPs(in));
15869 		max[0] = getMax(result, getULPs(in));
15870 
15871 		return true;
15872 	}
15873 };
15874 
15875 struct fp16Distance : public fp16AllComponents
15876 {
fp16Distancevkt::SpirVAssembly::fp16Distance15877 	fp16Distance() : fp16AllComponents()
15878 	{
15879 		flavorNames.push_back("EmulatingFP16");
15880 		flavorNames.push_back("DoubleCalc");
15881 	}
15882 
getULPsvkt::SpirVAssembly::fp16Distance15883 	virtual double getULPs(vector<const deFloat16*>& in)
15884 	{
15885 		DE_UNREF(in);
15886 
15887 		return 4.0;
15888 	}
15889 
15890 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Distance15891 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15892 	{
15893 		DE_ASSERT(getOutCompCount() == 1);
15894 		DE_ASSERT(in.size() == 2);
15895 		DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15896 
15897 		double	result	(0.0);
15898 
15899 		if (getFlavor() == 0)
15900 		{
15901 			fp16type	r	(0.0);
15902 
15903 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15904 			{
15905 				const fp16type	x	(in[0][componentNdx]);
15906 				const fp16type	y	(in[1][componentNdx]);
15907 				const fp16type	d	(x.asDouble() - y.asDouble());
15908 				const fp16type	q	(d.asDouble() * d.asDouble());
15909 
15910 				r = fp16type(r.asDouble() + q.asDouble());
15911 			}
15912 
15913 			result = deSqrt(r.asDouble());
15914 		}
15915 		else if (getFlavor() == 1)
15916 		{
15917 			double	r	(0.0);
15918 
15919 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15920 			{
15921 				const fp16type	x	(in[0][componentNdx]);
15922 				const fp16type	y	(in[1][componentNdx]);
15923 				const double	d	(x.asDouble() - y.asDouble());
15924 				const double	q	(d * d);
15925 
15926 				r += q;
15927 			}
15928 
15929 			result = deSqrt(r);
15930 		}
15931 		else
15932 		{
15933 			TCU_THROW(InternalError, "Unknown flavor");
15934 		}
15935 
15936 		out[0] = fp16type(result).bits();
15937 		min[0] = getMin(result, getULPs(in));
15938 		max[0] = getMax(result, getULPs(in));
15939 
15940 		return true;
15941 	}
15942 };
15943 
15944 struct fp16Cross : public fp16AllComponents
15945 {
fp16Crossvkt::SpirVAssembly::fp16Cross15946 	fp16Cross() : fp16AllComponents()
15947 	{
15948 		flavorNames.push_back("EmulatingFP16");
15949 		flavorNames.push_back("DoubleCalc");
15950 	}
15951 
getULPsvkt::SpirVAssembly::fp16Cross15952 	virtual double getULPs(vector<const deFloat16*>& in)
15953 	{
15954 		DE_UNREF(in);
15955 
15956 		return 4.0;
15957 	}
15958 
15959 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cross15960 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15961 	{
15962 		DE_ASSERT(getOutCompCount() == 3);
15963 		DE_ASSERT(in.size() == 2);
15964 		DE_ASSERT(getArgCompCount(0) == 3);
15965 		DE_ASSERT(getArgCompCount(1) == 3);
15966 
15967 		if (getFlavor() == 0)
15968 		{
15969 			const fp16type	x0		(in[0][0]);
15970 			const fp16type	x1		(in[0][1]);
15971 			const fp16type	x2		(in[0][2]);
15972 			const fp16type	y0		(in[1][0]);
15973 			const fp16type	y1		(in[1][1]);
15974 			const fp16type	y2		(in[1][2]);
15975 			const fp16type	x1y2	(x1.asDouble() * y2.asDouble());
15976 			const fp16type	y1x2	(y1.asDouble() * x2.asDouble());
15977 			const fp16type	x2y0	(x2.asDouble() * y0.asDouble());
15978 			const fp16type	y2x0	(y2.asDouble() * x0.asDouble());
15979 			const fp16type	x0y1	(x0.asDouble() * y1.asDouble());
15980 			const fp16type	y0x1	(y0.asDouble() * x1.asDouble());
15981 
15982 			out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15983 			out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15984 			out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15985 		}
15986 		else if (getFlavor() == 1)
15987 		{
15988 			const fp16type	x0		(in[0][0]);
15989 			const fp16type	x1		(in[0][1]);
15990 			const fp16type	x2		(in[0][2]);
15991 			const fp16type	y0		(in[1][0]);
15992 			const fp16type	y1		(in[1][1]);
15993 			const fp16type	y2		(in[1][2]);
15994 			const double	x1y2	(x1.asDouble() * y2.asDouble());
15995 			const double	y1x2	(y1.asDouble() * x2.asDouble());
15996 			const double	x2y0	(x2.asDouble() * y0.asDouble());
15997 			const double	y2x0	(y2.asDouble() * x0.asDouble());
15998 			const double	x0y1	(x0.asDouble() * y1.asDouble());
15999 			const double	y0x1	(y0.asDouble() * x1.asDouble());
16000 
16001 			out[0] = fp16type(x1y2 - y1x2).bits();
16002 			out[1] = fp16type(x2y0 - y2x0).bits();
16003 			out[2] = fp16type(x0y1 - y0x1).bits();
16004 		}
16005 		else
16006 		{
16007 			TCU_THROW(InternalError, "Unknown flavor");
16008 		}
16009 
16010 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16011 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16012 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16013 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16014 
16015 		return true;
16016 	}
16017 };
16018 
16019 struct fp16Normalize : public fp16AllComponents
16020 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16021 	fp16Normalize() : fp16AllComponents()
16022 	{
16023 		flavorNames.push_back("EmulatingFP16");
16024 		flavorNames.push_back("DoubleCalc");
16025 
16026 		permutationsFlavorStart = 0;
16027 		permutationsFlavorEnd = flavorNames.size();
16028 
16029 		// flavorNames will be extended later
16030 	}
16031 
setArgCompCountvkt::SpirVAssembly::fp16Normalize16032 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)
16033 	{
16034 		DE_ASSERT(argCompCount[argNo] == 0); // Once only
16035 
16036 		if (argNo == 0 && argCompCount[argNo] == 0)
16037 		{
16038 			const size_t		maxPermutationsCount	= 24u; // Equal to 4!
16039 			std::vector<int>	indices;
16040 
16041 			for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16042 				indices.push_back(static_cast<int>(componentNdx));
16043 
16044 			m_permutations.reserve(maxPermutationsCount);
16045 
16046 			permutationsFlavorStart = flavorNames.size();
16047 
16048 			do
16049 			{
16050 				tcu::UVec4	permutation;
16051 				std::string	name		= "Permutted_";
16052 
16053 				for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16054 				{
16055 					permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16056 					name += de::toString(indices[componentNdx]);
16057 				}
16058 
16059 				m_permutations.push_back(permutation);
16060 				flavorNames.push_back(name);
16061 
16062 			} while(std::next_permutation(indices.begin(), indices.end()));
16063 
16064 			permutationsFlavorEnd = flavorNames.size();
16065 		}
16066 
16067 		fp16AllComponents::setArgCompCount(argNo, compCount);
16068 	}
getULPsvkt::SpirVAssembly::fp16Normalize16069 	virtual double getULPs(vector<const deFloat16*>& in)
16070 	{
16071 		DE_UNREF(in);
16072 
16073 		return 8.0;
16074 	}
16075 
16076 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16077 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16078 	{
16079 		DE_ASSERT(in.size() == 1);
16080 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16081 
16082 		if (getFlavor() == 0)
16083 		{
16084 			fp16type	r(0.0);
16085 
16086 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16087 			{
16088 				const fp16type	x	(in[0][componentNdx]);
16089 				const fp16type	q	(x.asDouble() * x.asDouble());
16090 
16091 				r = fp16type(r.asDouble() + q.asDouble());
16092 			}
16093 
16094 			r = fp16type(deSqrt(r.asDouble()));
16095 
16096 			if (r.isZero())
16097 				return false;
16098 
16099 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16100 			{
16101 				const fp16type	x	(in[0][componentNdx]);
16102 
16103 				out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16104 			}
16105 		}
16106 		else if (getFlavor() == 1)
16107 		{
16108 			double	r(0.0);
16109 
16110 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16111 			{
16112 				const fp16type	x	(in[0][componentNdx]);
16113 				const double	q	(x.asDouble() * x.asDouble());
16114 
16115 				r += q;
16116 			}
16117 
16118 			r = deSqrt(r);
16119 
16120 			if (r == 0)
16121 				return false;
16122 
16123 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16124 			{
16125 				const fp16type	x	(in[0][componentNdx]);
16126 
16127 				out[componentNdx] = fp16type(x.asDouble() / r).bits();
16128 			}
16129 		}
16130 		else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16131 		{
16132 			const int			compCount		(static_cast<int>(getArgCompCount(0)));
16133 			const size_t		permutationNdx	(getFlavor() - permutationsFlavorStart);
16134 			const tcu::UVec4&	permutation		(m_permutations[permutationNdx]);
16135 			fp16type			r				(0.0);
16136 
16137 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16138 			{
16139 				const size_t	componentNdx	(permutation[permComponentNdx]);
16140 				const fp16type	x				(in[0][componentNdx]);
16141 				const fp16type	q				(x.asDouble() * x.asDouble());
16142 
16143 				r = fp16type(r.asDouble() + q.asDouble());
16144 			}
16145 
16146 			r = fp16type(deSqrt(r.asDouble()));
16147 
16148 			if (r.isZero())
16149 				return false;
16150 
16151 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16152 			{
16153 				const size_t	componentNdx	(permutation[permComponentNdx]);
16154 				const fp16type	x				(in[0][componentNdx]);
16155 
16156 				out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16157 			}
16158 		}
16159 		else
16160 		{
16161 			TCU_THROW(InternalError, "Unknown flavor");
16162 		}
16163 
16164 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16165 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16166 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16167 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16168 
16169 		return true;
16170 	}
16171 
16172 private:
16173 	std::vector<tcu::UVec4> m_permutations;
16174 	size_t					permutationsFlavorStart;
16175 	size_t					permutationsFlavorEnd;
16176 };
16177 
16178 struct fp16FaceForward : public fp16AllComponents
16179 {
getULPsvkt::SpirVAssembly::fp16FaceForward16180 	virtual double getULPs(vector<const deFloat16*>& in)
16181 	{
16182 		DE_UNREF(in);
16183 
16184 		return 4.0;
16185 	}
16186 
16187 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16188 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16189 	{
16190 		DE_ASSERT(in.size() == 3);
16191 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16192 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16193 		DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16194 
16195 		fp16type	dp(0.0);
16196 
16197 		for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16198 		{
16199 			const fp16type	x	(in[1][componentNdx]);
16200 			const fp16type	y	(in[2][componentNdx]);
16201 			const double	xd	(x.asDouble());
16202 			const double	yd	(y.asDouble());
16203 			const fp16type	q	(xd * yd);
16204 
16205 			dp = fp16type(dp.asDouble() + q.asDouble());
16206 		}
16207 
16208 		if (dp.isNaN() || dp.isZero())
16209 			return false;
16210 
16211 		for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16212 		{
16213 			const fp16type	n	(in[0][componentNdx]);
16214 
16215 			out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16216 		}
16217 
16218 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16219 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16220 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16221 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16222 
16223 		return true;
16224 	}
16225 };
16226 
16227 struct fp16Reflect : public fp16AllComponents
16228 {
fp16Reflectvkt::SpirVAssembly::fp16Reflect16229 	fp16Reflect() : fp16AllComponents()
16230 	{
16231 		flavorNames.push_back("EmulatingFP16");
16232 		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16233 		flavorNames.push_back("FloatCalc");
16234 		flavorNames.push_back("FloatCalc+KeepZeroSign");
16235 		flavorNames.push_back("EmulatingFP16+2Nfirst");
16236 		flavorNames.push_back("EmulatingFP16+2Ifirst");
16237 	}
16238 
getULPsvkt::SpirVAssembly::fp16Reflect16239 	virtual double getULPs(vector<const deFloat16*>& in)
16240 	{
16241 		DE_UNREF(in);
16242 
16243 		return 256.0; // This is not a precision test. Value is not from spec
16244 	}
16245 
16246 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Reflect16247 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16248 	{
16249 		DE_ASSERT(in.size() == 2);
16250 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16251 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16252 
16253 		if (getFlavor() < 4)
16254 		{
16255 			const bool	keepZeroSign	((flavor & 1) != 0 ? true : false);
16256 			const bool	floatCalc		((flavor & 2) != 0 ? true : false);
16257 
16258 			if (floatCalc)
16259 			{
16260 				float	dp(0.0f);
16261 
16262 				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16263 				{
16264 					const fp16type	i	(in[0][componentNdx]);
16265 					const fp16type	n	(in[1][componentNdx]);
16266 					const float		id	(i.asFloat());
16267 					const float		nd	(n.asFloat());
16268 					const float		qd	(id * nd);
16269 
16270 					if (keepZeroSign)
16271 						dp = (componentNdx == 0) ? qd : dp + qd;
16272 					else
16273 						dp = dp + qd;
16274 				}
16275 
16276 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16277 				{
16278 					const fp16type	i		(in[0][componentNdx]);
16279 					const fp16type	n		(in[1][componentNdx]);
16280 					const float		dpnd	(dp * n.asFloat());
16281 					const float		dpn2d	(2.0f * dpnd);
16282 					const float		idpn2d	(i.asFloat() - dpn2d);
16283 					const fp16type	result	(idpn2d);
16284 
16285 					out[componentNdx] = result.bits();
16286 				}
16287 			}
16288 			else
16289 			{
16290 				fp16type	dp(0.0);
16291 
16292 				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16293 				{
16294 					const fp16type	i	(in[0][componentNdx]);
16295 					const fp16type	n	(in[1][componentNdx]);
16296 					const double	id	(i.asDouble());
16297 					const double	nd	(n.asDouble());
16298 					const fp16type	q	(id * nd);
16299 
16300 					if (keepZeroSign)
16301 						dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16302 					else
16303 						dp = fp16type(dp.asDouble() + q.asDouble());
16304 				}
16305 
16306 				if (dp.isNaN())
16307 					return false;
16308 
16309 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16310 				{
16311 					const fp16type	i		(in[0][componentNdx]);
16312 					const fp16type	n		(in[1][componentNdx]);
16313 					const fp16type	dpn		(dp.asDouble() * n.asDouble());
16314 					const fp16type	dpn2	(2 * dpn.asDouble());
16315 					const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16316 
16317 					out[componentNdx] = idpn2.bits();
16318 				}
16319 			}
16320 		}
16321 		else if (getFlavor() == 4)
16322 		{
16323 			fp16type	dp(0.0);
16324 
16325 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16326 			{
16327 				const fp16type	i	(in[0][componentNdx]);
16328 				const fp16type	n	(in[1][componentNdx]);
16329 				const double	id	(i.asDouble());
16330 				const double	nd	(n.asDouble());
16331 				const fp16type	q	(id * nd);
16332 
16333 				dp = fp16type(dp.asDouble() + q.asDouble());
16334 			}
16335 
16336 			if (dp.isNaN())
16337 				return false;
16338 
16339 			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16340 			{
16341 				const fp16type	i		(in[0][componentNdx]);
16342 				const fp16type	n		(in[1][componentNdx]);
16343 				const fp16type	n2		(2 * n.asDouble());
16344 				const fp16type	dpn2	(dp.asDouble() * n2.asDouble());
16345 				const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16346 
16347 				out[componentNdx] = idpn2.bits();
16348 			}
16349 		}
16350 		else if (getFlavor() == 5)
16351 		{
16352 			fp16type	dp2(0.0);
16353 
16354 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16355 			{
16356 				const fp16type	i	(in[0][componentNdx]);
16357 				const fp16type	n	(in[1][componentNdx]);
16358 				const fp16type	i2	(2.0 * i.asDouble());
16359 				const double	i2d	(i2.asDouble());
16360 				const double	nd	(n.asDouble());
16361 				const fp16type	q	(i2d * nd);
16362 
16363 				dp2 = fp16type(dp2.asDouble() + q.asDouble());
16364 			}
16365 
16366 			if (dp2.isNaN())
16367 				return false;
16368 
16369 			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16370 			{
16371 				const fp16type	i		(in[0][componentNdx]);
16372 				const fp16type	n		(in[1][componentNdx]);
16373 				const fp16type	dpn2	(dp2.asDouble() * n.asDouble());
16374 				const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16375 
16376 				out[componentNdx] = idpn2.bits();
16377 			}
16378 		}
16379 		else
16380 		{
16381 			TCU_THROW(InternalError, "Unknown flavor");
16382 		}
16383 
16384 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16385 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16386 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16387 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16388 
16389 		return true;
16390 	}
16391 };
16392 
16393 struct fp16Refract : public fp16AllComponents
16394 {
fp16Refractvkt::SpirVAssembly::fp16Refract16395 	fp16Refract() : fp16AllComponents()
16396 	{
16397 		flavorNames.push_back("EmulatingFP16");
16398 		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16399 		flavorNames.push_back("FloatCalc");
16400 		flavorNames.push_back("FloatCalc+KeepZeroSign");
16401 	}
16402 
getULPsvkt::SpirVAssembly::fp16Refract16403 	virtual double getULPs(vector<const deFloat16*>& in)
16404 	{
16405 		DE_UNREF(in);
16406 
16407 		return 8192.0; // This is not a precision test. Value is not from spec
16408 	}
16409 
16410 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Refract16411 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16412 	{
16413 		DE_ASSERT(in.size() == 3);
16414 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16415 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16416 		DE_ASSERT(getArgCompCount(2) == 1);
16417 
16418 		const bool		keepZeroSign	((flavor & 1) != 0 ? true : false);
16419 		const bool		doubleCalc		((flavor & 2) != 0 ? true : false);
16420 		const fp16type	eta				(*in[2]);
16421 
16422 		if (doubleCalc)
16423 		{
16424 			double	dp	(0.0);
16425 
16426 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16427 			{
16428 				const fp16type	i	(in[0][componentNdx]);
16429 				const fp16type	n	(in[1][componentNdx]);
16430 				const double	id	(i.asDouble());
16431 				const double	nd	(n.asDouble());
16432 				const double	qd	(id * nd);
16433 
16434 				if (keepZeroSign)
16435 					dp = (componentNdx == 0) ? qd : dp + qd;
16436 				else
16437 					dp = dp + qd;
16438 			}
16439 
16440 			const double	eta2	(eta.asDouble() * eta.asDouble());
16441 			const double	dp2		(dp * dp);
16442 			const double	dp1		(1.0 - dp2);
16443 			const double	dpe		(eta2 * dp1);
16444 			const double	k		(1.0 - dpe);
16445 
16446 			if (k < 0.0)
16447 			{
16448 				const fp16type	zero	(0.0);
16449 
16450 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16451 					out[componentNdx] = zero.bits();
16452 			}
16453 			else
16454 			{
16455 				const double	sk	(deSqrt(k));
16456 
16457 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16458 				{
16459 					const fp16type	i		(in[0][componentNdx]);
16460 					const fp16type	n		(in[1][componentNdx]);
16461 					const double	etai	(i.asDouble() * eta.asDouble());
16462 					const double	etadp	(eta.asDouble() * dp);
16463 					const double	etadpk	(etadp + sk);
16464 					const double	etadpkn	(etadpk * n.asDouble());
16465 					const double	full	(etai - etadpkn);
16466 					const fp16type	result	(full);
16467 
16468 					if (result.isInf())
16469 						return false;
16470 
16471 					out[componentNdx] = result.bits();
16472 				}
16473 			}
16474 		}
16475 		else
16476 		{
16477 			fp16type	dp	(0.0);
16478 
16479 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16480 			{
16481 				const fp16type	i	(in[0][componentNdx]);
16482 				const fp16type	n	(in[1][componentNdx]);
16483 				const double	id	(i.asDouble());
16484 				const double	nd	(n.asDouble());
16485 				const fp16type	q	(id * nd);
16486 
16487 				if (keepZeroSign)
16488 					dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16489 				else
16490 					dp = fp16type(dp.asDouble() + q.asDouble());
16491 			}
16492 
16493 			if (dp.isNaN())
16494 				return false;
16495 
16496 			const fp16type	eta2(eta.asDouble() * eta.asDouble());
16497 			const fp16type	dp2	(dp.asDouble() * dp.asDouble());
16498 			const fp16type	dp1	(1.0 - dp2.asDouble());
16499 			const fp16type	dpe	(eta2.asDouble() * dp1.asDouble());
16500 			const fp16type	k	(1.0 - dpe.asDouble());
16501 
16502 			if (k.asDouble() < 0.0)
16503 			{
16504 				const fp16type	zero	(0.0);
16505 
16506 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16507 					out[componentNdx] = zero.bits();
16508 			}
16509 			else
16510 			{
16511 				const fp16type	sk	(deSqrt(k.asDouble()));
16512 
16513 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16514 				{
16515 					const fp16type	i		(in[0][componentNdx]);
16516 					const fp16type	n		(in[1][componentNdx]);
16517 					const fp16type	etai	(i.asDouble() * eta.asDouble());
16518 					const fp16type	etadp	(eta.asDouble() * dp.asDouble());
16519 					const fp16type	etadpk	(etadp.asDouble() + sk.asDouble());
16520 					const fp16type	etadpkn	(etadpk.asDouble() * n.asDouble());
16521 					const fp16type	full	(etai.asDouble() - etadpkn.asDouble());
16522 
16523 					if (full.isNaN() || full.isInf())
16524 						return false;
16525 
16526 					out[componentNdx] = full.bits();
16527 				}
16528 			}
16529 		}
16530 
16531 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16532 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16533 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16534 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16535 
16536 		return true;
16537 	}
16538 };
16539 
16540 struct fp16Dot : public fp16AllComponents
16541 {
fp16Dotvkt::SpirVAssembly::fp16Dot16542 	fp16Dot() : fp16AllComponents()
16543 	{
16544 		flavorNames.push_back("EmulatingFP16");
16545 		flavorNames.push_back("FloatCalc");
16546 		flavorNames.push_back("DoubleCalc");
16547 
16548 		permutationsFlavorStart = 0;
16549 		permutationsFlavorEnd = flavorNames.size();
16550 
16551 		// flavorNames will be extended later
16552 	}
16553 
setArgCompCountvkt::SpirVAssembly::fp16Dot16554 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)
16555 	{
16556 		DE_ASSERT(argCompCount[argNo] == 0); // Once only
16557 
16558 		if (argNo == 0 && argCompCount[argNo] == 0)
16559 		{
16560 			const size_t		maxPermutationsCount	= 24u; // Equal to 4!
16561 			std::vector<int>	indices;
16562 
16563 			for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16564 				indices.push_back(static_cast<int>(componentNdx));
16565 
16566 			m_permutations.reserve(maxPermutationsCount);
16567 
16568 			permutationsFlavorStart = flavorNames.size();
16569 
16570 			do
16571 			{
16572 				tcu::UVec4	permutation;
16573 				std::string	name		= "Permutted_";
16574 
16575 				for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16576 				{
16577 					permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16578 					name += de::toString(indices[componentNdx]);
16579 				}
16580 
16581 				m_permutations.push_back(permutation);
16582 				flavorNames.push_back(name);
16583 
16584 			} while(std::next_permutation(indices.begin(), indices.end()));
16585 
16586 			permutationsFlavorEnd = flavorNames.size();
16587 		}
16588 
16589 		fp16AllComponents::setArgCompCount(argNo, compCount);
16590 	}
16591 
getULPsvkt::SpirVAssembly::fp16Dot16592 	virtual double	getULPs(vector<const deFloat16*>& in)
16593 	{
16594 		DE_UNREF(in);
16595 
16596 		return 16.0; // This is not a precision test. Value is not from spec
16597 	}
16598 
16599 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Dot16600 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16601 	{
16602 		DE_ASSERT(in.size() == 2);
16603 		DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16604 		DE_ASSERT(getOutCompCount() == 1);
16605 
16606 		double	result	(0.0);
16607 		double	eps		(0.0);
16608 
16609 		if (getFlavor() == 0)
16610 		{
16611 			fp16type	dp	(0.0);
16612 
16613 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16614 			{
16615 				const fp16type	x	(in[0][componentNdx]);
16616 				const fp16type	y	(in[1][componentNdx]);
16617 				const fp16type	q	(x.asDouble() * y.asDouble());
16618 
16619 				dp = fp16type(dp.asDouble() + q.asDouble());
16620 				eps += floatFormat16.ulp(q.asDouble(), 2.0);
16621 			}
16622 
16623 			result = dp.asDouble();
16624 		}
16625 		else if (getFlavor() == 1)
16626 		{
16627 			float	dp	(0.0);
16628 
16629 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16630 			{
16631 				const fp16type	x	(in[0][componentNdx]);
16632 				const fp16type	y	(in[1][componentNdx]);
16633 				const float		q	(x.asFloat() * y.asFloat());
16634 
16635 				dp += q;
16636 				eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16637 			}
16638 
16639 			result = dp;
16640 		}
16641 		else if (getFlavor() == 2)
16642 		{
16643 			double	dp	(0.0);
16644 
16645 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16646 			{
16647 				const fp16type	x	(in[0][componentNdx]);
16648 				const fp16type	y	(in[1][componentNdx]);
16649 				const double	q	(x.asDouble() * y.asDouble());
16650 
16651 				dp += q;
16652 				eps += floatFormat16.ulp(q, 2.0);
16653 			}
16654 
16655 			result = dp;
16656 		}
16657 		else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16658 		{
16659 			const int			compCount		(static_cast<int>(getArgCompCount(1)));
16660 			const size_t		permutationNdx	(getFlavor() - permutationsFlavorStart);
16661 			const tcu::UVec4&	permutation		(m_permutations[permutationNdx]);
16662 			fp16type			dp				(0.0);
16663 
16664 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16665 			{
16666 				const size_t		componentNdx	(permutation[permComponentNdx]);
16667 				const fp16type		x				(in[0][componentNdx]);
16668 				const fp16type		y				(in[1][componentNdx]);
16669 				const fp16type		q				(x.asDouble() * y.asDouble());
16670 
16671 				dp = fp16type(dp.asDouble() + q.asDouble());
16672 				eps += floatFormat16.ulp(q.asDouble(), 2.0);
16673 			}
16674 
16675 			result = dp.asDouble();
16676 		}
16677 		else
16678 		{
16679 			TCU_THROW(InternalError, "Unknown flavor");
16680 		}
16681 
16682 		out[0] = fp16type(result).bits();
16683 		min[0] = result - eps;
16684 		max[0] = result + eps;
16685 
16686 		return true;
16687 	}
16688 
16689 private:
16690 	std::vector<tcu::UVec4> m_permutations;
16691 	size_t					permutationsFlavorStart;
16692 	size_t					permutationsFlavorEnd;
16693 };
16694 
16695 struct fp16VectorTimesScalar : public fp16AllComponents
16696 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar16697 	virtual double getULPs(vector<const deFloat16*>& in)
16698 	{
16699 		DE_UNREF(in);
16700 
16701 		return 2.0;
16702 	}
16703 
16704 	template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar16705 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16706 	{
16707 		DE_ASSERT(in.size() == 2);
16708 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16709 		DE_ASSERT(getArgCompCount(1) == 1);
16710 
16711 		fp16type	s	(*in[1]);
16712 
16713 		for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16714 		{
16715 			const fp16type	x	   (in[0][componentNdx]);
16716 			const double    result (s.asDouble() * x.asDouble());
16717 			const fp16type	m	   (result);
16718 
16719 			out[componentNdx] = m.bits();
16720 			min[componentNdx] = getMin(result, getULPs(in));
16721 			max[componentNdx] = getMax(result, getULPs(in));
16722 		}
16723 
16724 		return true;
16725 	}
16726 };
16727 
16728 struct fp16MatrixBase : public fp16AllComponents
16729 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase16730 	deUint32		getComponentValidity			()
16731 	{
16732 		return static_cast<deUint32>(-1);
16733 	}
16734 
getNdxvkt::SpirVAssembly::fp16MatrixBase16735 	inline size_t	getNdx							(const size_t rowCount, const size_t col, const size_t row)
16736 	{
16737 		const size_t minComponentCount	= 0;
16738 		const size_t maxComponentCount	= 3;
16739 		const size_t alignedRowsCount	= (rowCount == 3) ? 4 : rowCount;
16740 
16741 		DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16742 		DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16743 		DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16744 		DE_UNREF(minComponentCount);
16745 		DE_UNREF(maxComponentCount);
16746 
16747 		return col * alignedRowsCount + row;
16748 	}
16749 
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase16750 	deUint32		getComponentMatrixValidityMask	(size_t cols, size_t rows)
16751 	{
16752 		deUint32	result	= 0u;
16753 
16754 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16755 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16756 			{
16757 				const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16758 
16759 				DE_ASSERT(bitNdx < sizeof(result) * 8);
16760 
16761 				result |= (1<<bitNdx);
16762 			}
16763 
16764 		return result;
16765 	}
16766 };
16767 
16768 template<size_t cols, size_t rows>
16769 struct fp16Transpose : public fp16MatrixBase
16770 {
getULPsvkt::SpirVAssembly::fp16Transpose16771 	virtual double getULPs(vector<const deFloat16*>& in)
16772 	{
16773 		DE_UNREF(in);
16774 
16775 		return 1.0;
16776 	}
16777 
getComponentValidityvkt::SpirVAssembly::fp16Transpose16778 	deUint32	getComponentValidity	()
16779 	{
16780 		return getComponentMatrixValidityMask(rows, cols);
16781 	}
16782 
16783 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Transpose16784 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16785 	{
16786 		DE_ASSERT(in.size() == 1);
16787 
16788 		const size_t		alignedCols	= (cols == 3) ? 4 : cols;
16789 		const size_t		alignedRows	= (rows == 3) ? 4 : rows;
16790 		vector<deFloat16>	output		(alignedCols * alignedRows, 0);
16791 
16792 		DE_ASSERT(output.size() == alignedCols * alignedRows);
16793 
16794 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16795 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16796 				output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16797 
16798 		deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16799 		deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16800 		deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16801 
16802 		return true;
16803 	}
16804 };
16805 
16806 template<size_t cols, size_t rows>
16807 struct fp16MatrixTimesScalar : public fp16MatrixBase
16808 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar16809 	virtual double getULPs(vector<const deFloat16*>& in)
16810 	{
16811 		DE_UNREF(in);
16812 
16813 		return 4.0;
16814 	}
16815 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar16816 	deUint32	getComponentValidity	()
16817 	{
16818 		return getComponentMatrixValidityMask(cols, rows);
16819 	}
16820 
16821 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar16822 	bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16823 	{
16824 		DE_ASSERT(in.size() == 2);
16825 		DE_ASSERT(getArgCompCount(1) == 1);
16826 
16827 		const fp16type	y			(in[1][0]);
16828 		const float		scalar		(y.asFloat());
16829 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16830 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16831 
16832 		DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16833 		DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16834 		DE_UNREF(alignedCols);
16835 
16836 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16837 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16838 			{
16839 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
16840 				const fp16type	x	(in[0][ndx]);
16841 				const double	result	(scalar * x.asFloat());
16842 
16843 				out[ndx] = fp16type(result).bits();
16844 				min[ndx] = getMin(result, getULPs(in));
16845 				max[ndx] = getMax(result, getULPs(in));
16846 			}
16847 
16848 		return true;
16849 	}
16850 };
16851 
16852 template<size_t cols, size_t rows>
16853 struct fp16VectorTimesMatrix : public fp16MatrixBase
16854 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix16855 	fp16VectorTimesMatrix() : fp16MatrixBase()
16856 	{
16857 		flavorNames.push_back("EmulatingFP16");
16858 		flavorNames.push_back("FloatCalc");
16859 	}
16860 
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix16861 	virtual double getULPs (vector<const deFloat16*>& in)
16862 	{
16863 		DE_UNREF(in);
16864 
16865 		return (8.0 * cols);
16866 	}
16867 
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix16868 	deUint32 getComponentValidity ()
16869 	{
16870 		return getComponentMatrixValidityMask(cols, 1);
16871 	}
16872 
16873 	template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix16874 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16875 	{
16876 		DE_ASSERT(in.size() == 2);
16877 
16878 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16879 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16880 
16881 		DE_ASSERT(getOutCompCount() == cols);
16882 		DE_ASSERT(getArgCompCount(0) == rows);
16883 		DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16884 		DE_UNREF(alignedCols);
16885 
16886 		if (getFlavor() == 0)
16887 		{
16888 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16889 			{
16890 				fp16type	s	(fp16type::zero(1));
16891 
16892 				for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16893 				{
16894 					const fp16type	v	(in[0][rowNdx]);
16895 					const float		vf	(v.asFloat());
16896 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16897 					const fp16type	x	(in[1][ndx]);
16898 					const float		xf	(x.asFloat());
16899 					const fp16type	m	(vf * xf);
16900 
16901 					s = fp16type(s.asFloat() + m.asFloat());
16902 				}
16903 
16904 				out[colNdx] = s.bits();
16905 				min[colNdx] = getMin(s.asDouble(), getULPs(in));
16906 				max[colNdx] = getMax(s.asDouble(), getULPs(in));
16907 			}
16908 		}
16909 		else if (getFlavor() == 1)
16910 		{
16911 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16912 			{
16913 				float	s	(0.0f);
16914 
16915 				for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16916 				{
16917 					const fp16type	v	(in[0][rowNdx]);
16918 					const float		vf	(v.asFloat());
16919 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16920 					const fp16type	x	(in[1][ndx]);
16921 					const float		xf	(x.asFloat());
16922 					const float		m	(vf * xf);
16923 
16924 					s += m;
16925 				}
16926 
16927 				out[colNdx] = fp16type(s).bits();
16928 				min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16929 				max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16930 			}
16931 		}
16932 		else
16933 		{
16934 			TCU_THROW(InternalError, "Unknown flavor");
16935 		}
16936 
16937 		return true;
16938 	}
16939 };
16940 
16941 template<size_t cols, size_t rows>
16942 struct fp16MatrixTimesVector : public fp16MatrixBase
16943 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector16944 	fp16MatrixTimesVector() : fp16MatrixBase()
16945 	{
16946 		flavorNames.push_back("EmulatingFP16");
16947 		flavorNames.push_back("FloatCalc");
16948 	}
16949 
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector16950 	virtual double getULPs (vector<const deFloat16*>& in)
16951 	{
16952 		DE_UNREF(in);
16953 
16954 		return (8.0 * rows);
16955 	}
16956 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector16957 	deUint32 getComponentValidity ()
16958 	{
16959 		return getComponentMatrixValidityMask(rows, 1);
16960 	}
16961 
16962 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector16963 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16964 	{
16965 		DE_ASSERT(in.size() == 2);
16966 
16967 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16968 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16969 
16970 		DE_ASSERT(getOutCompCount() == rows);
16971 		DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16972 		DE_ASSERT(getArgCompCount(1) == cols);
16973 		DE_UNREF(alignedCols);
16974 
16975 		if (getFlavor() == 0)
16976 		{
16977 			for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16978 			{
16979 				fp16type	s	(fp16type::zero(1));
16980 
16981 				for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16982 				{
16983 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16984 					const fp16type	x	(in[0][ndx]);
16985 					const float		xf	(x.asFloat());
16986 					const fp16type	v	(in[1][colNdx]);
16987 					const float		vf	(v.asFloat());
16988 					const fp16type	m	(vf * xf);
16989 
16990 					s = fp16type(s.asFloat() + m.asFloat());
16991 				}
16992 
16993 				out[rowNdx] = s.bits();
16994 				min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16995 				max[rowNdx] = getMax(s.asDouble(), getULPs(in));
16996 			}
16997 		}
16998 		else if (getFlavor() == 1)
16999 		{
17000 			for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17001 			{
17002 				float	s	(0.0f);
17003 
17004 				for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17005 				{
17006 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
17007 					const fp16type	x	(in[0][ndx]);
17008 					const float		xf	(x.asFloat());
17009 					const fp16type	v	(in[1][colNdx]);
17010 					const float		vf	(v.asFloat());
17011 					const float		m	(vf * xf);
17012 
17013 					s += m;
17014 				}
17015 
17016 				out[rowNdx] = fp16type(s).bits();
17017 				min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17018 				max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17019 			}
17020 		}
17021 		else
17022 		{
17023 			TCU_THROW(InternalError, "Unknown flavor");
17024 		}
17025 
17026 		return true;
17027 	}
17028 };
17029 
17030 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17031 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17032 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17033 	fp16MatrixTimesMatrix() : fp16MatrixBase()
17034 	{
17035 		flavorNames.push_back("EmulatingFP16");
17036 		flavorNames.push_back("FloatCalc");
17037 	}
17038 
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17039 	virtual double getULPs (vector<const deFloat16*>& in)
17040 	{
17041 		DE_UNREF(in);
17042 
17043 		return 32.0;
17044 	}
17045 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17046 	deUint32 getComponentValidity ()
17047 	{
17048 		return getComponentMatrixValidityMask(colsR, rowsL);
17049 	}
17050 
17051 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17052 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17053 	{
17054 		DE_STATIC_ASSERT(colsL == rowsR);
17055 
17056 		DE_ASSERT(in.size() == 2);
17057 
17058 		const size_t	alignedColsL	= (colsL == 3) ? 4 : colsL;
17059 		const size_t	alignedRowsL	= (rowsL == 3) ? 4 : rowsL;
17060 		const size_t	alignedColsR	= (colsR == 3) ? 4 : colsR;
17061 		const size_t	alignedRowsR	= (rowsR == 3) ? 4 : rowsR;
17062 
17063 		DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17064 		DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17065 		DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17066 		DE_UNREF(alignedColsL);
17067 		DE_UNREF(alignedColsR);
17068 
17069 		if (getFlavor() == 0)
17070 		{
17071 			for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17072 			{
17073 				for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17074 				{
17075 					const size_t	ndx	(colNdx * alignedRowsL + rowNdx);
17076 					fp16type		s	(fp16type::zero(1));
17077 
17078 					for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17079 					{
17080 						const size_t	ndxl	(commonNdx * alignedRowsL + rowNdx);
17081 						const fp16type	l		(in[0][ndxl]);
17082 						const float		lf		(l.asFloat());
17083 						const size_t	ndxr	(colNdx * alignedRowsR + commonNdx);
17084 						const fp16type	r		(in[1][ndxr]);
17085 						const float		rf		(r.asFloat());
17086 						const fp16type	m		(lf * rf);
17087 
17088 						s = fp16type(s.asFloat() + m.asFloat());
17089 					}
17090 
17091 					out[ndx] = s.bits();
17092 					min[ndx] = getMin(s.asDouble(), getULPs(in));
17093 					max[ndx] = getMax(s.asDouble(), getULPs(in));
17094 				}
17095 			}
17096 		}
17097 		else if (getFlavor() == 1)
17098 		{
17099 			for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17100 			{
17101 				for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17102 				{
17103 					const size_t	ndx	(colNdx * alignedRowsL + rowNdx);
17104 					float			s	(0.0f);
17105 
17106 					for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17107 					{
17108 						const size_t	ndxl	(commonNdx * alignedRowsL + rowNdx);
17109 						const fp16type	l		(in[0][ndxl]);
17110 						const float		lf		(l.asFloat());
17111 						const size_t	ndxr	(colNdx * alignedRowsR + commonNdx);
17112 						const fp16type	r		(in[1][ndxr]);
17113 						const float		rf		(r.asFloat());
17114 						const float		m		(lf * rf);
17115 
17116 						s += m;
17117 					}
17118 
17119 					out[ndx] = fp16type(s).bits();
17120 					min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17121 					max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17122 				}
17123 			}
17124 		}
17125 		else
17126 		{
17127 			TCU_THROW(InternalError, "Unknown flavor");
17128 		}
17129 
17130 		return true;
17131 	}
17132 };
17133 
17134 template<size_t cols, size_t rows>
17135 struct fp16OuterProduct : public fp16MatrixBase
17136 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17137 	virtual double getULPs (vector<const deFloat16*>& in)
17138 	{
17139 		DE_UNREF(in);
17140 
17141 		return 2.0;
17142 	}
17143 
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17144 	deUint32 getComponentValidity ()
17145 	{
17146 		return getComponentMatrixValidityMask(cols, rows);
17147 	}
17148 
17149 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17150 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17151 	{
17152 		DE_ASSERT(in.size() == 2);
17153 
17154 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17155 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17156 
17157 		DE_ASSERT(getArgCompCount(0) == rows);
17158 		DE_ASSERT(getArgCompCount(1) == cols);
17159 		DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17160 		DE_UNREF(alignedCols);
17161 
17162 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17163 		{
17164 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17165 			{
17166 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
17167 				const fp16type	x	(in[0][rowNdx]);
17168 				const float		xf	(x.asFloat());
17169 				const fp16type	y	(in[1][colNdx]);
17170 				const float		yf	(y.asFloat());
17171 				const fp16type	m	(xf * yf);
17172 
17173 				out[ndx] = m.bits();
17174 				min[ndx] = getMin(m.asDouble(), getULPs(in));
17175 				max[ndx] = getMax(m.asDouble(), getULPs(in));
17176 			}
17177 		}
17178 
17179 		return true;
17180 	}
17181 };
17182 
17183 template<size_t size>
17184 struct fp16Determinant;
17185 
17186 template<>
17187 struct fp16Determinant<2> : public fp16MatrixBase
17188 {
getULPsvkt::SpirVAssembly::fp16Determinant17189 	virtual double getULPs (vector<const deFloat16*>& in)
17190 	{
17191 		DE_UNREF(in);
17192 
17193 		return 128.0; // This is not a precision test. Value is not from spec
17194 	}
17195 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17196 	deUint32 getComponentValidity ()
17197 	{
17198 		return 1;
17199 	}
17200 
17201 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17202 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17203 	{
17204 		const size_t	cols		= 2;
17205 		const size_t	rows		= 2;
17206 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17207 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17208 
17209 		DE_ASSERT(in.size() == 1);
17210 		DE_ASSERT(getOutCompCount() == 1);
17211 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17212 		DE_UNREF(alignedCols);
17213 		DE_UNREF(alignedRows);
17214 
17215 		// [ a b ]
17216 		// [ c d ]
17217 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17218 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17219 		const float		c		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17220 		const float		d		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17221 		const float		ad		(a * d);
17222 		const fp16type	adf16	(ad);
17223 		const float		bc		(b * c);
17224 		const fp16type	bcf16	(bc);
17225 		const float		r		(adf16.asFloat() - bcf16.asFloat());
17226 		const fp16type	rf16	(r);
17227 
17228 		out[0] = rf16.bits();
17229 		min[0] = getMin(r, getULPs(in));
17230 		max[0] = getMax(r, getULPs(in));
17231 
17232 		return true;
17233 	}
17234 };
17235 
17236 template<>
17237 struct fp16Determinant<3> : public fp16MatrixBase
17238 {
getULPsvkt::SpirVAssembly::fp16Determinant17239 	virtual double getULPs (vector<const deFloat16*>& in)
17240 	{
17241 		DE_UNREF(in);
17242 
17243 		return 128.0; // This is not a precision test. Value is not from spec
17244 	}
17245 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17246 	deUint32 getComponentValidity ()
17247 	{
17248 		return 1;
17249 	}
17250 
17251 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17252 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17253 	{
17254 		const size_t	cols		= 3;
17255 		const size_t	rows		= 3;
17256 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17257 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17258 
17259 		DE_ASSERT(in.size() == 1);
17260 		DE_ASSERT(getOutCompCount() == 1);
17261 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17262 		DE_UNREF(alignedCols);
17263 		DE_UNREF(alignedRows);
17264 
17265 		// [ a b c ]
17266 		// [ d e f ]
17267 		// [ g h i ]
17268 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17269 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17270 		const float		c		(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17271 		const float		d		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17272 		const float		e		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17273 		const float		f		(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17274 		const float		g		(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17275 		const float		h		(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17276 		const float		i		(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17277 		const fp16type	aei		(a * e * i);
17278 		const fp16type	bfg		(b * f * g);
17279 		const fp16type	cdh		(c * d * h);
17280 		const fp16type	ceg		(c * e * g);
17281 		const fp16type	bdi		(b * d * i);
17282 		const fp16type	afh		(a * f * h);
17283 		const float		r		(aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17284 		const fp16type	rf16	(r);
17285 
17286 		out[0] = rf16.bits();
17287 		min[0] = getMin(r, getULPs(in));
17288 		max[0] = getMax(r, getULPs(in));
17289 
17290 		return true;
17291 	}
17292 };
17293 
17294 template<>
17295 struct fp16Determinant<4> : public fp16MatrixBase
17296 {
getULPsvkt::SpirVAssembly::fp16Determinant17297 	virtual double getULPs (vector<const deFloat16*>& in)
17298 	{
17299 		DE_UNREF(in);
17300 
17301 		return 128.0; // This is not a precision test. Value is not from spec
17302 	}
17303 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17304 	deUint32 getComponentValidity ()
17305 	{
17306 		return 1;
17307 	}
17308 
17309 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17310 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17311 	{
17312 		const size_t	rows		= 4;
17313 		const size_t	cols		= 4;
17314 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17315 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17316 
17317 		DE_ASSERT(in.size() == 1);
17318 		DE_ASSERT(getOutCompCount() == 1);
17319 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17320 		DE_UNREF(alignedCols);
17321 		DE_UNREF(alignedRows);
17322 
17323 		// [ a b c d ]
17324 		// [ e f g h ]
17325 		// [ i j k l ]
17326 		// [ m n o p ]
17327 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17328 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17329 		const float		c		(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17330 		const float		d		(fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17331 		const float		e		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17332 		const float		f		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17333 		const float		g		(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17334 		const float		h		(fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17335 		const float		i		(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17336 		const float		j		(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17337 		const float		k		(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17338 		const float		l		(fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17339 		const float		m		(fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17340 		const float		n		(fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17341 		const float		o		(fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17342 		const float		p		(fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17343 
17344 		// [ f g h ]
17345 		// [ j k l ]
17346 		// [ n o p ]
17347 		const fp16type	fkp		(f * k * p);
17348 		const fp16type	gln		(g * l * n);
17349 		const fp16type	hjo		(h * j * o);
17350 		const fp16type	hkn		(h * k * n);
17351 		const fp16type	gjp		(g * j * p);
17352 		const fp16type	flo		(f * l * o);
17353 		const fp16type	detA	(a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17354 
17355 		// [ e g h ]
17356 		// [ i k l ]
17357 		// [ m o p ]
17358 		const fp16type	ekp		(e * k * p);
17359 		const fp16type	glm		(g * l * m);
17360 		const fp16type	hio		(h * i * o);
17361 		const fp16type	hkm		(h * k * m);
17362 		const fp16type	gip		(g * i * p);
17363 		const fp16type	elo		(e * l * o);
17364 		const fp16type	detB	(b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17365 
17366 		// [ e f h ]
17367 		// [ i j l ]
17368 		// [ m n p ]
17369 		const fp16type	ejp		(e * j * p);
17370 		const fp16type	flm		(f * l * m);
17371 		const fp16type	hin		(h * i * n);
17372 		const fp16type	hjm		(h * j * m);
17373 		const fp16type	fip		(f * i * p);
17374 		const fp16type	eln		(e * l * n);
17375 		const fp16type	detC	(c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17376 
17377 		// [ e f g ]
17378 		// [ i j k ]
17379 		// [ m n o ]
17380 		const fp16type	ejo		(e * j * o);
17381 		const fp16type	fkm		(f * k * m);
17382 		const fp16type	gin		(g * i * n);
17383 		const fp16type	gjm		(g * j * m);
17384 		const fp16type	fio		(f * i * o);
17385 		const fp16type	ekn		(e * k * n);
17386 		const fp16type	detD	(d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17387 
17388 		const float		r		(detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17389 		const fp16type	rf16	(r);
17390 
17391 		out[0] = rf16.bits();
17392 		min[0] = getMin(r, getULPs(in));
17393 		max[0] = getMax(r, getULPs(in));
17394 
17395 		return true;
17396 	}
17397 };
17398 
17399 template<size_t size>
17400 struct fp16Inverse;
17401 
17402 template<>
17403 struct fp16Inverse<2> : public fp16MatrixBase
17404 {
getULPsvkt::SpirVAssembly::fp16Inverse17405 	virtual double getULPs (vector<const deFloat16*>& in)
17406 	{
17407 		DE_UNREF(in);
17408 
17409 		return 128.0; // This is not a precision test. Value is not from spec
17410 	}
17411 
getComponentValidityvkt::SpirVAssembly::fp16Inverse17412 	deUint32 getComponentValidity ()
17413 	{
17414 		return getComponentMatrixValidityMask(2, 2);
17415 	}
17416 
17417 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17418 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17419 	{
17420 		const size_t	cols		= 2;
17421 		const size_t	rows		= 2;
17422 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17423 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17424 
17425 		DE_ASSERT(in.size() == 1);
17426 		DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17427 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17428 		DE_UNREF(alignedCols);
17429 
17430 		// [ a b ]
17431 		// [ c d ]
17432 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17433 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17434 		const float		c		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17435 		const float		d		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17436 		const float		ad		(a * d);
17437 		const fp16type	adf16	(ad);
17438 		const float		bc		(b * c);
17439 		const fp16type	bcf16	(bc);
17440 		const float		det		(adf16.asFloat() - bcf16.asFloat());
17441 		const fp16type	det16	(det);
17442 
17443 		out[0] = fp16type( d / det16.asFloat()).bits();
17444 		out[1] = fp16type(-c / det16.asFloat()).bits();
17445 		out[2] = fp16type(-b / det16.asFloat()).bits();
17446 		out[3] = fp16type( a / det16.asFloat()).bits();
17447 
17448 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17449 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17450 			{
17451 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
17452 				const fp16type	s	(out[ndx]);
17453 
17454 				min[ndx] = getMin(s.asDouble(), getULPs(in));
17455 				max[ndx] = getMax(s.asDouble(), getULPs(in));
17456 			}
17457 
17458 		return true;
17459 	}
17460 };
17461 
fp16ToString(deFloat16 val)17462 inline std::string fp16ToString(deFloat16 val)
17463 {
17464 	return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17465 }
17466 
17467 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17468 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
17469 {
17470 	if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17471 		return false;
17472 
17473 	const size_t	resultStep			= (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17474 	const size_t	iterationsCount		= expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17475 	const size_t	inputsSteps[3]		=
17476 	{
17477 		(ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17478 		(ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17479 		(ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17480 	};
17481 
17482 	DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17483 	DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17484 
17485 	for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17486 	{
17487 		DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17488 		DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17489 	}
17490 
17491 	const deFloat16* const		outputAsFP16					= (const deFloat16*)outputAllocs[0]->getHostPtr();
17492 	TestedArithmeticFunction	func;
17493 
17494 	func.setOutCompCount(RES_COMPONENTS);
17495 	func.setArgCompCount(0, ARG0_COMPONENTS);
17496 	func.setArgCompCount(1, ARG1_COMPONENTS);
17497 	func.setArgCompCount(2, ARG2_COMPONENTS);
17498 
17499 	const bool					callOncePerComponent			= func.callOncePerComponent();
17500 	const deUint32				componentValidityMask			= func.getComponentValidity();
17501 	const size_t				denormModesCount				= 2;
17502 	const char*					denormModes[denormModesCount]	= { "keep denormal numbers", "flush to zero" };
17503 	const size_t				successfulRunsPerComponent		= denormModesCount * func.getFlavorCount();
17504 	bool						success							= true;
17505 	size_t						validatedCount					= 0;
17506 
17507 	vector<deUint8>	inputBytes[3];
17508 
17509 	for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17510 		inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17511 
17512 	const deFloat16* const			inputsAsFP16[3]			=
17513 	{
17514 		inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17515 		inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17516 		inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17517 	};
17518 
17519 	for (size_t idx = 0; idx < iterationsCount; ++idx)
17520 	{
17521 		std::vector<size_t>			successfulRuns		(RES_COMPONENTS, successfulRunsPerComponent);
17522 		std::vector<std::string>	errors				(RES_COMPONENTS);
17523 		bool						iterationValidated	(true);
17524 
17525 		for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17526 		{
17527 			for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17528 			{
17529 				func.setFlavor(flavorNdx);
17530 
17531 				const deFloat16*			iterationOutputFP16		= &outputAsFP16[idx * resultStep];
17532 				vector<deFloat16>			iterationCalculatedFP16	(resultStep, 0);
17533 				vector<double>				iterationEdgeMin		(resultStep, 0.0);
17534 				vector<double>				iterationEdgeMax		(resultStep, 0.0);
17535 				vector<const deFloat16*>	arguments;
17536 
17537 				for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17538 				{
17539 					std::string	error;
17540 					bool		reportError = false;
17541 
17542 					if (callOncePerComponent || componentNdx == 0)
17543 					{
17544 						bool funcCallResult;
17545 
17546 						arguments.clear();
17547 
17548 						for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17549 							arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17550 
17551 						if (denormNdx == 0)
17552 							funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17553 						else
17554 							funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17555 
17556 						if (!funcCallResult)
17557 						{
17558 							iterationValidated = false;
17559 
17560 							if (callOncePerComponent)
17561 								continue;
17562 							else
17563 								break;
17564 						}
17565 					}
17566 
17567 					if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17568 						continue;
17569 
17570 					reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17571 
17572 					if (reportError)
17573 					{
17574 						tcu::Float16 expected	(iterationCalculatedFP16[componentNdx]);
17575 						tcu::Float16 outputted	(iterationOutputFP16[componentNdx]);
17576 						tcu::Float64 edgeMin    (iterationEdgeMin[componentNdx]);
17577 						tcu::Float64 edgeMax    (iterationEdgeMax[componentNdx]);
17578 
17579 						if (reportError && expected.isNaN())
17580 							reportError = false;
17581 
17582 						if (reportError && !expected.isNaN() && !outputted.isNaN())
17583 						{
17584 							if (reportError && !expected.isInf() && !outputted.isInf())
17585 							{
17586 								// Ignore rounding
17587 								if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17588 									reportError = false;
17589 							}
17590 
17591 							if (reportError && expected.isInf())
17592 							{
17593 								// RTZ rounding mode returns +/-65504 instead of Inf on overflow
17594 								if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17595 									reportError = false;
17596 								else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17597 									reportError = false;
17598 							}
17599 
17600 							if (reportError)
17601 							{
17602 								const double	outputtedDouble	= outputted.asDouble();
17603 
17604 							    DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
17605 
17606 								if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17607 									reportError = false;
17608 							}
17609 						}
17610 
17611 						if (reportError)
17612 						{
17613 							const size_t		inputsComps[3]	=
17614 							{
17615 								ARG0_COMPONENTS,
17616 								ARG1_COMPONENTS,
17617 								ARG2_COMPONENTS,
17618 							};
17619 							string				inputsValues	("Inputs:");
17620 							string				flavorName		(func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17621 							std::stringstream	errStream;
17622 
17623 							for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17624 							{
17625 								const size_t	inputCompsCount = inputsComps[inputNdx];
17626 
17627 								inputsValues += " [" + de::toString(inputNdx) + "]=(";
17628 
17629 								for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17630 								{
17631 									const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17632 
17633 									inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17634 								}
17635 							}
17636 
17637 							errStream	<< "At"
17638 										<< " iteration " << de::toString(idx)
17639 										<< " component " << de::toString(componentNdx)
17640 										<< " denormMode " << de::toString(denormNdx)
17641 										<< " (" << denormModes[denormNdx] << ")"
17642 										<< " " << flavorName
17643 										<< " " << inputsValues
17644 										<< " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17645 										<< " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17646 										<< " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17647 										<< " " << error << "."
17648 										<< std::endl;
17649 
17650 							errors[componentNdx] += errStream.str();
17651 
17652 							successfulRuns[componentNdx]--;
17653 						}
17654 					}
17655 				}
17656 			}
17657 		}
17658 
17659 		for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17660 		{
17661 			// Check if any component has total failure
17662 			if (successfulRuns[componentNdx] == 0)
17663 			{
17664 				// Test failed in all denorm modes and all flavors for certain component: dump errors
17665 				log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17666 
17667 				success = false;
17668 			}
17669 		}
17670 
17671 		if (iterationValidated)
17672 			validatedCount++;
17673 	}
17674 
17675 	if (validatedCount < 16)
17676 		TCU_THROW(InternalError, "Too few samples have been validated.");
17677 
17678 	return success;
17679 }
17680 
17681 // IEEE-754 floating point numbers:
17682 // +--------+------+----------+-------------+
17683 // | binary | sign | exponent | significand |
17684 // +--------+------+----------+-------------+
17685 // | 16-bit |  1   |    5     |     10      |
17686 // +--------+------+----------+-------------+
17687 // | 32-bit |  1   |    8     |     23      |
17688 // +--------+------+----------+-------------+
17689 //
17690 // 16-bit floats:
17691 //
17692 // 0   000 00   00 0000 0001 (0x0001: 2e-24:         minimum positive denormalized)
17693 // 0   000 00   11 1111 1111 (0x03ff: 2e-14 - 2e-24: maximum positive denormalized)
17694 // 0   000 01   00 0000 0000 (0x0400: 2e-14:         minimum positive normalized)
17695 // 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
17696 //
17697 // 0   000 00   00 0000 0000 (0x0000: +0)
17698 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
17699 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
17700 // 0   000 01   00 0000 0001 (0x0401: +Norm)
17701 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
17702 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
17703 // Generate and return 16-bit floats and their corresponding 32-bit values.
17704 //
17705 // The first 14 number pairs are manually picked, while the rest are randomly generated.
17706 // Expected count to be at least 14 (numPicks).
getFloat16a(de::Random & rnd,deUint32 count)17707 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17708 {
17709 	vector<deFloat16>	float16;
17710 
17711 	float16.reserve(count);
17712 
17713 	// Zero
17714 	float16.push_back(deUint16(0x0000));
17715 	float16.push_back(deUint16(0x8000));
17716 	// Infinity
17717 	float16.push_back(deUint16(0x7c00));
17718 	float16.push_back(deUint16(0xfc00));
17719 	// Normalized
17720 	float16.push_back(deUint16(0x0401));
17721 	float16.push_back(deUint16(0x8401));
17722 	// Some normal number
17723 	float16.push_back(deUint16(0x14cb));
17724 	float16.push_back(deUint16(0x94cb));
17725 	// Min/max positive normal
17726 	float16.push_back(deUint16(0x0400));
17727 	float16.push_back(deUint16(0x7bff));
17728 	// Min/max negative normal
17729 	float16.push_back(deUint16(0x8400));
17730 	float16.push_back(deUint16(0xfbff));
17731 	// PI
17732 	float16.push_back(deUint16(0x4248)); // 3.140625
17733 	float16.push_back(deUint16(0xb248)); // -3.140625
17734 	// PI/2
17735 	float16.push_back(deUint16(0x3e48)); // 1.5703125
17736 	float16.push_back(deUint16(0xbe48)); // -1.5703125
17737 	float16.push_back(deUint16(0x3c00)); // 1.0
17738 	float16.push_back(deUint16(0x3800)); // 0.5
17739 	// Some useful constants
17740 	float16.push_back(tcu::Float16(-2.5f).bits());
17741 	float16.push_back(tcu::Float16(-1.0f).bits());
17742 	float16.push_back(tcu::Float16( 0.4f).bits());
17743 	float16.push_back(tcu::Float16( 2.5f).bits());
17744 
17745 	const deUint32		numPicks	= static_cast<deUint32>(float16.size());
17746 
17747 	DE_ASSERT(count >= numPicks);
17748 	count -= numPicks;
17749 
17750 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17751 	{
17752 		int			sign		= (rnd.getUint16() % 2 == 0) ? +1 : -1;
17753 		int			exponent	= (rnd.getUint16() % 29) - 14 + 1;
17754 		deUint16	mantissa	= static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17755 
17756 		// Exclude power of -14 to avoid denorms
17757 		DE_ASSERT(de::inRange(exponent, -13, 15));
17758 
17759 		float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17760 	}
17761 
17762 	return float16;
17763 }
17764 
getInputData1(deUint32 seed,size_t count,size_t argNo)17765 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17766 {
17767 	DE_UNREF(argNo);
17768 
17769 	de::Random	rnd(seed);
17770 
17771 	return getFloat16a(rnd, static_cast<deUint32>(count));
17772 }
17773 
getInputData2(deUint32 seed,size_t count,size_t argNo)17774 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17775 {
17776 	de::Random	rnd		(seed);
17777 	size_t		newCount = static_cast<size_t>(deSqrt(double(count)));
17778 
17779 	DE_ASSERT(newCount * newCount == count);
17780 
17781 	vector<deFloat16>	float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17782 
17783 	return squarize(float16, static_cast<deUint32>(argNo));
17784 }
17785 
getInputData3(deUint32 seed,size_t count,size_t argNo)17786 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17787 {
17788 	if (argNo == 0 || argNo == 1)
17789 		return getInputData2(seed, count, argNo);
17790 	else
17791 		return getInputData1(seed<<argNo, count, argNo);
17792 }
17793 
getInputData(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17794 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17795 {
17796 	DE_UNREF(stride);
17797 
17798 	vector<deFloat16>	result;
17799 
17800 	switch (argCount)
17801 	{
17802 		case 1:result = getInputData1(seed, count, argNo); break;
17803 		case 2:result = getInputData2(seed, count, argNo); break;
17804 		case 3:result = getInputData3(seed, count, argNo); break;
17805 		default: TCU_THROW(InternalError, "Invalid argument count specified");
17806 	}
17807 
17808 	if (compCount == 3)
17809 	{
17810 		const size_t		newCount = (3 * count) / 4;
17811 		vector<deFloat16>	newResult;
17812 
17813 		newResult.reserve(result.size());
17814 
17815 		for (size_t ndx = 0; ndx < newCount; ++ndx)
17816 		{
17817 			newResult.push_back(result[ndx]);
17818 
17819 			if (ndx % 3 == 2)
17820 				newResult.push_back(0);
17821 		}
17822 
17823 		result = newResult;
17824 	}
17825 
17826 	DE_ASSERT(result.size() == count);
17827 
17828 	return result;
17829 }
17830 
17831 // Generator for functions requiring data in range [1, inf]
getInputDataAC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17832 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17833 {
17834 	vector<deFloat16>	result;
17835 
17836 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17837 
17838 	// Filter out values below 1.0 from upper half of numbers
17839 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17840 	{
17841 		const float f = tcu::Float16(result[idx]).asFloat();
17842 
17843 		if (f < 1.0f)
17844 			result[idx] = tcu::Float16(1.0f - f).bits();
17845 	}
17846 
17847 	return result;
17848 }
17849 
17850 // Generator for functions requiring data in range [-1, 1]
getInputDataA(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17851 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17852 {
17853 	vector<deFloat16>	result;
17854 
17855 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17856 
17857 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17858 	{
17859 		const float f = tcu::Float16(result[idx]).asFloat();
17860 
17861 		if (!de::inRange(f, -1.0f, 1.0f))
17862 			result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17863 	}
17864 
17865 	return result;
17866 }
17867 
17868 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17869 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17870 {
17871 	vector<deFloat16>	result;
17872 
17873 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17874 
17875 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17876 	{
17877 		const float f = tcu::Float16(result[idx]).asFloat();
17878 
17879 		if (!de::inRange(f, -DE_PI, DE_PI))
17880 			result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17881 	}
17882 
17883 	return result;
17884 }
17885 
17886 // Generator for functions requiring data in range [0, inf]
getInputDataP(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17887 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17888 {
17889 	vector<deFloat16>	result;
17890 
17891 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17892 
17893 	if (argNo == 0)
17894 	{
17895 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17896 			result[idx] &= static_cast<deFloat16>(~0x8000);
17897 	}
17898 
17899 	return result;
17900 }
17901 
getInputDataV(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17902 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17903 {
17904 	DE_UNREF(stride);
17905 	DE_UNREF(argCount);
17906 
17907 	vector<deFloat16>	result;
17908 
17909 	if (argNo == 0)
17910 		result = getInputData2(seed, count, argNo);
17911 	else
17912 	{
17913 		const size_t		alignedCount	= (compCount == 3) ? 4 : compCount;
17914 		const size_t		newCountX		= static_cast<size_t>(deSqrt(double(count * alignedCount)));
17915 		const size_t		newCountY		= count / newCountX;
17916 		de::Random			rnd				(seed);
17917 		vector<deFloat16>	float16			= getFloat16a(rnd, static_cast<deUint32>(newCountX));
17918 
17919 		DE_ASSERT(newCountX * newCountX == alignedCount * count);
17920 
17921 		for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17922 		{
17923 			const vector<deFloat16>	tmp(newCountY, float16[numIdx]);
17924 
17925 			result.insert(result.end(), tmp.begin(), tmp.end());
17926 		}
17927 	}
17928 
17929 	DE_ASSERT(result.size() == count);
17930 
17931 	return result;
17932 }
17933 
getInputDataM(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17934 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17935 {
17936 	DE_UNREF(compCount);
17937 	DE_UNREF(stride);
17938 	DE_UNREF(argCount);
17939 
17940 	de::Random			rnd		(seed << argNo);
17941 	vector<deFloat16>	result;
17942 
17943 	result = getFloat16a(rnd, static_cast<deUint32>(count));
17944 
17945 	DE_ASSERT(result.size() == count);
17946 
17947 	return result;
17948 }
17949 
getInputDataD(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17950 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17951 {
17952 	DE_UNREF(compCount);
17953 	DE_UNREF(argCount);
17954 
17955 	de::Random			rnd		(seed << argNo);
17956 	vector<deFloat16>	result;
17957 
17958 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17959 	{
17960 		int num	= (rnd.getUint16() % 16) - 8;
17961 
17962 		result.push_back(tcu::Float16(float(num)).bits());
17963 	}
17964 
17965 	result[0 * stride] = deUint16(0x7c00); // +Inf
17966 	result[1 * stride] = deUint16(0xfc00); // -Inf
17967 
17968 	DE_ASSERT(result.size() == count);
17969 
17970 	return result;
17971 }
17972 
17973 // Generator for smoothstep function
getInputDataSS(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17974 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17975 {
17976 	vector<deFloat16>	result;
17977 
17978 	result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
17979 
17980 	if (argNo == 0)
17981 	{
17982 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17983 		{
17984 			const float f = tcu::Float16(result[idx]).asFloat();
17985 
17986 			if (f > 4.0f)
17987 				result[idx] = tcu::Float16(-f).bits();
17988 		}
17989 	}
17990 
17991 	if (argNo == 1)
17992 	{
17993 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17994 		{
17995 			const float f = tcu::Float16(result[idx]).asFloat();
17996 
17997 			if (f < 4.0f)
17998 				result[idx] = tcu::Float16(-f).bits();
17999 		}
18000 	}
18001 
18002 	return result;
18003 }
18004 
18005 // Generates normalized vectors for arguments 0 and 1
getInputDataN(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18006 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18007 {
18008 	DE_UNREF(compCount);
18009 	DE_UNREF(argCount);
18010 
18011 	de::Random			rnd		(seed << argNo);
18012 	vector<deFloat16>	result;
18013 
18014 	if (argNo == 0 || argNo == 1)
18015 	{
18016 		// The input parameters for the incident vector I and the surface normal N must already be normalized
18017 		for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18018 		{
18019 			vector <float>	unnormolized;
18020 			float			sum				= 0;
18021 
18022 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18023 				unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18024 
18025 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18026 				sum += unnormolized[compIdx] * unnormolized[compIdx];
18027 
18028 			sum = deFloatSqrt(sum);
18029 			if (sum == 0.0f)
18030 				unnormolized[0] = sum = 1.0f;
18031 
18032 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18033 				result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18034 
18035 			for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18036 				result.push_back(0);
18037 		}
18038 	}
18039 	else
18040 	{
18041 		// Input parameter eta
18042 		for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18043 		{
18044 			int num	= (rnd.getUint16() % 16) - 8;
18045 
18046 			result.push_back(tcu::Float16(float(num)).bits());
18047 		}
18048 	}
18049 
18050 	DE_ASSERT(result.size() == count);
18051 
18052 	return result;
18053 }
18054 
18055 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18056 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18057 {
18058 	DE_UNREF(compCount);
18059 	DE_UNREF(stride);
18060 	DE_UNREF(argCount);
18061 
18062 	de::Random			rnd		(seed << argNo);
18063 	vector<deFloat16>	result;
18064 
18065 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18066 	{
18067 		int num	= (rnd.getUint16() % 16) - 8;
18068 
18069 		result.push_back(tcu::Float16(float(num)).bits());
18070 	}
18071 
18072 	DE_ASSERT(result.size() == count);
18073 
18074 	return result;
18075 }
18076 
// Describes one 16-bit float data type (scalar, vector or matrix) used by the math tests
struct Math16TestType
{
	//! Prefix prepended to the component type name to form the SPIR-V type id (e.g. "v2" + "f16")
	const char*		typePrefix;

	//! Number of components for scalar/vector types; 0 for the matrix entries and the placeholder entry
	const size_t	typeComponents;

	//! Size in bytes of one element of this type in the data arrays
	const size_t	typeArrayStride;

	//! Byte offset substituted for ${struct_stride} when decorating the second struct member
	const size_t	typeStructStride;

	//! Suffix selecting the %up_SSBO_<...> storage type used for buffers of this type
	const char*		storage_type;
};
18085 
// Identifiers of the data types handled by the 16-bit float math tests.
// For scalar and vector types the value equals the component count.
enum Math16DataTypes
{
	NONE				= 0,

	// Scalar and vectors
	SCALAR				= 1,
	VEC2				= 2,
	VEC3				= 3,
	VEC4				= 4,

	// Matrices
	MAT2X2				= 5,
	MAT2X3				= 6,
	MAT2X4				= 7,
	MAT3X2				= 8,
	MAT3X3				= 9,
	MAT3X4				= 10,
	MAT4X2				= 11,
	MAT4X3				= 12,
	MAT4X4				= 13,

	// Number of entries, not a type
	MATH16_TYPE_LAST	= 14
};
18104 
// SPIR-V assembly snippets (string templates) that differ per tested function shape
struct Math16ArgFragments
{
	//! Instructions executed for each data point: load arguments, run the operation, store the result
	const char*	bodies;

	//! Extra module-level type/constant declarations appended after the SSBO variables
	const char*	variables;

	//! Extra decorations appended after the SSBO decorations
	const char*	decorations;

	//! Function-scope OpVariable declarations placed at the start of the test function
	const char*	funcVariables;
};
18112 
18113 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
18114 
18115 struct Math16TestFunc
18116 {
18117 	const char*					funcName;
18118 	const char*					funcSuffix;
18119 	size_t						funcArgsCount;
18120 	size_t						typeResult;
18121 	size_t						typeArg0;
18122 	size_t						typeArg1;
18123 	size_t						typeArg2;
18124 	Math16GetInputData*			getInputDataFunc;
18125 	VerifyIOFunc				verifyFunc;
18126 };
18127 
18128 template<class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18129 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18130 {
18131 	const int					testSpecificSeed			= deStringHash(testGroup.getName());
18132 	const int					seed						= testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18133 	const size_t				numDataPointsByAxis			= 32;
18134 	const size_t				numDataPoints				= numDataPointsByAxis * numDataPointsByAxis;
18135 	const char*					componentType				= "f16";
18136 	const Math16TestType		testTypes[MATH16_TYPE_LAST]	=
18137 	{
18138 		{ "",		0,	 0,						 0,						"" },
18139 		{ "",		1,	 1 * sizeof(deFloat16),	 2 * sizeof(deFloat16),	"u32_half_ndp" },
18140 		{ "v2",		2,	 2 * sizeof(deFloat16),	 2 * sizeof(deFloat16),	"u32_ndp" },
18141 		{ "v3",		3,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18142 		{ "v4",		4,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18143 		{ "m2x2",	0,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18144 		{ "m2x3",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18145 		{ "m2x4",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18146 		{ "m3x2",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_3" },
18147 		{ "m3x3",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_6" },
18148 		{ "m3x4",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_6" },
18149 		{ "m4x2",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18150 		{ "m4x3",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_8" },
18151 		{ "m4x4",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_8" },
18152 	};
18153 
18154 	DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18155 
18156 
18157 	const StringTemplate preMain
18158 	(
18159 		"     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
18160 
18161 		"        %f16     = OpTypeFloat 16\n"
18162 		"        %v2f16   = OpTypeVector %f16 2\n"
18163 		"        %v3f16   = OpTypeVector %f16 3\n"
18164 		"        %v4f16   = OpTypeVector %f16 4\n"
18165 		"        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18166 		"        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18167 		"        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18168 		"        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18169 		"        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18170 		"        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18171 		"        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18172 		"        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18173 		"        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18174 
18175 		"       %fp_v2i32 = OpTypePointer Function %v2i32\n"
18176 		"       %fp_v3i32 = OpTypePointer Function %v3i32\n"
18177 		"       %fp_v4i32 = OpTypePointer Function %v4i32\n"
18178 
18179 		"      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18180 		" %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18181 		"        %c_u32_5 = OpConstant %u32 5\n"
18182 		"        %c_u32_6 = OpConstant %u32 6\n"
18183 		"        %c_u32_7 = OpConstant %u32 7\n"
18184 		"        %c_u32_8 = OpConstant %u32 8\n"
18185 		"        %c_f16_0 = OpConstant %f16 0\n"
18186 		"        %c_f16_1 = OpConstant %f16 1\n"
18187 		"      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18188 		"         %up_u32 = OpTypePointer Uniform %u32\n"
18189 		"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18190 		" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18191 
18192 		"    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18193 		"  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18194 		"%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18195 		"         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18196 		"       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18197 		"    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18198 		"           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18199 		"        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18200 		"      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18201 		"     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18202 		"  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18203 		"           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18204 		"        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18205 		"        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18206 		"     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18207 		"  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18208 		"           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18209 		"        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18210 		"        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18211 		"     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18212 		"  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18213 		"           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18214 		"        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18215 		"        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18216 		"     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18217 		"  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18218 		"           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18219 		"        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18220 		"        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18221 		"     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18222 		"  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18223 
18224 		"         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18225 		"       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18226 		"       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18227 		"       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18228 		"     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18229 		"     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18230 		"     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18231 		"     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18232 		"     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18233 		"     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18234 		"     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18235 		"     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18236 		"     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18237 		"    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18238 		"  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18239 		"  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18240 		"  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18241 		"%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18242 		"%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18243 		"%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18244 		"%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18245 		"%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18246 		"%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18247 		"%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18248 		"%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18249 		"%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18250 		"${arg_vars}"
18251 	);
18252 
18253 	const StringTemplate decoration
18254 	(
18255 		"OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18256 		"OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18257 		"OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18258 
18259 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
18260 		"OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18261 		"OpDecorate %SSBO_u32_ndp BufferBlock\n"
18262 
18263 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
18264 		"OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18265 		"OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18266 		"OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18267 
18268 		"OpDecorate %ra_u32_4 ArrayStride 4\n"
18269 		"OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18270 		"OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18271 		"OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18272 
18273 		"OpDecorate %ra_u32_3 ArrayStride 4\n"
18274 		"OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18275 		"OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18276 		"OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18277 
18278 		"OpDecorate %ra_u32_6 ArrayStride 4\n"
18279 		"OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18280 		"OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18281 		"OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18282 
18283 		"OpDecorate %ra_u32_8 ArrayStride 4\n"
18284 		"OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18285 		"OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18286 		"OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18287 
18288 		"${arg_decorations}"
18289 	);
18290 
18291 	const StringTemplate testFun
18292 	(
18293 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18294 		"    %param = OpFunctionParameter %v4f32\n"
18295 		"    %entry = OpLabel\n"
18296 
18297 		"        %i = OpVariable %fp_i32 Function\n"
18298 		"${arg_infunc_vars}"
18299 		"             OpStore %i %c_i32_0\n"
18300 		"             OpBranch %loop\n"
18301 
18302 		"     %loop = OpLabel\n"
18303 		"    %i_cmp = OpLoad %i32 %i\n"
18304 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18305 		"             OpLoopMerge %merge %next None\n"
18306 		"             OpBranchConditional %lt %write %merge\n"
18307 
18308 		"    %write = OpLabel\n"
18309 		"      %ndx = OpLoad %i32 %i\n"
18310 
18311 		"${arg_func_call}"
18312 
18313 		"             OpBranch %next\n"
18314 
18315 		"     %next = OpLabel\n"
18316 		"    %i_cur = OpLoad %i32 %i\n"
18317 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18318 		"             OpStore %i %i_new\n"
18319 		"             OpBranch %loop\n"
18320 
18321 		"    %merge = OpLabel\n"
18322 		"             OpReturnValue %param\n"
18323 		"             OpFunctionEnd\n"
18324 	);
18325 
18326 	const Math16ArgFragments	argFragment1	=
18327 	{
18328 		"     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18329 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18330 		"     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18331 		"",
18332 		"",
18333 		"",
18334 	};
18335 
18336 	const Math16ArgFragments	argFragment2	=
18337 	{
18338 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18339 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18340 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18341 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18342 		"",
18343 		"",
18344 		"",
18345 	};
18346 
18347 	const Math16ArgFragments	argFragment3	=
18348 	{
18349 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18350 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18351 		" %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18352 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18353 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18354 		"",
18355 		"",
18356 		"",
18357 	};
18358 
18359 	const Math16ArgFragments	argFragmentLdExp	=
18360 	{
18361 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18362 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18363 		"%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18364 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18365 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18366 
18367 		"",
18368 
18369 		"",
18370 
18371 		"",
18372 	};
18373 
18374 	const Math16ArgFragments	argFragmentModfFrac	=
18375 	{
18376 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18377 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18378 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18379 
18380 		"   %fp_tmp = OpTypePointer Function %${tr}\n",
18381 
18382 		"",
18383 
18384 		"      %tmp = OpVariable %fp_tmp Function\n",
18385 	};
18386 
18387 	const Math16ArgFragments	argFragmentModfInt	=
18388 	{
18389 		" %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18390 		"%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18391 		"     %tmp0  = OpAccessChain %fp_tmp %tmp\n"
18392 		"  %val_dst  = OpLoad %${tr} %tmp0\n"
18393 		"      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18394 
18395 		"   %fp_tmp  = OpTypePointer Function %${tr}\n",
18396 
18397 		"",
18398 
18399 		"      %tmp  = OpVariable %fp_tmp Function\n",
18400 	};
18401 
18402 	const Math16ArgFragments	argFragmentModfStruct	=
18403 	{
18404 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18405 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18406 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18407 		"             OpStore %tmp_ptr_s %val_tmp\n"
18408 		"%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18409 		"  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18410 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18411 
18412 		"  %fp_${tr} = OpTypePointer Function %${tr}\n"
18413 		"   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18414 		"   %fp_tmp = OpTypePointer Function %st_tmp\n"
18415 		"   %c_frac = OpConstant %i32 0\n"
18416 		"    %c_int = OpConstant %i32 1\n",
18417 
18418 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18419 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18420 
18421 		"      %tmp = OpVariable %fp_tmp Function\n",
18422 	};
18423 
18424 	const Math16ArgFragments	argFragmentFrexpStructS	=
18425 	{
18426 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18427 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18428 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18429 		"             OpStore %tmp_ptr_s %val_tmp\n"
18430 		"%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18431 		"  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18432 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18433 
18434 		"  %fp_${tr} = OpTypePointer Function %${tr}\n"
18435 		"   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18436 		"   %fp_tmp = OpTypePointer Function %st_tmp\n",
18437 
18438 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18439 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18440 
18441 		"      %tmp = OpVariable %fp_tmp Function\n",
18442 	};
18443 
18444 	const Math16ArgFragments	argFragmentFrexpStructE	=
18445 	{
18446 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18447 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18448 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18449 		"             OpStore %tmp_ptr_s %val_tmp\n"
18450 		"%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18451 		"%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18452 		"  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18453 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18454 
18455 		"   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18456 		"   %fp_tmp = OpTypePointer Function %st_tmp\n",
18457 
18458 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18459 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18460 
18461 		"      %tmp = OpVariable %fp_tmp Function\n",
18462 	};
18463 
18464 	const Math16ArgFragments	argFragmentFrexpS		=
18465 	{
18466 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18467 		"  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18468 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18469 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18470 
18471 		"",
18472 
18473 		"",
18474 
18475 		"      %tmp = OpVariable %fp_${dr}i32 Function\n",
18476 	};
18477 
18478 	const Math16ArgFragments	argFragmentFrexpE		=
18479 	{
18480 		" %val_src0  = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18481 		"  %out_exp  = OpAccessChain %fp_${dr}i32 %tmp\n"
18482 		"%val_unused = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18483 		"%val_dst_i  = OpLoad %${dr}i32 %out_exp\n"
18484 		"  %val_dst  = OpConvertSToF %${tr} %val_dst_i\n"
18485 		"      %dst  = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18486 
18487 		"",
18488 
18489 		"",
18490 
18491 		"      %tmp  = OpVariable %fp_${dr}i32 Function\n",
18492 	};
18493 
18494 	string load_funcs[MATH16_TYPE_LAST];
18495 	load_funcs[SCALAR] = loadScalarF16FromUint;
18496 	load_funcs[VEC2]   = loadV2F16FromUint;
18497 	load_funcs[VEC3]   = loadV3F16FromUints;
18498 	load_funcs[VEC4]   = loadV4F16FromUints;
18499 	load_funcs[MAT2X2] = loadM2x2F16FromUints;
18500 	load_funcs[MAT2X3] = loadM2x3F16FromUints;
18501 	load_funcs[MAT2X4] = loadM2x4F16FromUints;
18502 	load_funcs[MAT3X2] = loadM3x2F16FromUints;
18503 	load_funcs[MAT3X3] = loadM3x3F16FromUints;
18504 	load_funcs[MAT3X4] = loadM3x4F16FromUints;
18505 	load_funcs[MAT4X2] = loadM4x2F16FromUints;
18506 	load_funcs[MAT4X3] = loadM4x3F16FromUints;
18507 	load_funcs[MAT4X4] = loadM4x4F16FromUints;
18508 
18509 	string store_funcs[MATH16_TYPE_LAST];
18510 	store_funcs[SCALAR] = storeScalarF16AsUint;
18511 	store_funcs[VEC2]   = storeV2F16AsUint;
18512 	store_funcs[VEC3]   = storeV3F16AsUints;
18513 	store_funcs[VEC4]   = storeV4F16AsUints;
18514 	store_funcs[MAT2X2] = storeM2x2F16AsUints;
18515 	store_funcs[MAT2X3] = storeM2x3F16AsUints;
18516 	store_funcs[MAT2X4] = storeM2x4F16AsUints;
18517 	store_funcs[MAT3X2] = storeM3x2F16AsUints;
18518 	store_funcs[MAT3X3] = storeM3x3F16AsUints;
18519 	store_funcs[MAT3X4] = storeM3x4F16AsUints;
18520 	store_funcs[MAT4X2] = storeM4x2F16AsUints;
18521 	store_funcs[MAT4X3] = storeM4x3F16AsUints;
18522 	store_funcs[MAT4X4] = storeM4x4F16AsUints;
18523 
18524 	const Math16TestType&		testType				= testTypes[testTypeIdx];
18525 	const string				funcNameString			= string(testFunc.funcName) + string(testFunc.funcSuffix);
18526 	const string				testName				= de::toLower(funcNameString);
18527 	const Math16ArgFragments*	argFragments			= DE_NULL;
18528 	const size_t				typeStructStride		= testType.typeStructStride;
18529 	const bool					extInst					= !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18530 	const size_t				numFloatsPerArg0Type	= testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18531 	const size_t				iterations				= numDataPoints / numFloatsPerArg0Type;
18532 	const size_t				numFloatsPerResultType	= testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18533 	const vector<deFloat16>		float16UnusedOutput		(iterations * numFloatsPerResultType, 0);
18534 	VulkanFeatures				features;
18535 	SpecResource				specResource;
18536 	map<string, string>			specs;
18537 	map<string, string>			fragments;
18538 	vector<string>				extensions;
18539 	string						funcCall;
18540 	string						funcVariables;
18541 	string						variables;
18542 	string						declarations;
18543 	string						decorations;
18544 	string						functions;
18545 
18546 	switch (testFunc.funcArgsCount)
18547 	{
18548 		case 1:
18549 		{
18550 			argFragments = &argFragment1;
18551 
18552 			if (funcNameString == "ModfFrac")		argFragments = &argFragmentModfFrac;
18553 			if (funcNameString == "ModfInt")		argFragments = &argFragmentModfInt;
18554 			if (funcNameString == "ModfStructFrac")	argFragments = &argFragmentModfStruct;
18555 			if (funcNameString == "ModfStructInt")	argFragments = &argFragmentModfStruct;
18556 			if (funcNameString == "FrexpS")			argFragments = &argFragmentFrexpS;
18557 			if (funcNameString == "FrexpE")			argFragments = &argFragmentFrexpE;
18558 			if (funcNameString == "FrexpStructS")	argFragments = &argFragmentFrexpStructS;
18559 			if (funcNameString == "FrexpStructE")	argFragments = &argFragmentFrexpStructE;
18560 
18561 			break;
18562 		}
18563 		case 2:
18564 		{
18565 			argFragments = &argFragment2;
18566 
18567 			if (funcNameString == "Ldexp")			argFragments = &argFragmentLdExp;
18568 
18569 			break;
18570 		}
18571 		case 3:
18572 		{
18573 			argFragments = &argFragment3;
18574 
18575 			break;
18576 		}
18577 		default:
18578 		{
18579 			TCU_THROW(InternalError, "Invalid number of arguments");
18580 		}
18581 	}
18582 
18583 	functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18584 	if (testFunc.funcArgsCount == 1)
18585 	{
18586 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18587 		variables +=
18588 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18589 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18590 
18591 		decorations +=
18592 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18593 			"OpDecorate %ssbo_src0 Binding 0\n"
18594 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18595 			"OpDecorate %ssbo_dst Binding 1\n";
18596 	}
18597 	else if (testFunc.funcArgsCount == 2)
18598 	{
18599 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18600 		functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18601 		variables +=
18602 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18603 			" %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18604 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18605 
18606 		decorations +=
18607 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18608 			"OpDecorate %ssbo_src0 Binding 0\n"
18609 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
18610 			"OpDecorate %ssbo_src1 Binding 1\n"
18611 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18612 			"OpDecorate %ssbo_dst Binding 2\n";
18613 	}
18614 	else if (testFunc.funcArgsCount == 3)
18615 	{
18616 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18617 		functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18618 		functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18619 		variables +=
18620 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18621 			" %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18622 			" %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18623 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18624 
18625 		decorations +=
18626 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18627 			"OpDecorate %ssbo_src0 Binding 0\n"
18628 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
18629 			"OpDecorate %ssbo_src1 Binding 1\n"
18630 			"OpDecorate %ssbo_src2 DescriptorSet 0\n"
18631 			"OpDecorate %ssbo_src2 Binding 2\n"
18632 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18633 			"OpDecorate %ssbo_dst Binding 3\n";
18634 	}
18635 	else
18636 	{
18637 		TCU_THROW(InternalError, "Invalid number of function arguments");
18638 	}
18639 
18640 	variables	+= argFragments->variables;
18641 	decorations	+= argFragments->decorations;
18642 
18643 	specs["dr"]					= testTypes[testFunc.typeResult].typePrefix;
18644 	specs["d0"]					= testTypes[testFunc.typeArg0].typePrefix;
18645 	specs["d1"]					= testTypes[testFunc.typeArg1].typePrefix;
18646 	specs["d2"]					= testTypes[testFunc.typeArg2].typePrefix;
18647 	specs["tr"]					= string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18648 	specs["t0"]					= string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18649 	specs["t1"]					= string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18650 	specs["t2"]					= string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18651 	specs["store_tr"]			= string(testTypes[testFunc.typeResult].storage_type);
18652 	specs["store_t0"]			= string(testTypes[testFunc.typeArg0].storage_type);
18653 	specs["store_t1"]			= string(testTypes[testFunc.typeArg1].storage_type);
18654 	specs["store_t2"]			= string(testTypes[testFunc.typeArg2].storage_type);
18655 	specs["struct_stride"]		= de::toString(typeStructStride);
18656 	specs["op"]					= extInst ? "OpExtInst" : testFunc.funcName;
18657 	specs["ext_inst"]			= extInst ? string("%ext_import ") + testFunc.funcName : "";
18658 	specs["struct_member"]		= de::toLower(testFunc.funcSuffix);
18659 
18660 	variables					= StringTemplate(variables).specialize(specs);
18661 	decorations					= StringTemplate(decorations).specialize(specs);
18662 	funcVariables				= StringTemplate(argFragments->funcVariables).specialize(specs);
18663 	funcCall					= StringTemplate(argFragments->bodies).specialize(specs);
18664 
18665 	specs["num_data_points"]	= de::toString(iterations);
18666 	specs["arg_vars"]			= variables;
18667 	specs["arg_decorations"]	= decorations;
18668 	specs["arg_infunc_vars"]	= funcVariables;
18669 	specs["arg_func_call"]		= funcCall;
18670 
18671 	fragments["extension"]		= "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18672 	fragments["capability"]		= "OpCapability Matrix\nOpCapability Float16\n";
18673 	fragments["decoration"]		= decoration.specialize(specs);
18674 	fragments["pre_main"]		= preMain.specialize(specs) + functions;
18675 	fragments["testfun"]		= testFun.specialize(specs);
18676 
18677 	for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18678 	{
18679 		const size_t			numFloatsPerItem	= (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18680 													: (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18681 													: (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18682 													: -1;
18683 		const vector<deFloat16>	inputData			= testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18684 
18685 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18686 	}
18687 
18688 	specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16UnusedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18689 	specResource.verifyIO = testFunc.verifyFunc;
18690 
18691 	extensions.push_back("VK_KHR_shader_float16_int8");
18692 
18693 	features.extFloat16Int8.shaderFloat16 = true;
18694 
18695 	finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18696 }
18697 
18698 template<size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18699 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18700 {
18701 	DE_STATIC_ASSERT(C >= 1 && C <= 4);
18702 
18703 	const std::string				testGroupName	(string("arithmetic_") + de::toString(C));
18704 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18705 	const Math16TestFunc			testFuncs[]		=
18706 	{
18707 		{	"OpFNegate",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16OpFNegate>					},
18708 		{	"Round",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Round>						},
18709 		{	"RoundEven",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16RoundEven>					},
18710 		{	"Trunc",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Trunc>						},
18711 		{	"FAbs",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FAbs>						},
18712 		{	"FSign",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FSign>						},
18713 		{	"Floor",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Floor>						},
18714 		{	"Ceil",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Ceil>						},
18715 		{	"Fract",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Fract>						},
18716 		{	"Radians",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Radians>						},
18717 		{	"Degrees",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Degrees>						},
18718 		{	"Sin",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sin>							},
18719 		{	"Cos",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cos>							},
18720 		{	"Tan",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tan>							},
18721 		{	"Asin",					"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asin>						},
18722 		{	"Acos",					"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acos>						},
18723 		{	"Atan",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atan>						},
18724 		{	"Sinh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sinh>						},
18725 		{	"Cosh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cosh>						},
18726 		{	"Tanh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tanh>						},
18727 		{	"Asinh",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asinh>						},
18728 		{	"Acosh",				"",			1,	C,		C,		0,		0, &getInputDataAC,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acosh>						},
18729 		{	"Atanh",				"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atanh>						},
18730 		{	"Exp",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp>							},
18731 		{	"Log",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log>							},
18732 		{	"Exp2",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp2>						},
18733 		{	"Log2",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log2>						},
18734 		{	"Sqrt",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sqrt>						},
18735 		{	"InverseSqrt",			"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16InverseSqrt>					},
18736 		{	"Modf",					"Frac",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>					},
18737 		{	"Modf",					"Int",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>						},
18738 		{	"ModfStruct",			"Frac",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>					},
18739 		{	"ModfStruct",			"Int",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>						},
18740 		{	"Frexp",				"S",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>						},
18741 		{	"Frexp",				"E",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>						},
18742 		{	"FrexpStruct",			"S",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>						},
18743 		{	"FrexpStruct",			"E",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>						},
18744 		{	"OpFAdd",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFAdd>						},
18745 		{	"OpFSub",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFSub>						},
18746 		{	"OpFMul",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFMul>						},
18747 		{	"OpFDiv",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFDiv>						},
18748 		{	"Atan2",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Atan2>						},
18749 		{	"Pow",					"",			2,	C,		C,		C,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Pow>							},
18750 		{	"FMin",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMin>						},
18751 		{	"FMax",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMax>						},
18752 		{	"Step",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Step>						},
18753 		{	"Ldexp",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Ldexp>						},
18754 		{	"FClamp",				"",			3,	C,		C,		C,		C, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FClamp>						},
18755 		{	"FMix",					"",			3,	C,		C,		C,		C, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FMix>						},
18756 		{	"SmoothStep",			"",			3,	C,		C,		C,		C, &getInputDataSS,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16SmoothStep>					},
18757 		{	"Fma",					"",			3,	C,		C,		C,		C, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16Fma>							},
18758 		{	"Length",				"",			1,	1,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  1,  C,  0,  0, fp16Length>						},
18759 		{	"Distance",				"",			2,	1,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Distance>					},
18760 		{	"Cross",				"",			2,	C,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Cross>						},
18761 		{	"Normalize",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Normalize>					},
18762 		{	"FaceForward",			"",			3,	C,		C,		C,		C, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FaceForward>					},
18763 		{	"Reflect",				"",			2,	C,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Reflect>						},
18764 		{	"Refract",				"",			3,	C,		C,		C,		1, &getInputDataN,	compareFP16ArithmeticFunc<  C,  C,  C,  1, fp16Refract>						},
18765 		{	"OpDot",				"",			2,	1,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Dot>							},
18766 		{	"OpVectorTimesScalar",	"",			2,	C,		C,		1,		0, &getInputDataV,	compareFP16ArithmeticFunc<  C,  C,  1,  0, fp16VectorTimesScalar>			},
18767 	};
18768 
18769 	for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18770 	{
18771 		const Math16TestFunc&	testFunc		= testFuncs[testFuncIdx];
18772 		const string			funcNameString	= testFunc.funcName;
18773 
18774 		if ((C != 3) && funcNameString == "Cross")
18775 			continue;
18776 
18777 		if ((C < 2) && funcNameString == "OpDot")
18778 			continue;
18779 
18780 		if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18781 			continue;
18782 
18783 		createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18784 	}
18785 
18786 	return testGroup.release();
18787 }
18788 
18789 template<class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18790 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18791 {
18792 	const std::string				testGroupName	("arithmetic");
18793 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18794 	const Math16TestFunc			testFuncs[]		=
18795 	{
18796 		{	"OpTranspose",			"2x2",		1,	MAT2X2,	MAT2X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Transpose<2,2> >				},
18797 		{	"OpTranspose",			"3x2",		1,	MAT2X3,	MAT3X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<3,2> >				},
18798 		{	"OpTranspose",			"4x2",		1,	MAT2X4,	MAT4X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<4,2> >				},
18799 		{	"OpTranspose",			"2x3",		1,	MAT3X2,	MAT2X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,3> >				},
18800 		{	"OpTranspose",			"3x3",		1,	MAT3X3,	MAT3X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,3> >				},
18801 		{	"OpTranspose",			"4x3",		1,	MAT3X4,	MAT4X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,3> >				},
18802 		{	"OpTranspose",			"2x4",		1,	MAT4X2,	MAT2X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,4> >				},
18803 		{	"OpTranspose",			"3x4",		1,	MAT4X3,	MAT3X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,4> >				},
18804 		{	"OpTranspose",			"4x4",		1,	MAT4X4,	MAT4X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,4> >				},
18805 		{	"OpMatrixTimesScalar",	"2x2",		2,	MAT2X2,	MAT2X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4,  1,  0, fp16MatrixTimesScalar<2,2> >		},
18806 		{	"OpMatrixTimesScalar",	"2x3",		2,	MAT2X3,	MAT2X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,3> >		},
18807 		{	"OpMatrixTimesScalar",	"2x4",		2,	MAT2X4,	MAT2X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,4> >		},
18808 		{	"OpMatrixTimesScalar",	"3x2",		2,	MAT3X2,	MAT3X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<3,2> >		},
18809 		{	"OpMatrixTimesScalar",	"3x3",		2,	MAT3X3,	MAT3X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,3> >		},
18810 		{	"OpMatrixTimesScalar",	"3x4",		2,	MAT3X4,	MAT3X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,4> >		},
18811 		{	"OpMatrixTimesScalar",	"4x2",		2,	MAT4X2,	MAT4X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<4,2> >		},
18812 		{	"OpMatrixTimesScalar",	"4x3",		2,	MAT4X3,	MAT4X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,3> >		},
18813 		{	"OpMatrixTimesScalar",	"4x4",		2,	MAT4X4,	MAT4X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,4> >		},
18814 		{	"OpVectorTimesMatrix",	"2x2",		2,	VEC2,	VEC2,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  2,  4,  0, fp16VectorTimesMatrix<2,2> >		},
18815 		{	"OpVectorTimesMatrix",	"2x3",		2,	VEC2,	VEC3,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  3,  8,  0, fp16VectorTimesMatrix<2,3> >		},
18816 		{	"OpVectorTimesMatrix",	"2x4",		2,	VEC2,	VEC4,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  4,  8,  0, fp16VectorTimesMatrix<2,4> >		},
18817 		{	"OpVectorTimesMatrix",	"3x2",		2,	VEC3,	VEC2,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  2,  8,  0, fp16VectorTimesMatrix<3,2> >		},
18818 		{	"OpVectorTimesMatrix",	"3x3",		2,	VEC3,	VEC3,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  3, 16,  0, fp16VectorTimesMatrix<3,3> >		},
18819 		{	"OpVectorTimesMatrix",	"3x4",		2,	VEC3,	VEC4,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  4, 16,  0, fp16VectorTimesMatrix<3,4> >		},
18820 		{	"OpVectorTimesMatrix",	"4x2",		2,	VEC4,	VEC2,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  2,  8,  0, fp16VectorTimesMatrix<4,2> >		},
18821 		{	"OpVectorTimesMatrix",	"4x3",		2,	VEC4,	VEC3,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  3, 16,  0, fp16VectorTimesMatrix<4,3> >		},
18822 		{	"OpVectorTimesMatrix",	"4x4",		2,	VEC4,	VEC4,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4, 16,  0, fp16VectorTimesMatrix<4,4> >		},
18823 		{	"OpMatrixTimesVector",	"2x2",		2,	VEC2,	MAT2X2,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  4,  2,  0, fp16MatrixTimesVector<2,2> >		},
18824 		{	"OpMatrixTimesVector",	"2x3",		2,	VEC3,	MAT2X3,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  8,  2,  0, fp16MatrixTimesVector<2,3> >		},
18825 		{	"OpMatrixTimesVector",	"2x4",		2,	VEC4,	MAT2X4,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  2,  0, fp16MatrixTimesVector<2,4> >		},
18826 		{	"OpMatrixTimesVector",	"3x2",		2,	VEC2,	MAT3X2,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  8,  3,  0, fp16MatrixTimesVector<3,2> >		},
18827 		{	"OpMatrixTimesVector",	"3x3",		2,	VEC3,	MAT3X3,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3, 16,  3,  0, fp16MatrixTimesVector<3,3> >		},
18828 		{	"OpMatrixTimesVector",	"3x4",		2,	VEC4,	MAT3X4,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4, 16,  3,  0, fp16MatrixTimesVector<3,4> >		},
18829 		{	"OpMatrixTimesVector",	"4x2",		2,	VEC2,	MAT4X2,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  8,  4,  0, fp16MatrixTimesVector<4,2> >		},
18830 		{	"OpMatrixTimesVector",	"4x3",		2,	VEC3,	MAT4X3,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3, 16,  4,  0, fp16MatrixTimesVector<4,3> >		},
18831 		{	"OpMatrixTimesVector",	"4x4",		2,	VEC4,	MAT4X4,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4, 16,  4,  0, fp16MatrixTimesVector<4,4> >		},
18832 		{	"OpMatrixTimesMatrix",	"2x2_2x2",	2,	MAT2X2,	MAT2X2,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4,  4,  0, fp16MatrixTimesMatrix<2,2,2,2> >	},
18833 		{	"OpMatrixTimesMatrix",	"2x2_3x2",	2,	MAT3X2,	MAT2X2,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,3,2> >	},
18834 		{	"OpMatrixTimesMatrix",	"2x2_4x2",	2,	MAT4X2,	MAT2X2,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,4,2> >	},
18835 		{	"OpMatrixTimesMatrix",	"2x3_2x2",	2,	MAT2X3,	MAT2X3,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,3,2,2> >	},
18836 		{	"OpMatrixTimesMatrix",	"2x3_3x2",	2,	MAT3X3,	MAT2X3,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,3,2> >	},
18837 		{	"OpMatrixTimesMatrix",	"2x3_4x2",	2,	MAT4X3,	MAT2X3,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,4,2> >	},
18838 		{	"OpMatrixTimesMatrix",	"2x4_2x2",	2,	MAT2X4,	MAT2X4,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,4,2,2> >	},
18839 		{	"OpMatrixTimesMatrix",	"2x4_3x2",	2,	MAT3X4,	MAT2X4,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,3,2> >	},
18840 		{	"OpMatrixTimesMatrix",	"2x4_4x2",	2,	MAT4X4,	MAT2X4,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,4,2> >	},
18841 		{	"OpMatrixTimesMatrix",	"3x2_2x3",	2,	MAT2X2,	MAT3X2,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<3,2,2,3> >	},
18842 		{	"OpMatrixTimesMatrix",	"3x2_3x3",	2,	MAT3X2,	MAT3X2,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,3,3> >	},
18843 		{	"OpMatrixTimesMatrix",	"3x2_4x3",	2,	MAT4X2,	MAT3X2,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,4,3> >	},
18844 		{	"OpMatrixTimesMatrix",	"3x3_2x3",	2,	MAT2X3,	MAT3X3,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,3,2,3> >	},
18845 		{	"OpMatrixTimesMatrix",	"3x3_3x3",	2,	MAT3X3,	MAT3X3,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,3,3> >	},
18846 		{	"OpMatrixTimesMatrix",	"3x3_4x3",	2,	MAT4X3,	MAT3X3,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,4,3> >	},
18847 		{	"OpMatrixTimesMatrix",	"3x4_2x3",	2,	MAT2X4,	MAT3X4,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,4,2,3> >	},
18848 		{	"OpMatrixTimesMatrix",	"3x4_3x3",	2,	MAT3X4,	MAT3X4,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,3,3> >	},
18849 		{	"OpMatrixTimesMatrix",	"3x4_4x3",	2,	MAT4X4,	MAT3X4,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,4,3> >	},
18850 		{	"OpMatrixTimesMatrix",	"4x2_2x4",	2,	MAT2X2,	MAT4X2,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<4,2,2,4> >	},
18851 		{	"OpMatrixTimesMatrix",	"4x2_3x4",	2,	MAT3X2,	MAT4X2,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,3,4> >	},
18852 		{	"OpMatrixTimesMatrix",	"4x2_4x4",	2,	MAT4X2,	MAT4X2,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,4,4> >	},
18853 		{	"OpMatrixTimesMatrix",	"4x3_2x4",	2,	MAT2X3,	MAT4X3,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,3,2,4> >	},
18854 		{	"OpMatrixTimesMatrix",	"4x3_3x4",	2,	MAT3X3,	MAT4X3,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,3,4> >	},
18855 		{	"OpMatrixTimesMatrix",	"4x3_4x4",	2,	MAT4X3,	MAT4X3,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,4,4> >	},
18856 		{	"OpMatrixTimesMatrix",	"4x4_2x4",	2,	MAT2X4,	MAT4X4,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,4,2,4> >	},
18857 		{	"OpMatrixTimesMatrix",	"4x4_3x4",	2,	MAT3X4,	MAT4X4,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,3,4> >	},
18858 		{	"OpMatrixTimesMatrix",	"4x4_4x4",	2,	MAT4X4,	MAT4X4,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,4,4> >	},
18859 		{	"OpOuterProduct",		"2x2",		2,	MAT2X2,	VEC2,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  2,  2,  0, fp16OuterProduct<2,2> >			},
18860 		{	"OpOuterProduct",		"2x3",		2,	MAT2X3,	VEC3,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  3,  2,  0, fp16OuterProduct<2,3> >			},
18861 		{	"OpOuterProduct",		"2x4",		2,	MAT2X4,	VEC4,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  2,  0, fp16OuterProduct<2,4> >			},
18862 		{	"OpOuterProduct",		"3x2",		2,	MAT3X2,	VEC2,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  2,  3,  0, fp16OuterProduct<3,2> >			},
18863 		{	"OpOuterProduct",		"3x3",		2,	MAT3X3,	VEC3,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  3,  3,  0, fp16OuterProduct<3,3> >			},
18864 		{	"OpOuterProduct",		"3x4",		2,	MAT3X4,	VEC4,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  4,  3,  0, fp16OuterProduct<3,4> >			},
18865 		{	"OpOuterProduct",		"4x2",		2,	MAT4X2,	VEC2,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  2,  4,  0, fp16OuterProduct<4,2> >			},
18866 		{	"OpOuterProduct",		"4x3",		2,	MAT4X3,	VEC3,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  3,  4,  0, fp16OuterProduct<4,3> >			},
18867 		{	"OpOuterProduct",		"4x4",		2,	MAT4X4,	VEC4,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  4,  4,  0, fp16OuterProduct<4,4> >			},
18868 		{	"Determinant",			"2x2",		1,	SCALAR,	MAT2X2,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1,  4,  0,  0, fp16Determinant<2> >				},
18869 		{	"Determinant",			"3x3",		1,	SCALAR,	MAT3X3,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<3> >				},
18870 		{	"Determinant",			"4x4",		1,	SCALAR,	MAT4X4,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<4> >				},
18871 		{	"MatrixInverse",		"2x2",		1,	MAT2X2,	MAT2X2,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Inverse<2> >					},
18872 	};
18873 
18874 	for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18875 	{
18876 		const Math16TestFunc&	testFunc	= testFuncs[testFuncIdx];
18877 
18878 		createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18879 	}
18880 
18881 	return testGroup.release();
18882 }
18883 
// One Amber-based comparison test. The float32 comparison set builders in this
// file use 'name' for the test case name and as the leading part of the .amber
// file name, and append 'desc' to "Compare output of " for the description.
// Kept as a plain aggregate so it can be brace-initialized in tables.
struct ComparisonCase
{
	string	name;	// Case name / file name stem, e.g. "modfstruct".
	string	desc;	// Human-readable list of the compared operations.
};
18889 
18890 template<size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)18891 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18892 {
18893 	const string					testGroupName	("comparison_" + de::toString(C));
18894 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18895 #ifndef CTS_USES_VULKANSC
18896 	const char*						dataDir			= "spirv_assembly/instruction/float32/comparison";
18897 
18898 	const ComparisonCase			amberTests[]	=
18899 	{
18900 		{ "modfstruct",		"modf and modfStruct"	},
18901 		{ "frexpstruct",	"frexp and frexpStruct"	}
18902 	};
18903 
18904 	for (ComparisonCase test : amberTests)
18905 	{
18906 		const string caseDesc ("Compare output of " + test.desc);
18907 		const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
18908 
18909 		testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18910 														   test.name.c_str(),
18911 														   caseDesc.c_str(),
18912 														   dataDir,
18913 														   fileName));
18914 	}
18915 #endif
18916 	return testGroup.release();
18917 }
18918 
// A shader stage used by the graphics float32 comparison tests in this file:
// 'name' is appended to the case name and to the .amber file name, and
// 'requirement' is forwarded unchanged to cts_amber::createAmberTestCase.
// Kept as a plain aggregate so it can be brace-initialized in tables.
struct ShaderStage
{
	string			name;			// Stage tag, e.g. "vert" or "geom".
	vector<string>	requirement;	// Requirement strings such as "Features.geometryShader"; may be empty.
};
18924 
18925 template<size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)18926 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
18927 {
18928 	const string					testGroupName	("comparison_" + de::toString(C));
18929 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18930 #ifndef CTS_USES_VULKANSC
18931 	const char*						dataDir			= "spirv_assembly/instruction/float32/comparison";
18932 
18933 	const ShaderStage				stages[]		=
18934 	{
18935 		{ "vert", vector<string>(0) },
18936 		{ "tesc", vector<string>(1, "Features.tessellationShader") },
18937 		{ "tese", vector<string>(1, "Features.tessellationShader") },
18938 		{ "geom", vector<string>(1, "Features.geometryShader") },
18939 		{ "frag", vector<string>(0) }
18940 	};
18941 
18942 	const ComparisonCase			amberTests[]	=
18943 	{
18944 		{ "modfstruct",		"modf and modfStruct"	},
18945 		{ "frexpstruct",	"frexp and frexpStruct"	}
18946 	};
18947 
18948 	for (ComparisonCase test : amberTests)
18949 	for (ShaderStage stage : stages)
18950 	{
18951 		const string caseName (test.name + "_" + stage.name);
18952 		const string caseDesc ("Compare output of " + test.desc);
18953 		const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
18954 
18955 		testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18956 														   caseName.c_str(),
18957 														   caseDesc.c_str(),
18958 														   dataDir,
18959 														   fileName,
18960 														   stage.requirement));
18961 	}
18962 #endif
18963 
18964 	return testGroup.release();
18965 }
18966 
getNumberTypeName(const NumberType type)18967 const string getNumberTypeName (const NumberType type)
18968 {
18969 	if (type == NUMBERTYPE_INT32)
18970 	{
18971 		return "int";
18972 	}
18973 	else if (type == NUMBERTYPE_UINT32)
18974 	{
18975 		return "uint";
18976 	}
18977 	else if (type == NUMBERTYPE_FLOAT32)
18978 	{
18979 		return "float";
18980 	}
18981 	else
18982 	{
18983 		DE_ASSERT(false);
18984 		return "";
18985 	}
18986 }
18987 
getInt(de::Random & rnd)18988 deInt32 getInt(de::Random& rnd)
18989 {
18990 	return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
18991 }
18992 
// Returns 'str' concatenated with itself 'times' times.
// A zero or negative count yields an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
19002 
getRandomConstantString(const NumberType type,de::Random & rnd)19003 const string getRandomConstantString (const NumberType type, de::Random& rnd)
19004 {
19005 	if (type == NUMBERTYPE_INT32)
19006 	{
19007 		return numberToString<deInt32>(getInt(rnd));
19008 	}
19009 	else if (type == NUMBERTYPE_UINT32)
19010 	{
19011 		return numberToString<deUint32>(rnd.getUint32());
19012 	}
19013 	else if (type == NUMBERTYPE_FLOAT32)
19014 	{
19015 		return numberToString<float>(rnd.getFloat());
19016 	}
19017 	else
19018 	{
19019 		DE_ASSERT(false);
19020 		return "";
19021 	}
19022 }
19023 
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19024 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19025 {
19026 	map<string, string> params;
19027 
19028 	// Vec2 to Vec4
19029 	for (int width = 2; width <= 4; ++width)
19030 	{
19031 		const string randomConst = numberToString(getInt(rnd));
19032 		const string widthStr = numberToString(width);
19033 		const string composite_type = "${customType}vec" + widthStr;
19034 		const int index = rnd.getInt(0, width-1);
19035 
19036 		params["type"]			= "vec";
19037 		params["name"]			= params["type"] + "_" + widthStr;
19038 		params["compositeDecl"]		= composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19039 		params["compositeType"]		= composite_type;
19040 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19041 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19042 		params["indexes"]		= numberToString(index);
19043 		testCases.push_back(params);
19044 	}
19045 }
19046 
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19047 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19048 {
19049 	const int limit = 10;
19050 	map<string, string> params;
19051 
19052 	for (int width = 2; width <= limit; ++width)
19053 	{
19054 		string randomConst = numberToString(getInt(rnd));
19055 		string widthStr = numberToString(width);
19056 		int index = rnd.getInt(0, width-1);
19057 
19058 		params["type"]			= "array";
19059 		params["name"]			= params["type"] + "_" + widthStr;
19060 		params["compositeDecl"]		= string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19061 											+	 "%composite = OpTypeArray ${customType} %arraywidth\n";
19062 		params["compositeType"]		= "%composite";
19063 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19064 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19065 		params["indexes"]		= numberToString(index);
19066 		testCases.push_back(params);
19067 	}
19068 }
19069 
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19070 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19071 {
19072 	const int limit = 10;
19073 	map<string, string> params;
19074 
19075 	for (int width = 2; width <= limit; ++width)
19076 	{
19077 		string randomConst = numberToString(getInt(rnd));
19078 		int index = rnd.getInt(0, width-1);
19079 
19080 		params["type"]			= "struct";
19081 		params["name"]			= params["type"] + "_" + numberToString(width);
19082 		params["compositeDecl"]		= "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19083 		params["compositeType"]		= "%composite";
19084 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19085 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19086 		params["indexes"]		= numberToString(index);
19087 		testCases.push_back(params);
19088 	}
19089 }
19090 
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19091 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19092 {
19093 	map<string, string> params;
19094 
19095 	// Vec2 to Vec4
19096 	for (int width = 2; width <= 4; ++width)
19097 	{
19098 		string widthStr = numberToString(width);
19099 
19100 		for (int column = 2 ; column <= 4; ++column)
19101 		{
19102 			int index_0 = rnd.getInt(0, column-1);
19103 			int index_1 = rnd.getInt(0, width-1);
19104 			string columnStr = numberToString(column);
19105 
19106 			params["type"]		= "matrix";
19107 			params["name"]		= params["type"] + "_" + widthStr + "x" + columnStr;
19108 			params["compositeDecl"]	= string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
19109 												+	 "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19110 			params["compositeType"]	= "%composite";
19111 
19112 			params["filler"]	= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19113 												+	 "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19114 
19115 			params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19116 			params["indexes"]	= numberToString(index_0) + " " + numberToString(index_1);
19117 			testCases.push_back(params);
19118 		}
19119 	}
19120 }
19121 
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19122 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19123 {
19124 	createVectorCompositeCases(testCases, rnd, type);
19125 	createArrayCompositeCases(testCases, rnd, type);
19126 	createStructCompositeCases(testCases, rnd, type);
19127 	// Matrix only supports float types
19128 	if (type == NUMBERTYPE_FLOAT32)
19129 	{
19130 		createMatrixCompositeCases(testCases, rnd, type);
19131 	}
19132 }
19133 
getAssemblyTypeDeclaration(const NumberType type)19134 const string getAssemblyTypeDeclaration (const NumberType type)
19135 {
19136 	switch (type)
19137 	{
19138 		case NUMBERTYPE_INT32:		return "OpTypeInt 32 1";
19139 		case NUMBERTYPE_UINT32:		return "OpTypeInt 32 0";
19140 		case NUMBERTYPE_FLOAT32:	return "OpTypeFloat 32";
19141 		default:			DE_ASSERT(false); return "";
19142 	}
19143 }
19144 
getAssemblyTypeName(const NumberType type)19145 const string getAssemblyTypeName (const NumberType type)
19146 {
19147 	switch (type)
19148 	{
19149 		case NUMBERTYPE_INT32:		return "%i32";
19150 		case NUMBERTYPE_UINT32:		return "%u32";
19151 		case NUMBERTYPE_FLOAT32:	return "%f32";
19152 		default:			DE_ASSERT(false); return "";
19153 	}
19154 }
19155 
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19156 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
19157 {
19158 	map<string, string>	parameters(params);
19159 
19160 	const string customType = getAssemblyTypeName(type);
19161 	map<string, string> substCustomType;
19162 	substCustomType["customType"] = customType;
19163 	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19164 	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19165 	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19166 	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19167 	parameters["customType"] = customType;
19168 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19169 
19170 	if (parameters.at("compositeType") != "%u32vec3")
19171 	{
19172 		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19173 	}
19174 
19175 	return StringTemplate(
19176 		"OpCapability Shader\n"
19177 		"OpCapability Matrix\n"
19178 		"OpMemoryModel Logical GLSL450\n"
19179 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19180 		"OpExecutionMode %main LocalSize 1 1 1\n"
19181 
19182 		"OpSource GLSL 430\n"
19183 		"OpName %main           \"main\"\n"
19184 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19185 
19186 		// Decorators
19187 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19188 		"OpDecorate %buf BufferBlock\n"
19189 		"OpDecorate %indata DescriptorSet 0\n"
19190 		"OpDecorate %indata Binding 0\n"
19191 		"OpDecorate %outdata DescriptorSet 0\n"
19192 		"OpDecorate %outdata Binding 1\n"
19193 		"OpDecorate %customarr ArrayStride 4\n"
19194 		"${compositeDecorator}"
19195 		"OpMemberDecorate %buf 0 Offset 0\n"
19196 
19197 		// General types
19198 		"%void      = OpTypeVoid\n"
19199 		"%voidf     = OpTypeFunction %void\n"
19200 		"%u32       = OpTypeInt 32 0\n"
19201 		"%i32       = OpTypeInt 32 1\n"
19202 		"%f32       = OpTypeFloat 32\n"
19203 
19204 		// Composite declaration
19205 		"${compositeDecl}"
19206 
19207 		// Constants
19208 		"${filler}"
19209 
19210 		"${u32vec3Decl:opt}"
19211 		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19212 
19213 		// Inherited from custom
19214 		"%customptr = OpTypePointer Uniform ${customType}\n"
19215 		"%customarr = OpTypeRuntimeArray ${customType}\n"
19216 		"%buf       = OpTypeStruct %customarr\n"
19217 		"%bufptr    = OpTypePointer Uniform %buf\n"
19218 
19219 		"%indata    = OpVariable %bufptr Uniform\n"
19220 		"%outdata   = OpVariable %bufptr Uniform\n"
19221 
19222 		"%id        = OpVariable %uvec3ptr Input\n"
19223 		"%zero      = OpConstant %i32 0\n"
19224 
19225 		"%main      = OpFunction %void None %voidf\n"
19226 		"%label     = OpLabel\n"
19227 		"%idval     = OpLoad %u32vec3 %id\n"
19228 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19229 
19230 		"%inloc     = OpAccessChain %customptr %indata %zero %x\n"
19231 		"%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
19232 		// Read the input value
19233 		"%inval     = OpLoad ${customType} %inloc\n"
19234 		// Create the composite and fill it
19235 		"${compositeConstruct}"
19236 		// Insert the input value to a place
19237 		"%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19238 		// Read back the value from the position
19239 		"%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19240 		// Store it in the output position
19241 		"             OpStore %outloc %out_val\n"
19242 		"             OpReturn\n"
19243 		"             OpFunctionEnd\n"
19244 	).specialize(parameters);
19245 }
19246 
19247 template<typename T>
createCompositeBuffer(T number)19248 BufferSp createCompositeBuffer(T number)
19249 {
19250 	return BufferSp(new Buffer<T>(vector<T>(1, number)));
19251 }
19252 
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19253 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19254 {
19255 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
19256 	de::Random						rnd		(deStringHash(group->getName()));
19257 
19258 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19259 	{
19260 		NumberType						numberType		= NumberType(type);
19261 		const string					typeName		= getNumberTypeName(numberType);
19262 		const string					description		= "Test the OpCompositeInsert instruction with " + typeName + "s";
19263 		de::MovePtr<tcu::TestCaseGroup>	subGroup		(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19264 		vector<map<string, string> >	testCases;
19265 
19266 		createCompositeCases(testCases, rnd, numberType);
19267 
19268 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19269 		{
19270 			ComputeShaderSpec	spec;
19271 
19272 			spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19273 
19274 			switch (numberType)
19275 			{
19276 				case NUMBERTYPE_INT32:
19277 				{
19278 					deInt32 number = getInt(rnd);
19279 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19280 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19281 					break;
19282 				}
19283 				case NUMBERTYPE_UINT32:
19284 				{
19285 					deUint32 number = rnd.getUint32();
19286 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19287 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19288 					break;
19289 				}
19290 				case NUMBERTYPE_FLOAT32:
19291 				{
19292 					float number = rnd.getFloat();
19293 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19294 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19295 					break;
19296 				}
19297 				default:
19298 					DE_ASSERT(false);
19299 			}
19300 
19301 			spec.numWorkGroups = IVec3(1, 1, 1);
19302 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
19303 		}
19304 		group->addChild(subGroup.release());
19305 	}
19306 	return group.release();
19307 }
19308 
19309 struct AssemblyStructInfo
19310 {
AssemblyStructInfovkt::SpirVAssembly::AssemblyStructInfo19311 	AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19312 	: components	(comp)
19313 	, index			(idx)
19314 	{}
19315 
19316 	deUint32 components;
19317 	deUint32 index;
19318 };
19319 
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19320 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
19321 {
19322 	// Create the full index string
19323 	string				fullIndex	= numberToString(structInfo.index) + " " + params.at("indexes");
19324 	// Convert it to list of indexes
19325 	vector<string>		indexes		= de::splitString(fullIndex, ' ');
19326 
19327 	map<string, string>	parameters	(params);
19328 	parameters["structType"]	= repeatString(" ${compositeType}", structInfo.components);
19329 	parameters["structConstruct"]	= repeatString(" %instance", structInfo.components);
19330 	parameters["insertIndexes"]	= fullIndex;
19331 
19332 	// In matrix cases the last two index is the CompositeExtract indexes
19333 	const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19334 
19335 	// Construct the extractIndex
19336 	for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19337 	{
19338 		parameters["extractIndexes"] += " " + *index;
19339 	}
19340 
19341 	// Remove the last 1 or 2 element depends on matrix case or not
19342 	indexes.erase(indexes.end() - extractIndexes, indexes.end());
19343 
19344 	deUint32 id = 0;
19345 	// Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19346 	for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19347 	{
19348 		string indexId = "%index_" + numberToString(id++);
19349 		parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
19350 		parameters["accessChainIndexes"] += " " + indexId;
19351 	}
19352 
19353 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19354 
19355 	const string customType = getAssemblyTypeName(type);
19356 	map<string, string> substCustomType;
19357 	substCustomType["customType"] = customType;
19358 	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19359 	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19360 	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19361 	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19362 	parameters["customType"] = customType;
19363 
19364 	const string compositeType = parameters.at("compositeType");
19365 	map<string, string> substCompositeType;
19366 	substCompositeType["compositeType"] = compositeType;
19367 	parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19368 	if (compositeType != "%u32vec3")
19369 	{
19370 		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19371 	}
19372 
19373 	return StringTemplate(
19374 		"OpCapability Shader\n"
19375 		"OpCapability Matrix\n"
19376 		"OpMemoryModel Logical GLSL450\n"
19377 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19378 		"OpExecutionMode %main LocalSize 1 1 1\n"
19379 
19380 		"OpSource GLSL 430\n"
19381 		"OpName %main           \"main\"\n"
19382 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19383 		// Decorators
19384 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19385 		"OpDecorate %buf BufferBlock\n"
19386 		"OpDecorate %indata DescriptorSet 0\n"
19387 		"OpDecorate %indata Binding 0\n"
19388 		"OpDecorate %outdata DescriptorSet 0\n"
19389 		"OpDecorate %outdata Binding 1\n"
19390 		"OpDecorate %customarr ArrayStride 4\n"
19391 		"${compositeDecorator}"
19392 		"OpMemberDecorate %buf 0 Offset 0\n"
19393 		// General types
19394 		"%void      = OpTypeVoid\n"
19395 		"%voidf     = OpTypeFunction %void\n"
19396 		"%i32       = OpTypeInt 32 1\n"
19397 		"%u32       = OpTypeInt 32 0\n"
19398 		"%f32       = OpTypeFloat 32\n"
19399 		// Custom types
19400 		"${compositeDecl}"
19401 		// %u32vec3 if not already declared in ${compositeDecl}
19402 		"${u32vec3Decl:opt}"
19403 		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19404 		// Inherited from composite
19405 		"%composite_p = OpTypePointer Function ${compositeType}\n"
19406 		"%struct_t  = OpTypeStruct${structType}\n"
19407 		"%struct_p  = OpTypePointer Function %struct_t\n"
19408 		// Constants
19409 		"${filler}"
19410 		"${accessChainConstDeclaration}"
19411 		// Inherited from custom
19412 		"%customptr = OpTypePointer Uniform ${customType}\n"
19413 		"%customarr = OpTypeRuntimeArray ${customType}\n"
19414 		"%buf       = OpTypeStruct %customarr\n"
19415 		"%bufptr    = OpTypePointer Uniform %buf\n"
19416 		"%indata    = OpVariable %bufptr Uniform\n"
19417 		"%outdata   = OpVariable %bufptr Uniform\n"
19418 
19419 		"%id        = OpVariable %uvec3ptr Input\n"
19420 		"%zero      = OpConstant %u32 0\n"
19421 		"%main      = OpFunction %void None %voidf\n"
19422 		"%label     = OpLabel\n"
19423 		"%struct_v  = OpVariable %struct_p Function\n"
19424 		"%idval     = OpLoad %u32vec3 %id\n"
19425 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19426 		// Create the input/output type
19427 		"%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19428 		"%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19429 		// Read the input value
19430 		"%inval     = OpLoad ${customType} %inloc\n"
19431 		// Create the composite and fill it
19432 		"${compositeConstruct}"
19433 		// Create the struct and fill it with the composite
19434 		"%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
19435 		// Insert the value
19436 		"%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19437 		// Store the object
19438 		"             OpStore %struct_v %comp_obj\n"
19439 		// Get deepest possible composite pointer
19440 		"%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19441 		"%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
19442 		// Read back the stored value
19443 		"%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19444 		"             OpStore %outloc %read_val\n"
19445 		"             OpReturn\n"
19446 		"             OpFunctionEnd\n"
19447 	).specialize(parameters);
19448 }
19449 
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19450 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19451 {
19452 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
19453 	de::Random						rnd				(deStringHash(group->getName()));
19454 
19455 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19456 	{
19457 		NumberType						numberType	= NumberType(type);
19458 		const string					typeName	= getNumberTypeName(numberType);
19459 		const string					description	= "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
19460 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19461 
19462 		vector<map<string, string> >	testCases;
19463 		createCompositeCases(testCases, rnd, numberType);
19464 
19465 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19466 		{
19467 			ComputeShaderSpec	spec;
19468 
19469 			// Number of components inside of a struct
19470 			deUint32 structComponents = rnd.getInt(2, 8);
19471 			// Component index value
19472 			deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19473 			AssemblyStructInfo structInfo(structComponents, structIndex);
19474 
19475 			spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19476 
19477 			switch (numberType)
19478 			{
19479 				case NUMBERTYPE_INT32:
19480 				{
19481 					deInt32 number = getInt(rnd);
19482 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19483 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19484 					break;
19485 				}
19486 				case NUMBERTYPE_UINT32:
19487 				{
19488 					deUint32 number = rnd.getUint32();
19489 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19490 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19491 					break;
19492 				}
19493 				case NUMBERTYPE_FLOAT32:
19494 				{
19495 					float number = rnd.getFloat();
19496 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19497 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19498 					break;
19499 				}
19500 				default:
19501 					DE_ASSERT(false);
19502 			}
19503 			spec.numWorkGroups = IVec3(1, 1, 1);
19504 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
19505 		}
19506 		group->addChild(subGroup.release());
19507 	}
19508 	return group.release();
19509 }
19510 
19511 // If the params missing, uninitialized case
19512 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19513 {
19514 	map<string, string> parameters(params);
19515 
19516 	parameters["customType"]	= getAssemblyTypeName(type);
19517 
19518 	// Declare the const value, and use it in the initializer
19519 	if (params.find("constValue") != params.end())
19520 	{
19521 		parameters["variableInitializer"]	= " %const";
19522 	}
19523 	// Uninitialized case
19524 	else
19525 	{
19526 		parameters["commentDecl"]	= ";";
19527 	}
19528 
19529 	return StringTemplate(
19530 		"OpCapability Shader\n"
19531 		"OpMemoryModel Logical GLSL450\n"
19532 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19533 		"OpExecutionMode %main LocalSize 1 1 1\n"
19534 		"OpSource GLSL 430\n"
19535 		"OpName %main           \"main\"\n"
19536 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19537 		// Decorators
19538 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19539 		"OpDecorate %indata DescriptorSet 0\n"
19540 		"OpDecorate %indata Binding 0\n"
19541 		"OpDecorate %outdata DescriptorSet 0\n"
19542 		"OpDecorate %outdata Binding 1\n"
19543 		"OpDecorate %in_arr ArrayStride 4\n"
19544 		"OpDecorate %in_buf BufferBlock\n"
19545 		"OpMemberDecorate %in_buf 0 Offset 0\n"
19546 		// Base types
19547 		"%void       = OpTypeVoid\n"
19548 		"%voidf      = OpTypeFunction %void\n"
19549 		"%u32        = OpTypeInt 32 0\n"
19550 		"%i32        = OpTypeInt 32 1\n"
19551 		"%f32        = OpTypeFloat 32\n"
19552 		"%uvec3      = OpTypeVector %u32 3\n"
19553 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
19554 		"${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
19555 		// Derived types
19556 		"%in_ptr     = OpTypePointer Uniform ${customType}\n"
19557 		"%in_arr     = OpTypeRuntimeArray ${customType}\n"
19558 		"%in_buf     = OpTypeStruct %in_arr\n"
19559 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
19560 		"%indata     = OpVariable %in_bufptr Uniform\n"
19561 		"%outdata    = OpVariable %in_bufptr Uniform\n"
19562 		"%id         = OpVariable %uvec3ptr Input\n"
19563 		"%var_ptr    = OpTypePointer Function ${customType}\n"
19564 		// Constants
19565 		"%zero       = OpConstant %i32 0\n"
19566 		// Main function
19567 		"%main       = OpFunction %void None %voidf\n"
19568 		"%label      = OpLabel\n"
19569 		"%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19570 		"%idval      = OpLoad %uvec3 %id\n"
19571 		"%x          = OpCompositeExtract %u32 %idval 0\n"
19572 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
19573 		"%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
19574 
19575 		"%outval     = OpLoad ${customType} %out_var\n"
19576 		"              OpStore %outloc %outval\n"
19577 		"              OpReturn\n"
19578 		"              OpFunctionEnd\n"
19579 	).specialize(parameters);
19580 }
19581 
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)19582 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19583 {
19584 	DE_ASSERT(outputAllocs.size() != 0);
19585 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19586 
19587 	// Use custom epsilon because of the float->string conversion
19588 	const float	epsilon	= 0.00001f;
19589 
19590 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19591 	{
19592 		vector<deUint8>	expectedBytes;
19593 		float			expected;
19594 		float			actual;
19595 
19596 		expectedOutputs[outputNdx].getBytes(expectedBytes);
19597 		memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19598 		memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19599 
19600 		// Test with epsilon
19601 		if (fabs(expected - actual) > epsilon)
19602 		{
19603 			log << TestLog::Message << "Error: The actual and expected values not matching."
19604 				<< " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19605 			return false;
19606 		}
19607 	}
19608 	return true;
19609 }
19610 
19611 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)19612 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19613 {
19614 	DE_ASSERT(outputAllocs.size() != 0);
19615 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19616 
19617 	// Copy and discard the result.
19618 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19619 	{
19620 		vector<deUint8>	expectedBytes;
19621 		expectedOutputs[outputNdx].getBytes(expectedBytes);
19622 
19623 		const size_t	width			= expectedBytes.size();
19624 		vector<char>	data			(width);
19625 
19626 		memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
19627 	}
19628 	return true;
19629 }
19630 
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)19631 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19632 {
19633 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19634 	de::Random						rnd		(deStringHash(group->getName()));
19635 
19636 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19637 	{
19638 		NumberType						numberType	= NumberType(type);
19639 		const string					typeName	= getNumberTypeName(numberType);
19640 		const string					description	= "Test the OpVariable initializer with " + typeName + ".";
19641 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19642 
19643 		// 2 similar subcases (initialized and uninitialized)
19644 		for (int subCase = 0; subCase < 2; ++subCase)
19645 		{
19646 			ComputeShaderSpec spec;
19647 			spec.numWorkGroups = IVec3(1, 1, 1);
19648 
19649 			map<string, string>				params;
19650 
19651 			switch (numberType)
19652 			{
19653 				case NUMBERTYPE_INT32:
19654 				{
19655 					deInt32 number = getInt(rnd);
19656 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19657 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19658 					params["constValue"] = numberToString(number);
19659 					break;
19660 				}
19661 				case NUMBERTYPE_UINT32:
19662 				{
19663 					deUint32 number = rnd.getUint32();
19664 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19665 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19666 					params["constValue"] = numberToString(number);
19667 					break;
19668 				}
19669 				case NUMBERTYPE_FLOAT32:
19670 				{
19671 					float number = rnd.getFloat();
19672 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19673 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19674 					spec.verifyIO = &compareFloats;
19675 					params["constValue"] = numberToString(number);
19676 					break;
19677 				}
19678 				default:
19679 					DE_ASSERT(false);
19680 			}
19681 
19682 			// Initialized subcase
19683 			if (!subCase)
19684 			{
19685 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19686 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19687 			}
19688 			// Uninitialized subcase
19689 			else
19690 			{
19691 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19692 				spec.verifyIO = &passthruVerify;
19693 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19694 			}
19695 		}
19696 		group->addChild(subGroup.release());
19697 	}
19698 	return group.release();
19699 }
19700 
createOpNopTests(tcu::TestContext & testCtx)19701 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19702 {
19703 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19704 	RGBA							defaultColors[4];
19705 	map<string, string>				opNopFragments;
19706 
19707 	getDefaultColors(defaultColors);
19708 
19709 	opNopFragments["testfun"]		=
19710 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19711 		"%param1 = OpFunctionParameter %v4f32\n"
19712 		"%label_testfun = OpLabel\n"
19713 		"OpNop\n"
19714 		"OpNop\n"
19715 		"OpNop\n"
19716 		"OpNop\n"
19717 		"OpNop\n"
19718 		"OpNop\n"
19719 		"OpNop\n"
19720 		"OpNop\n"
19721 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19722 		"%b = OpFAdd %f32 %a %a\n"
19723 		"OpNop\n"
19724 		"%c = OpFSub %f32 %b %a\n"
19725 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19726 		"OpNop\n"
19727 		"OpNop\n"
19728 		"OpReturnValue %ret\n"
19729 		"OpFunctionEnd\n";
19730 
19731 	createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19732 
19733 	return testGroup.release();
19734 }
19735 
createOpNameTests(tcu::TestContext & testCtx)19736 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19737 {
19738 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19739 	RGBA							defaultColors[4];
19740 	map<string, string>				opNameFragments;
19741 
19742 	getDefaultColors(defaultColors);
19743 
19744 	opNameFragments["testfun"] =
19745 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19746 		"%param1     = OpFunctionParameter %v4f32\n"
19747 		"%label_func = OpLabel\n"
19748 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19749 		"%b          = OpFAdd %f32 %a %a\n"
19750 		"%c          = OpFSub %f32 %b %a\n"
19751 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19752 		"OpReturnValue %ret\n"
19753 		"OpFunctionEnd\n";
19754 
19755 	opNameFragments["debug"] =
19756 		"OpName %BP_main \"not_main\"";
19757 
19758 	createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19759 
19760 	return testGroup.release();
19761 }
19762 
createFloat16Tests(tcu::TestContext & testCtx)19763 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19764 {
19765 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19766 
19767 	testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19768 	testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19769 	testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19770 	testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19771 	testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19772 	testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19773 	testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19774 	testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19775 	testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19776 	testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19777 	testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19778 	testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19779 	testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19780 	testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19781 	testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19782 
19783 	return testGroup.release();
19784 }
19785 
createFloat32Tests(tcu::TestContext & testCtx)19786 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19787 {
19788 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19789 
19790 	testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19791 	testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19792 	testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19793 	testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19794 
19795 	return testGroup.release();
19796 }
19797 
createFloat16Group(tcu::TestContext & testCtx)19798 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19799 {
19800 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19801 
19802 	testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19803 	testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19804 	testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19805 	testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19806 	testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19807 	testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19808 	testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19809 	testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19810 	testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19811 	testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19812 	testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19813 	testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19814 	testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19815 	testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19816 	testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19817 
19818 	return testGroup.release();
19819 }
19820 
createFloat32Group(tcu::TestContext & testCtx)19821 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19822 {
19823 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19824 
19825 	testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19826 	testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19827 	testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19828 	testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19829 
19830 	return testGroup.release();
19831 }
19832 
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)19833 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19834 {
19835 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19836 
19837 	de::Random						rnd				(deStringHash(group->getName()));
19838 	const int		numElements		= 100;
19839 	vector<float>	inputData		(numElements, 0);
19840 	vector<float>	outputData		(numElements, 0);
19841 	fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19842 
19843 	const StringTemplate			shaderTemplate	(
19844 		"${CAPS}\n"
19845 		"OpMemoryModel Logical GLSL450\n"
19846 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19847 		"OpExecutionMode %main LocalSize 1 1 1\n"
19848 		"OpSource GLSL 430\n"
19849 		"OpName %main           \"main\"\n"
19850 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19851 
19852 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19853 
19854 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19855 
19856 		"%id        = OpVariable %uvec3ptr Input\n"
19857 		"${CONST}\n"
19858 		"%main      = OpFunction %void None %voidf\n"
19859 		"%label     = OpLabel\n"
19860 		"%idval     = OpLoad %uvec3 %id\n"
19861 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19862 		"%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19863 
19864 		"${TEST}\n"
19865 
19866 		"%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19867 		"             OpStore %outloc %res\n"
19868 		"             OpReturn\n"
19869 		"             OpFunctionEnd\n"
19870 	);
19871 
19872 	// Each test case produces 4 boolean values, and we want each of these values
19873 	// to come froma different combination of the available bit-sizes, so compute
19874 	// all possible combinations here.
19875 	vector<deUint32>	widths;
19876 	widths.push_back(32);
19877 	widths.push_back(16);
19878 	widths.push_back(8);
19879 
19880 	vector<IVec4>	cases;
19881 	for (size_t width0 = 0; width0 < widths.size(); width0++)
19882 	{
19883 		for (size_t width1 = 0; width1 < widths.size(); width1++)
19884 		{
19885 			for (size_t width2 = 0; width2 < widths.size(); width2++)
19886 			{
19887 				for (size_t width3 = 0; width3 < widths.size(); width3++)
19888 				{
19889 					cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19890 				}
19891 			}
19892 		}
19893 	}
19894 
19895 	for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19896 	{
19897 		/// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19898 		if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19899 			continue;
19900 
19901 		map<string, string>	specializations;
19902 		ComputeShaderSpec	spec;
19903 
19904 		// Inject appropriate capabilities and reference constants depending
19905 		// on the bit-sizes required by this test case
19906 		bool hasFloat32	= cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19907 		bool hasFloat16	= cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19908 		bool hasInt8	= cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19909 
19910 		string capsStr	= "OpCapability Shader\n";
19911 		string constStr	=
19912 			"%c0i32     = OpConstant %i32 0\n"
19913 			"%c1f32     = OpConstant %f32 1.0\n"
19914 			"%c0f32     = OpConstant %f32 0.0\n";
19915 
19916 		if (hasFloat32)
19917 		{
19918 			constStr	+=
19919 				"%c10f32    = OpConstant %f32 10.0\n"
19920 				"%c25f32    = OpConstant %f32 25.0\n"
19921 				"%c50f32    = OpConstant %f32 50.0\n"
19922 				"%c90f32    = OpConstant %f32 90.0\n";
19923 		}
19924 
19925 		if (hasFloat16)
19926 		{
19927 			capsStr		+= "OpCapability Float16\n";
19928 			constStr	+=
19929 				"%f16       = OpTypeFloat 16\n"
19930 				"%c10f16    = OpConstant %f16 10.0\n"
19931 				"%c25f16    = OpConstant %f16 25.0\n"
19932 				"%c50f16    = OpConstant %f16 50.0\n"
19933 				"%c90f16    = OpConstant %f16 90.0\n";
19934 		}
19935 
19936 		if (hasInt8)
19937 		{
19938 			capsStr		+= "OpCapability Int8\n";
19939 			constStr	+=
19940 				"%i8        = OpTypeInt 8 1\n"
19941 				"%c10i8     = OpConstant %i8 10\n"
19942 				"%c25i8     = OpConstant %i8 25\n"
19943 				"%c50i8     = OpConstant %i8 50\n"
19944 				"%c90i8     = OpConstant %i8 90\n";
19945 		}
19946 
19947 		// Each invocation reads a different float32 value as input. Depending on
19948 		// the bit-sizes required by the particular test case, we also produce
19949 		// float16 and/or and int8 values by converting from the 32-bit float.
19950 		string testStr	= "";
19951 		testStr			+= "%inval32   = OpLoad %f32 %inloc\n";
19952 		if (hasFloat16)
19953 			testStr		+= "%inval16   = OpFConvert %f16 %inval32\n";
19954 		if (hasInt8)
19955 			testStr		+= "%inval8    = OpConvertFToS %i8 %inval32\n";
19956 
19957 		// Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19958 		// that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19959 		// when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19960 		// other way around, so in this case we want < instead of <=.
19961 		if (cases[caseNdx][0] == 32)
19962 			testStr		+= "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19963 		else if (cases[caseNdx][0] == 16)
19964 			testStr		+= "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19965 		else
19966 			testStr		+= "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
19967 
19968 		if (cases[caseNdx][1] == 32)
19969 			testStr		+= "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
19970 		else if (cases[caseNdx][1] == 16)
19971 			testStr		+= "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
19972 		else
19973 			testStr		+= "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
19974 
19975 		if (cases[caseNdx][2] == 32)
19976 			testStr		+= "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
19977 		else if (cases[caseNdx][2] == 16)
19978 			testStr		+= "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
19979 		else
19980 			testStr		+= "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
19981 
19982 		if (cases[caseNdx][3] == 32)
19983 			testStr		+= "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19984 		else if (cases[caseNdx][3] == 16)
19985 			testStr		+= "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19986 		else
19987 			testStr		+= "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
19988 
19989 		testStr			+= "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
19990 		testStr			+= "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
19991 		testStr			+= "%or2       = OpLogicalOr %bool %and1 %or1\n";
19992 		testStr			+= "%not1      = OpLogicalNot %bool %or2\n";
19993 		testStr			+= "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19994 
19995 		specializations["CAPS"]		= capsStr;
19996 		specializations["CONST"]	= constStr;
19997 		specializations["TEST"]		= testStr;
19998 
19999 		// Compute expected result by evaluating the boolean expression computed in the shader for each input value
20000 		for (size_t ndx = 0; ndx < numElements; ++ndx)
20001 			outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
20002 
20003 		spec.assembly = shaderTemplate.specialize(specializations);
20004 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
20005 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
20006 		spec.numWorkGroups = IVec3(numElements, 1, 1);
20007 		if (hasFloat16)
20008 			spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
20009 		if (hasInt8)
20010 			spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
20011 		spec.extensions.push_back("VK_KHR_shader_float16_int8");
20012 
20013 		string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
20014 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
20015 	}
20016 
20017 	return group.release();
20018 }
20019 
createBoolGroup(tcu::TestContext & testCtx)20020 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20021 {
20022 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20023 
20024 	testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20025 
20026 	return testGroup.release();
20027 }
20028 
createOpNameAbuseTests(tcu::TestContext & testCtx)20029 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20030 {
20031 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20032 	vector<CaseParameter>			abuseCases;
20033 	RGBA							defaultColors[4];
20034 	map<string, string>				opNameFragments;
20035 
20036 	getOpNameAbuseCases(abuseCases);
20037 	getDefaultColors(defaultColors);
20038 
20039 	opNameFragments["testfun"] =
20040 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20041 		"%param1     = OpFunctionParameter %v4f32\n"
20042 		"%label_func = OpLabel\n"
20043 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20044 		"%b          = OpFAdd %f32 %a %a\n"
20045 		"%c          = OpFSub %f32 %b %a\n"
20046 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20047 		"OpReturnValue %ret\n"
20048 		"OpFunctionEnd\n";
20049 
20050 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20051 	{
20052 		string casename;
20053 		casename = string("main") + abuseCases[i].name;
20054 
20055 		opNameFragments["debug"] =
20056 			"OpName %BP_main \"" + abuseCases[i].param + "\"";
20057 
20058 		createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20059 	}
20060 
20061 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20062 	{
20063 		string casename;
20064 		casename = string("b") + abuseCases[i].name;
20065 
20066 		opNameFragments["debug"] =
20067 			"OpName %b \"" + abuseCases[i].param + "\"";
20068 
20069 		createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20070 	}
20071 
20072 	{
20073 		opNameFragments["debug"] =
20074 			"OpName %test_code \"name1\"\n"
20075 			"OpName %param1    \"name2\"\n"
20076 			"OpName %a         \"name3\"\n"
20077 			"OpName %b         \"name4\"\n"
20078 			"OpName %c         \"name5\"\n"
20079 			"OpName %ret       \"name6\"\n";
20080 
20081 		createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20082 	}
20083 
20084 	{
20085 		opNameFragments["debug"] =
20086 			"OpName %test_code \"the_same\"\n"
20087 			"OpName %param1    \"the_same\"\n"
20088 			"OpName %a         \"the_same\"\n"
20089 			"OpName %b         \"the_same\"\n"
20090 			"OpName %c         \"the_same\"\n"
20091 			"OpName %ret       \"the_same\"\n";
20092 
20093 		createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20094 	}
20095 
20096 	{
20097 		opNameFragments["debug"] =
20098 			"OpName %BP_main \"to_be\"\n"
20099 			"OpName %BP_main \"or_not\"\n"
20100 			"OpName %BP_main \"to_be\"\n";
20101 
20102 		createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20103 	}
20104 
20105 	{
20106 		opNameFragments["debug"] =
20107 			"OpName %b \"to_be\"\n"
20108 			"OpName %b \"or_not\"\n"
20109 			"OpName %b \"to_be\"\n";
20110 
20111 		createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20112 	}
20113 
20114 	return abuseGroup.release();
20115 }
20116 
20117 
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20118 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20119 {
20120 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20121 	vector<CaseParameter>			abuseCases;
20122 	RGBA							defaultColors[4];
20123 	map<string, string>				opMemberNameFragments;
20124 
20125 	getOpNameAbuseCases(abuseCases);
20126 	getDefaultColors(defaultColors);
20127 
20128 	opMemberNameFragments["pre_main"] =
20129 		"%f3str = OpTypeStruct %f32 %f32 %f32\n";
20130 
20131 	opMemberNameFragments["testfun"] =
20132 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20133 		"%param1     = OpFunctionParameter %v4f32\n"
20134 		"%label_func = OpLabel\n"
20135 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20136 		"%b          = OpFAdd %f32 %a %a\n"
20137 		"%c          = OpFSub %f32 %b %a\n"
20138 		"%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
20139 		"%d          = OpCompositeExtract %f32 %cstr 0\n"
20140 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20141 		"OpReturnValue %ret\n"
20142 		"OpFunctionEnd\n";
20143 
20144 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20145 	{
20146 		string casename;
20147 		casename = string("f3str_x") + abuseCases[i].name;
20148 
20149 		opMemberNameFragments["debug"] =
20150 			"OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20151 
20152 		createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20153 	}
20154 
20155 	{
20156 		opMemberNameFragments["debug"] =
20157 			"OpMemberName %f3str 0 \"name1\"\n"
20158 			"OpMemberName %f3str 1 \"name2\"\n"
20159 			"OpMemberName %f3str 2 \"name3\"\n";
20160 
20161 		createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20162 	}
20163 
20164 	{
20165 		opMemberNameFragments["debug"] =
20166 			"OpMemberName %f3str 0 \"the_same\"\n"
20167 			"OpMemberName %f3str 1 \"the_same\"\n"
20168 			"OpMemberName %f3str 2 \"the_same\"\n";
20169 
20170 		createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20171 	}
20172 
20173 	{
20174 		opMemberNameFragments["debug"] =
20175 			"OpMemberName %f3str 0 \"to_be\"\n"
20176 			"OpMemberName %f3str 1 \"or_not\"\n"
20177 			"OpMemberName %f3str 0 \"to_be\"\n"
20178 			"OpMemberName %f3str 2 \"makes_no\"\n"
20179 			"OpMemberName %f3str 0 \"difference\"\n"
20180 			"OpMemberName %f3str 0 \"to_me\"\n";
20181 
20182 
20183 		createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20184 	}
20185 
20186 	return abuseGroup.release();
20187 }
20188 
getSparseIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20189 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20190 {
20191 	vector<deUint32>	result;
20192 	de::Random			rnd		(seed);
20193 
20194 	result.reserve(numDataPoints);
20195 
20196 	for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20197 		result.push_back(rnd.getUint32());
20198 
20199 	return result;
20200 }
20201 
// Returns the element-wise sum inData1[i] + inData2[i], one entry per element
// of inData1. inData2 is read at the same indices as inData1, so it must hold
// at least as many elements.
vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
{
	const size_t		count	= inData1.size();
	vector<deUint32>	sums	(count);

	for (size_t ndx = 0; ndx < count; ++ndx)
		sums[ndx] = inData1[ndx] + inData2[ndx];

	return sums;
}
20213 
20214 template<class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20215 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20216 {
20217 	const deUint32			numDataPoints	= 16;
20218 	const std::string		testName		("sparse_ids");
20219 	const deUint32			seed			(deStringHash(testName.c_str()));
20220 	const vector<deUint32>	inData1			(getSparseIdsAbuseData(numDataPoints, seed + 1));
20221 	const vector<deUint32>	inData2			(getSparseIdsAbuseData(numDataPoints, seed + 2));
20222 	const vector<deUint32>	outData			(getSparseIdsAbuseResults(inData1, inData2));
20223 	const StringTemplate	preMain
20224 	(
20225 		"%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20226 		"   %up_u32 = OpTypePointer Uniform %u32\n"
20227 		"   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20228 		"   %SSBO32 = OpTypeStruct %ra_u32\n"
20229 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20230 		"%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20231 		"%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20232 		" %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20233 	);
20234 	const StringTemplate	decoration
20235 	(
20236 		"OpDecorate %ra_u32 ArrayStride 4\n"
20237 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
20238 		"OpDecorate %SSBO32 BufferBlock\n"
20239 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
20240 		"OpDecorate %ssbo_src0 Binding 0\n"
20241 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
20242 		"OpDecorate %ssbo_src1 Binding 1\n"
20243 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
20244 		"OpDecorate %ssbo_dst Binding 2\n"
20245 	);
20246 	const StringTemplate	testFun
20247 	(
20248 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20249 		"    %param = OpFunctionParameter %v4f32\n"
20250 
20251 		"    %entry = OpLabel\n"
20252 		"        %i = OpVariable %fp_i32 Function\n"
20253 		"             OpStore %i %c_i32_0\n"
20254 		"             OpBranch %loop\n"
20255 
20256 		"     %loop = OpLabel\n"
20257 		"    %i_cmp = OpLoad %i32 %i\n"
20258 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20259 		"             OpLoopMerge %merge %next None\n"
20260 		"             OpBranchConditional %lt %write %merge\n"
20261 
20262 		"    %write = OpLabel\n"
20263 		"      %ndx = OpLoad %i32 %i\n"
20264 
20265 		"      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20266 		"      %128 = OpLoad %u32 %127\n"
20267 
20268 		// The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20269 		"  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20270 		"  %4194001 = OpLoad %u32 %4194000\n"
20271 
20272 		"  %2097151 = OpIAdd %u32 %128 %4194001\n"
20273 		"  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20274 		"             OpStore %2097152 %2097151\n"
20275 		"             OpBranch %next\n"
20276 
20277 		"     %next = OpLabel\n"
20278 		"    %i_cur = OpLoad %i32 %i\n"
20279 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20280 		"             OpStore %i %i_new\n"
20281 		"             OpBranch %loop\n"
20282 
20283 		"    %merge = OpLabel\n"
20284 		"             OpReturnValue %param\n"
20285 
20286 		"             OpFunctionEnd\n"
20287 	);
20288 	SpecResource			specResource;
20289 	map<string, string>		specs;
20290 	VulkanFeatures			features;
20291 	map<string, string>		fragments;
20292 	vector<string>			extensions;
20293 
20294 	specs["num_data_points"]	= de::toString(numDataPoints);
20295 
20296 	fragments["decoration"]		= decoration.specialize(specs);
20297 	fragments["pre_main"]		= preMain.specialize(specs);
20298 	fragments["testfun"]		= testFun.specialize(specs);
20299 
20300 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20301 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20302 	specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20303 
20304 	if (std::is_base_of<GraphicsResources, SpecResource>::value)
20305 	{
20306 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
20307 		features.coreFeatures.fragmentStoresAndAtomics			= true;
20308 	}
20309 
20310 	finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20311 }
20312 
getLotsIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20313 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20314 {
20315 	vector<deUint32>	result;
20316 	de::Random			rnd		(seed);
20317 
20318 	result.reserve(numDataPoints);
20319 
20320 	// Fixed value
20321 	result.push_back(1u);
20322 
20323 	// Random values
20324 	for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20325 		result.push_back(rnd.getUint8());
20326 
20327 	return result;
20328 }
20329 
// Returns inData1[i] + count * inData2[i] for every element of inData1.
// inData2 is read at the same indices as inData1, so it must hold at least as
// many elements.
vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
{
	const size_t		numValues	= inData1.size();
	vector<deUint32>	expected	(numValues);

	for (size_t ndx = 0; ndx < numValues; ++ndx)
		expected[ndx] = inData1[ndx] + count * inData2[ndx];

	return expected;
}
20341 
20342 template<class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20343 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20344 {
20345 	const deUint32			numDataPoints	= 16;
20346 	const deUint32			firstNdx		= 100u;
20347 	const deUint32			sequenceCount	= 10000u;
20348 	const std::string		testName		("lots_ids");
20349 	const deUint32			seed			(deStringHash(testName.c_str()));
20350 	const vector<deUint32>	inData1			(getLotsIdsAbuseData(numDataPoints, seed + 1));
20351 	const vector<deUint32>	inData2			(getLotsIdsAbuseData(numDataPoints, seed + 2));
20352 	const vector<deUint32>	outData			(getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20353 	const StringTemplate preMain
20354 	(
20355 		"%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20356 		"   %up_u32 = OpTypePointer Uniform %u32\n"
20357 		"   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20358 		"   %SSBO32 = OpTypeStruct %ra_u32\n"
20359 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20360 		"%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20361 		"%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20362 		" %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20363 	);
20364 	const StringTemplate decoration
20365 	(
20366 		"OpDecorate %ra_u32 ArrayStride 4\n"
20367 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
20368 		"OpDecorate %SSBO32 BufferBlock\n"
20369 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
20370 		"OpDecorate %ssbo_src0 Binding 0\n"
20371 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
20372 		"OpDecorate %ssbo_src1 Binding 1\n"
20373 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
20374 		"OpDecorate %ssbo_dst Binding 2\n"
20375 	);
20376 	const StringTemplate testFun
20377 	(
20378 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20379 		"    %param = OpFunctionParameter %v4f32\n"
20380 
20381 		"    %entry = OpLabel\n"
20382 		"        %i = OpVariable %fp_i32 Function\n"
20383 		"             OpStore %i %c_i32_0\n"
20384 		"             OpBranch %loop\n"
20385 
20386 		"     %loop = OpLabel\n"
20387 		"    %i_cmp = OpLoad %i32 %i\n"
20388 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20389 		"             OpLoopMerge %merge %next None\n"
20390 		"             OpBranchConditional %lt %write %merge\n"
20391 
20392 		"    %write = OpLabel\n"
20393 		"      %ndx = OpLoad %i32 %i\n"
20394 
20395 		"       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20396 		"       %91 = OpLoad %u32 %90\n"
20397 
20398 		"       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20399 		"       %${zeroth_id} = OpLoad %u32 %98\n"
20400 
20401 		"${seq}\n"
20402 
20403 		// The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20404 		"      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20405 		"             OpStore %dst %${last_id}\n"
20406 		"             OpBranch %next\n"
20407 
20408 		"     %next = OpLabel\n"
20409 		"    %i_cur = OpLoad %i32 %i\n"
20410 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20411 		"             OpStore %i %i_new\n"
20412 		"             OpBranch %loop\n"
20413 
20414 		"    %merge = OpLabel\n"
20415 		"             OpReturnValue %param\n"
20416 
20417 		"             OpFunctionEnd\n"
20418 	);
20419 	deUint32				lastId			= firstNdx;
20420 	SpecResource			specResource;
20421 	map<string, string>		specs;
20422 	VulkanFeatures			features;
20423 	map<string, string>		fragments;
20424 	vector<string>			extensions;
20425 	std::string				sequence;
20426 
20427 	for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20428 	{
20429 		const deUint32		sequenceId		= sequenceNdx + firstNdx;
20430 		const std::string	sequenceIdStr	= de::toString(sequenceId);
20431 
20432 		sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20433 		lastId = sequenceId;
20434 
20435 		if (sequenceNdx == 0)
20436 			sequence.reserve((10 + sequence.length()) * sequenceCount);
20437 	}
20438 
20439 	specs["num_data_points"]	= de::toString(numDataPoints);
20440 	specs["zeroth_id"]			= de::toString(firstNdx - 1);
20441 	specs["last_id"]			= de::toString(lastId);
20442 	specs["seq"]				= sequence;
20443 
20444 	fragments["decoration"]		= decoration.specialize(specs);
20445 	fragments["pre_main"]		= preMain.specialize(specs);
20446 	fragments["testfun"]		= testFun.specialize(specs);
20447 
20448 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20449 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20450 	specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20451 
20452 	if (std::is_base_of<GraphicsResources, SpecResource>::value)
20453 	{
20454 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
20455 		features.coreFeatures.fragmentStoresAndAtomics			= true;
20456 	}
20457 
20458 	finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20459 }
20460 
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20461 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20462 {
20463 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20464 
20465 	createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20466 	createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20467 
20468 	return testGroup.release();
20469 }
20470 
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20471 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20472 {
20473 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20474 
20475 	createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20476 	createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20477 
20478 	return testGroup.release();
20479 }
20480 
createFunctionParamsGroup(tcu::TestContext & testCtx)20481 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20482 {
20483 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20484 #ifndef CTS_USES_VULKANSC
20485 	static const char data_dir[] = "spirv_assembly/instruction/function_params";
20486 
20487 	static const struct
20488 	{
20489 		const std::string name;
20490 		const std::string desc;
20491 	} cases[] =
20492 	{
20493 		{ "sampler_param", "Test combined image sampler as function parameter" },
20494 	};
20495 
20496 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20497 	{
20498 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20499 																			cases[i].name.c_str(),
20500 																			cases[i].desc.c_str(),
20501 																			data_dir,
20502 																			cases[i].name + ".amber");
20503 		testGroup->addChild(testCase);
20504 	}
20505 #endif
20506 	return testGroup.release();
20507 }
20508 
createEarlyFragmentTests(tcu::TestContext & testCtx)20509 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20510 {
20511 	de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20512 
20513 #ifndef CTS_USES_VULKANSC
20514 	static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20515 
20516 	static const struct Case
20517 	{
20518 		const string name;
20519 		const string desc;
20520 	}
20521 	cases[] =
20522 	{
20523 		// Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20524 		{ "depth_less",				"gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	},
20525 		{ "depth_greater",			"gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."	},
20526 		{ "depth_less_or_equal",	"gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20527 		{ "depth_greater_or_equal",	"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20528 		{ "depth_equal",			"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20529 		{ "depth_not_equal",		"gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	}
20530 	};
20531 
20532 	for (const auto& tCase : cases)
20533 	{
20534 		cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20535 			tCase.name.c_str(),
20536 			tCase.desc.c_str(),
20537 			dataDir,
20538 			tCase.name + ".amber");
20539 
20540 		earlyFragTests->addChild(testCase);
20541 	}
20542 #endif // CTS_USES_VULKANSC
20543 
20544 	return earlyFragTests.release();
20545 }
20546 
createEarlyAndLateFragmentTests(tcu::TestContext & testCtx)20547 tcu::TestCaseGroup* createEarlyAndLateFragmentTests(tcu::TestContext& testCtx)
20548 {
20549 	de::MovePtr<tcu::TestCaseGroup> earlyLateFragTests(new tcu::TestCaseGroup(testCtx, "early_and_late_fragment", "Early And Late Fragment Tests"));
20550 #ifndef CTS_USES_VULKANSC
20551 	static const char dataDir[] = "spirv_assembly/instruction/graphics/early_and_late_fragment";
20552 
20553 	static const struct Case
20554 	{
20555 		const string name;
20556 		const string desc;
20557 	}	cases[] =
20558 	{
20559 		{ "depth_less",				"gl_FragDepth < CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	},
20560 		{ "depth_greater",			"gl_FragDepth > CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."	},
20561 		{ "depth_less_or_equal",	"gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20562 		{ "depth_greater_or_equal",	"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20563 		{ "depth_equal",			"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20564 		{ "depth_not_equal",		"gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	}
20565 	};
20566 
20567 	for (const auto& tCase : cases)
20568 	{
20569 		cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20570 			tCase.name.c_str(),
20571 			tCase.desc.c_str(),
20572 			dataDir,
20573 			tCase.name + ".amber",
20574 			{ "VK_AMD_shader_early_and_late_fragment_tests" });
20575 
20576 		earlyLateFragTests->addChild(testCase);
20577 	}
20578 #endif
20579 
20580 	return earlyLateFragTests.release();
20581 }
20582 
createOpExecutionModeTests(tcu::TestContext & testCtx)20583 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20584 {
20585 	de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20586 
20587 #ifndef CTS_USES_VULKANSC
20588 	static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
20589 
20590 	static const struct Case
20591 	{
20592 		const string name;
20593 		const string desc;
20594 	} cases[] =
20595 	{
20596 		{ "depthless_0",		"FragDepth < Polygon depth: depth test should pass." },
20597 		{ "depthless_1",		"FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20598 		{ "depthless_2",		"FragDepth < Polygon depth: depth test should fail." },
20599 		{ "depthless_3",		"FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20600 		{ "depthless_4",		"FragDepth < Polygon depth: depth test should pass." },
20601 		{ "depthgreater_0",		"FragDepth > Polygon depth: depth test should pass." },
20602 		{ "depthgreater_1",		"FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20603 		{ "depthgreater_2",		"FragDepth > Polygon depth: depth test should fail." },
20604 		{ "depthgreater_3",		"FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20605 		{ "depthgreater_4",		"FragDepth > Polygon depth: depth test should pass." },
20606 		{ "depthunchanged_0",	"FragDepth == Polygon depth: depth test should pass." },
20607 		{ "depthunchanged_1",	"FragDepth == Polygon depth: depth test should fail." },
20608 		{ "depthunchanged_2",	"FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20609 		{ "depthunchanged_3",	"FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
20610 	};
20611 
20612 	for (const auto& case_ : cases)
20613 	{
20614 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20615 																			case_.name.c_str(),
20616 																			case_.desc.c_str(),
20617 																			dataDir,
20618 																			case_.name + ".amber");
20619 		testGroup->addChild(testCase);
20620 	}
20621 #endif // CTS_USES_VULKANSC
20622 
20623 	return testGroup.release();
20624 }
20625 
createOpMulExtendedGroup(tcu::TestContext & testCtx)20626 tcu::TestCaseGroup* createOpMulExtendedGroup (tcu::TestContext& testCtx)
20627 {
20628 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "mul_extended", "Op[S/U]MulExtended tests"));
20629 
20630 #ifndef CTS_USES_VULKANSC
20631 	static const char	dataDir[]	= "spirv_assembly/instruction/compute/mul_extended";
20632 
20633 	static const struct Case
20634 	{
20635 		const string name;
20636 		const vector<string> features;
20637 	} cases[] =
20638 	{
20639 		{	"signed_16bit",		{"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}			},
20640 		{	"signed_32bit",		{}																					},
20641 		{	"signed_64bit",		{"Features.shaderInt64"}															},
20642 		{	"signed_8bit",		{"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}	},
20643 		{	"unsigned_16bit",	{"Features.shaderInt16", "Storage16BitFeatures.storageBuffer16BitAccess"}			},
20644 		{	"unsigned_32bit",	{}																					},
20645 		{	"unsigned_64bit",	{"Features.shaderInt64"}															},
20646 		{	"unsigned_8bit",	{"Float16Int8Features.shaderInt8", "Storage8BitFeatures.storageBuffer8BitAccess"}	}
20647 	};
20648 
20649 	for (const auto& test : cases)
20650 	{
20651 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20652 																			test.name.c_str(),
20653 																			"",
20654 																			dataDir,
20655 																			test.name + ".amber",
20656 																			test.features);
20657 		testGroup->addChild(testCase);
20658 	}
20659 #endif // CTS_USES_VULKANSC
20660 
20661 	return testGroup.release();
20662 }
20663 
createQueryGroup(tcu::TestContext & testCtx)20664 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20665 {
20666 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20667 
20668 #ifndef CTS_USES_VULKANSC
20669 	static const char data_dir[] = "spirv_assembly/instruction/image_query";
20670 
20671 	static const struct
20672 	{
20673 		const std::string name;
20674 		const std::string desc;
20675 	} cases[] =
20676 	{
20677 		{ "samples_storage", "Test samples query can be used on storage images" },
20678 	};
20679 
20680 	vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20681 
20682 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20683 	{
20684 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20685 																			cases[i].name.c_str(),
20686 																			cases[i].desc.c_str(),
20687 																			data_dir,
20688 																			cases[i].name + ".amber",
20689 																			requirements);
20690 		testGroup->addChild(testCase);
20691 	}
20692 #endif // CTS_USES_VULKANSC
20693 
20694 	return testGroup.release();
20695 }
20696 
createInstructionTests(tcu::TestContext & testCtx)20697 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20698 {
20699 	const bool testComputePipeline = true;
20700 
20701 	de::MovePtr<tcu::TestCaseGroup> instructionTests	(new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20702 	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20703 	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20704 
20705 	computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20706 	computeTests->addChild(createLocalSizeGroup(testCtx, false));
20707 	computeTests->addChild(createLocalSizeGroup(testCtx, true));
20708 	computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20709 	computeTests->addChild(createOpNopGroup(testCtx));
20710 	computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20711 	computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20712 	computeTests->addChild(createOpAtomicGroup(testCtx, false));
20713 	computeTests->addChild(createOpAtomicGroup(testCtx, true));					// Using new StorageBuffer decoration
20714 	computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));	// Return value validation
20715 	computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true));	// volatile atomics
20716 	computeTests->addChild(createOpLineGroup(testCtx));
20717 	computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20718 	computeTests->addChild(createOpNoLineGroup(testCtx));
20719 	computeTests->addChild(createOpConstantNullGroup(testCtx));
20720 	computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20721 	computeTests->addChild(createOpConstantUsageGroup(testCtx));
20722 	computeTests->addChild(createSpecConstantGroup(testCtx));
20723 	computeTests->addChild(createOpSourceGroup(testCtx));
20724 	computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20725 	computeTests->addChild(createDecorationGroupGroup(testCtx));
20726 	computeTests->addChild(createOpPhiGroup(testCtx));
20727 	computeTests->addChild(createLoopControlGroup(testCtx));
20728 	computeTests->addChild(createFunctionControlGroup(testCtx));
20729 	computeTests->addChild(createSelectionControlGroup(testCtx));
20730 	computeTests->addChild(createBlockOrderGroup(testCtx));
20731 	computeTests->addChild(createMultipleShaderGroup(testCtx));
20732 	computeTests->addChild(createMemoryAccessGroup(testCtx));
20733 	computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20734 	computeTests->addChild(createOpCopyObjectGroup(testCtx));
20735 	computeTests->addChild(createNoContractionGroup(testCtx));
20736 	computeTests->addChild(createOpUndefGroup(testCtx));
20737 	computeTests->addChild(createOpUnreachableGroup(testCtx));
20738 	computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20739 	computeTests->addChild(createOpFRemGroup(testCtx));
20740 	computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20741 	computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20742 	computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20743 	computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20744 #ifndef CTS_USES_VULKANSC
20745 	computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20746 	computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20747 	computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20748 	computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20749 	computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20750 	computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20751 #endif // CTS_USES_VULKANSC
20752 	computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20753 	computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20754 	computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20755 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20756 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20757 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20758 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20759 	computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20760 	computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20761 	computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20762 	computeTests->addChild(createOpNMinGroup(testCtx));
20763 	computeTests->addChild(createOpNMaxGroup(testCtx));
20764 	computeTests->addChild(createOpNClampGroup(testCtx));
20765 	computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
20766 	{
20767 		de::MovePtr<tcu::TestCaseGroup>	computeAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20768 
20769 		computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20770 		computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20771 
20772 		computeTests->addChild(computeAndroidTests.release());
20773 	}
20774 
20775 	computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20776 	computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20777 	computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20778 	computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20779 	computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20780 	computeTests->addChild(createVariableInitComputeGroup(testCtx));
20781 	computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20782 	computeTests->addChild(createIndexingComputeGroup(testCtx));
20783 	computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20784 	computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20785 	computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20786 	computeTests->addChild(createOpNameGroup(testCtx));
20787 	computeTests->addChild(createOpMemberNameGroup(testCtx));
20788 	computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20789 	computeTests->addChild(createFloat16Group(testCtx));
20790 #ifndef CTS_USES_VULKANSC
20791 	computeTests->addChild(createFloat32Group(testCtx));
20792 #endif // CTS_USES_VULKANSC
20793 	computeTests->addChild(createBoolGroup(testCtx));
20794 	computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20795 	computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20796 #ifndef CTS_USES_VULKANSC
20797 	computeTests->addChild(createSignedIntCompareGroup(testCtx));
20798 	computeTests->addChild(createSignedOpTestsGroup(testCtx));
20799 #endif // CTS_USES_VULKANSC
20800 	computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20801 #ifndef CTS_USES_VULKANSC
20802 	computeTests->addChild(createPtrAccessChainGroup(testCtx));
20803 	computeTests->addChild(createVectorShuffleGroup(testCtx));
20804 #endif // CTS_USES_VULKANSC
20805 	computeTests->addChild(createHlslComputeGroup(testCtx));
20806 	computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20807 	computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20808 #ifndef CTS_USES_VULKANSC
20809 	computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20810 #endif // CTS_USES_VULKANSC
20811 	computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20812 	computeTests->addChild(createOpMulExtendedGroup(testCtx));
20813 
20814 	graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20815 	graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20816 	graphicsTests->addChild(createOpNopTests(testCtx));
20817 	graphicsTests->addChild(createOpSourceTests(testCtx));
20818 	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20819 	graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20820 	graphicsTests->addChild(createOpLineTests(testCtx));
20821 	graphicsTests->addChild(createOpNoLineTests(testCtx));
20822 	graphicsTests->addChild(createOpConstantNullTests(testCtx));
20823 	graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20824 	graphicsTests->addChild(createMemoryAccessTests(testCtx));
20825 	graphicsTests->addChild(createOpUndefTests(testCtx));
20826 	graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20827 	graphicsTests->addChild(createModuleTests(testCtx));
20828 	graphicsTests->addChild(createUnusedVariableTests(testCtx));
20829 	graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20830 	graphicsTests->addChild(createOpPhiTests(testCtx));
20831 	graphicsTests->addChild(createNoContractionTests(testCtx));
20832 	graphicsTests->addChild(createOpQuantizeTests(testCtx));
20833 	graphicsTests->addChild(createLoopTests(testCtx));
20834 	graphicsTests->addChild(createSpecConstantTests(testCtx));
20835 	graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20836 	graphicsTests->addChild(createBarrierTests(testCtx));
20837 	graphicsTests->addChild(createDecorationGroupTests(testCtx));
20838 	graphicsTests->addChild(createFRemTests(testCtx));
20839 	graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20840 	graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20841 
20842 	{
20843 		de::MovePtr<tcu::TestCaseGroup>	graphicsAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20844 
20845 		graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20846 		graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20847 
20848 		graphicsTests->addChild(graphicsAndroidTests.release());
20849 	}
20850 
20851 	graphicsTests->addChild(createOpNameTests(testCtx));
20852 	graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20853 	graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20854 
20855 	graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20856 	graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20857 	graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20858 	graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20859 	graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20860 	graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20861 	graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20862 	graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20863 	graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20864 	graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20865 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20866 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20867 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20868 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20869 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20870 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20871 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20872 	graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20873 	graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20874 	graphicsTests->addChild(createFloat16Tests(testCtx));
20875 #ifndef CTS_USES_VULKANSC
20876 	graphicsTests->addChild(createFloat32Tests(testCtx));
20877 #endif // CTS_USES_VULKANSC
20878 	graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20879 	graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20880 	graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20881 	graphicsTests->addChild(createEarlyAndLateFragmentTests(testCtx));
20882 	graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20883 
20884 	instructionTests->addChild(computeTests.release());
20885 	instructionTests->addChild(graphicsTests.release());
20886 #ifndef CTS_USES_VULKANSC
20887 	instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20888 	instructionTests->addChild(createFunctionParamsGroup(testCtx));
20889 #endif // CTS_USES_VULKANSC
20890 	instructionTests->addChild(createQueryGroup(testCtx));
20891 	instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20892 	instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20893 
20894 	return instructionTests.release();
20895 }
20896 
20897 } // SpirVAssembly
20898 } // vkt
20899