• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  * Copyright (c) 2016 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktSpvAsmInstructionTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 
28 #include "tcuCommandLine.hpp"
29 #include "tcuFormatUtil.hpp"
30 #include "tcuFloat.hpp"
31 #include "tcuFloatFormat.hpp"
32 #include "tcuRGBA.hpp"
33 #include "tcuStringTemplate.hpp"
34 #include "tcuTestLog.hpp"
35 #include "tcuVectorUtil.hpp"
36 #include "tcuInterval.hpp"
37 
38 #include "vkDefs.hpp"
39 #include "vkDeviceUtil.hpp"
40 #include "vkMemUtil.hpp"
41 #include "vkPlatform.hpp"
42 #include "vkPrograms.hpp"
43 #include "vkQueryUtil.hpp"
44 #include "vkRef.hpp"
45 #include "vkRefUtil.hpp"
46 #include "vkStrUtil.hpp"
47 #include "vkTypeUtil.hpp"
48 
49 #include "deStringUtil.hpp"
50 #include "deUniquePtr.hpp"
51 #include "deMath.h"
52 #include "deRandom.hpp"
53 #include "tcuStringTemplate.hpp"
54 
55 #include "vktSpvAsmCrossStageInterfaceTests.hpp"
56 #include "vktSpvAsm8bitStorageTests.hpp"
57 #include "vktSpvAsm16bitStorageTests.hpp"
58 #include "vktSpvAsmUboMatrixPaddingTests.hpp"
59 #include "vktSpvAsmConditionalBranchTests.hpp"
60 #include "vktSpvAsmIndexingTests.hpp"
61 #include "vktSpvAsmImageSamplerTests.hpp"
62 #include "vktSpvAsmComputeShaderCase.hpp"
63 #include "vktSpvAsmComputeShaderTestUtil.hpp"
64 #include "vktSpvAsmFloatControlsTests.hpp"
65 #include "vktSpvAsmFromHlslTests.hpp"
66 #include "vktSpvAsmEmptyStructTests.hpp"
67 #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
68 #include "vktSpvAsmVariablePointersTests.hpp"
69 #include "vktSpvAsmVariableInitTests.hpp"
70 #include "vktSpvAsmPointerParameterTests.hpp"
71 #include "vktSpvAsmSpirvVersion1p4Tests.hpp"
72 #include "vktSpvAsmSpirvVersionTests.hpp"
73 #include "vktTestCaseUtil.hpp"
74 #include "vktSpvAsmLoopDepLenTests.hpp"
75 #include "vktSpvAsmLoopDepInfTests.hpp"
76 #include "vktSpvAsmCompositeInsertTests.hpp"
77 #include "vktSpvAsmVaryingNameTests.hpp"
78 #include "vktSpvAsmWorkgroupMemoryTests.hpp"
79 #include "vktSpvAsmSignedIntCompareTests.hpp"
80 #include "vktSpvAsmSignedOpTests.hpp"
81 #include "vktSpvAsmPtrAccessChainTests.hpp"
82 #include "vktSpvAsmVectorShuffleTests.hpp"
83 #include "vktSpvAsmFloatControlsExtensionlessTests.hpp"
84 #include "vktSpvAsmNonSemanticInfoTests.hpp"
85 #include "vktSpvAsm64bitCompareTests.hpp"
86 #include "vktSpvAsmTrinaryMinMaxTests.hpp"
87 #include "vktSpvAsmTerminateInvocationTests.hpp"
88 #include "vktSpvAsmIntegerDotProductTests.hpp"
89 #include "vktSpvAsmPhysicalStorageBufferPointerTests.hpp"
90 
91 #include <cmath>
92 #include <limits>
93 #include <map>
94 #include <string>
95 #include <sstream>
96 #include <utility>
97 #include <stack>
98 
99 namespace vkt
100 {
101 namespace SpirVAssembly
102 {
103 
104 namespace
105 {
106 
107 using namespace vk;
108 using std::map;
109 using std::string;
110 using std::vector;
111 using tcu::IVec3;
112 using tcu::IVec4;
113 using tcu::RGBA;
114 using tcu::TestLog;
115 using tcu::TestStatus;
116 using tcu::Vec4;
117 using de::UniquePtr;
118 using tcu::StringTemplate;
119 using tcu::Vec4;
120 
// Readable names for a boolean "NaN" switch, so call sites can spell out the
// intent instead of passing a bare true/false.
// NOTE(review): the consumers of these flags are not visible in this part of
// the file -- presumably they select whether NaN inputs are exercised; confirm.
const bool TEST_WITH_NAN	= true;
const bool TEST_WITHOUT_NAN	= false;
123 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns one %f16.
//
// ${var} is a template placeholder naming the variable that is read through
// OpAccessChain; the substitution is performed by the user of this string.
// Two halfs share one 32-bit word, so the function:
//   - divides the index by %c_u32_2 to pick the word inside member 0,
//   - loads that word and bitcasts it to %v2f16,
//   - extracts the lane selected by (index & %c_u32_1).
// NOTE(review): the %c_u32_N ids are declared elsewhere; they are assumed to
// be the u32 constants their names suggest.
const string loadScalarF16FromUint =
	"%ld_arg_${var} = OpFunction %f16 None %f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Word index = unsigned(index) / 2, lane = index & 1.
	"%ld_arg_${var}_conv = OpBitcast %u32 %ld_arg_${var}_param\n"
	"%ld_arg_${var}_div = OpUDiv %u32 %ld_arg_${var}_conv %c_u32_2\n"
	"%ld_arg_${var}_and_low = OpBitwiseAnd %u32 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_div\n"
	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
	"%ld_arg_${var}_unpack = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
	"%ld_arg_${var}_ex = OpVectorExtractDynamic %f16 %ld_arg_${var}_unpack %ld_arg_${var}_and_low\n"
	"OpReturnValue %ld_arg_${var}_ex\n"
	"OpFunctionEnd\n";
137 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %v2f16.
//
// A two-component half vector fits exactly in one 32-bit word, so the word at
// [member 0][index] of %${var} is loaded and bitcast directly. ${var} is a
// template placeholder for the accessed variable's name.
const string loadV2F16FromUint =
	"%ld_arg_${var} = OpFunction %v2f16 None %v2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param\n"
	"%ld_arg_${var}_ld = OpLoad %u32 %ld_arg_${var}_gep\n"
	"%ld_arg_${var}_cast = OpBitcast %v2f16 %ld_arg_${var}_ld\n"
	"OpReturnValue %ld_arg_${var}_cast\n"
	"OpFunctionEnd\n";
147 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %v3f16.
//
// Words [member 0][index][0] and [member 0][index][1] of %${var} are loaded
// and bitcast to two %v2f16 values; the shuffle keeps the first three of the
// four halfs. ${var} is a template placeholder for the accessed variable.
const string loadV3F16FromUints =
	// Since we allocate a vec4 worth of values, this case is almost the
	// same as the vec4 case; only the final shuffle differs.
	"%ld_arg_${var} = OpFunction %v3f16 None %v3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_shuffle = OpVectorShuffle %v3f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2\n"
	"OpReturnValue %ld_arg_${var}_shuffle\n"
	"OpFunctionEnd\n";
163 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %v4f16.
//
// Words [member 0][index][0] and [member 0][index][1] of %${var} are loaded,
// bitcast to %v2f16 each, and concatenated with a shuffle. ${var} is a
// template placeholder for the accessed variable's name.
const string loadV4F16FromUints =
	"%ld_arg_${var} = OpFunction %v4f16 None %v4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_shuffle = OpVectorShuffle %v4f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 0 1 2 3\n"
	"OpReturnValue %ld_arg_${var}_shuffle\n"
	"OpFunctionEnd\n";
177 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m2x2f16.
//
// Each two-component column is one 32-bit word: words [member 0][index][0]
// and [member 0][index][1] of %${var} are bitcast to %v2f16 and used as the
// two operands of the matrix constructor. ${var} is a template placeholder.
const string loadM2x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x2f16 None %m2x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_cons = OpCompositeConstruct %m2x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1\n"
	"OpReturnValue %ld_arg_${var}_cons\n"
	"OpFunctionEnd\n";
191 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m2x3f16.
//
// Each column occupies two consecutive 32-bit words (words 0-1 for column 0,
// words 2-3 for column 1) under [member 0][index] of %${var}. Every word is
// bitcast to %v2f16 and the first three halfs of each pair form a %v3f16
// column. ${var} is a template placeholder for the accessed variable's name.
const string loadM2x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x3f16 None %m2x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the four words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	// Keep three of the four halfs per column.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
213 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m2x4f16.
//
// Each four-component column occupies two consecutive 32-bit words (words
// 0-1 for column 0, words 2-3 for column 1) under [member 0][index] of
// %${var}. Every word is bitcast to %v2f16 and each pair is concatenated into
// a %v4f16 column. ${var} is a template placeholder for the accessed variable.
const string loadM2x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m2x4f16 None %m2x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the four words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m2x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
235 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m3x2f16.
//
// Each two-component column is one 32-bit word: words 0, 1 and 2 under
// [member 0][index] of %${var} are bitcast to %v2f16 and passed, in order, to
// the matrix constructor. ${var} is a template placeholder.
const string loadM3x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x2f16 None %m3x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
252 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m3x3f16.
//
// Each column occupies two consecutive 32-bit words (six words in total)
// under [member 0][index] of %${var}. Every word is bitcast to %v2f16 and the
// first three halfs of each pair form a %v3f16 column. ${var} is a template
// placeholder for the accessed variable's name.
const string loadM3x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x3f16 None %m3x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the six words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	// Keep three of the four halfs per column.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
281 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m3x4f16.
//
// Each four-component column occupies two consecutive 32-bit words (six
// words in total) under [member 0][index] of %${var}. Every word is bitcast
// to %v2f16 and each pair is concatenated into a %v4f16 column. ${var} is a
// template placeholder for the accessed variable's name.
const string loadM3x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m3x4f16 None %m3x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the six words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m3x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
310 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m4x2f16.
//
// Each two-component column is one 32-bit word: words 0..3 under
// [member 0][index] of %${var} are bitcast to %v2f16 and passed, in order, to
// the matrix constructor. ${var} is a template placeholder.
const string loadM4x2F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x2f16 None %m4x2f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	"%ld_arg_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_ld0 = OpLoad %u32 %ld_arg_${var}_gep0\n"
	"%ld_arg_${var}_ld1 = OpLoad %u32 %ld_arg_${var}_gep1\n"
	"%ld_arg_${var}_ld2 = OpLoad %u32 %ld_arg_${var}_gep2\n"
	"%ld_arg_${var}_ld3 = OpLoad %u32 %ld_arg_${var}_gep3\n"
	"%ld_arg_${var}_bc0 = OpBitcast %v2f16 %ld_arg_${var}_ld0\n"
	"%ld_arg_${var}_bc1 = OpBitcast %v2f16 %ld_arg_${var}_ld1\n"
	"%ld_arg_${var}_bc2 = OpBitcast %v2f16 %ld_arg_${var}_ld2\n"
	"%ld_arg_${var}_bc3 = OpBitcast %v2f16 %ld_arg_${var}_ld3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x2f16 %ld_arg_${var}_bc0 %ld_arg_${var}_bc1 %ld_arg_${var}_bc2 %ld_arg_${var}_bc3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
330 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m4x3f16.
//
// Each column occupies two consecutive 32-bit words (eight words in total)
// under [member 0][index] of %${var}. Every word is bitcast to %v2f16 and the
// first three halfs of each pair form a %v3f16 column. ${var} is a template
// placeholder for the accessed variable's name.
const string loadM4x3F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x3f16 None %m4x3f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the eight words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
	// Keep three of the four halfs per column.
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2\n"
	"%ld_arg_${var}_vec3 = OpVectorShuffle %v3f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x3f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
366 
// SPIR-V assembly template defining the function %ld_arg_${var}, which takes
// an %i32 index and returns a %m4x4f16.
//
// Each four-component column occupies two consecutive 32-bit words (eight
// words in total) under [member 0][index] of %${var}. Every word is bitcast
// to %v2f16 and each pair is concatenated into a %v4f16 column. ${var} is a
// template placeholder for the accessed variable's name.
const string loadM4x4F16FromUints =
	"%ld_arg_${var} = OpFunction %m4x4f16 None %m4x4f16_i32_fn\n"
	"%ld_arg_${var}_param = OpFunctionParameter %i32\n"
	"%ld_arg_${var}_entry = OpLabel\n"
	// Pointers to the eight words (suffix = <column><word within column>).
	"%ld_arg_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_0\n"
	"%ld_arg_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_1\n"
	"%ld_arg_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_2\n"
	"%ld_arg_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_3\n"
	"%ld_arg_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_4\n"
	"%ld_arg_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_5\n"
	"%ld_arg_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_6\n"
	"%ld_arg_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %ld_arg_${var}_param %c_u32_7\n"
	"%ld_arg_${var}_ld00 = OpLoad %u32 %ld_arg_${var}_gep00\n"
	"%ld_arg_${var}_ld01 = OpLoad %u32 %ld_arg_${var}_gep01\n"
	"%ld_arg_${var}_ld10 = OpLoad %u32 %ld_arg_${var}_gep10\n"
	"%ld_arg_${var}_ld11 = OpLoad %u32 %ld_arg_${var}_gep11\n"
	"%ld_arg_${var}_ld20 = OpLoad %u32 %ld_arg_${var}_gep20\n"
	"%ld_arg_${var}_ld21 = OpLoad %u32 %ld_arg_${var}_gep21\n"
	"%ld_arg_${var}_ld30 = OpLoad %u32 %ld_arg_${var}_gep30\n"
	"%ld_arg_${var}_ld31 = OpLoad %u32 %ld_arg_${var}_gep31\n"
	"%ld_arg_${var}_bc00 = OpBitcast %v2f16 %ld_arg_${var}_ld00\n"
	"%ld_arg_${var}_bc01 = OpBitcast %v2f16 %ld_arg_${var}_ld01\n"
	"%ld_arg_${var}_bc10 = OpBitcast %v2f16 %ld_arg_${var}_ld10\n"
	"%ld_arg_${var}_bc11 = OpBitcast %v2f16 %ld_arg_${var}_ld11\n"
	"%ld_arg_${var}_bc20 = OpBitcast %v2f16 %ld_arg_${var}_ld20\n"
	"%ld_arg_${var}_bc21 = OpBitcast %v2f16 %ld_arg_${var}_ld21\n"
	"%ld_arg_${var}_bc30 = OpBitcast %v2f16 %ld_arg_${var}_ld30\n"
	"%ld_arg_${var}_bc31 = OpBitcast %v2f16 %ld_arg_${var}_ld31\n"
	"%ld_arg_${var}_vec0 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc00 %ld_arg_${var}_bc01 0 1 2 3\n"
	"%ld_arg_${var}_vec1 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc10 %ld_arg_${var}_bc11 0 1 2 3\n"
	"%ld_arg_${var}_vec2 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc20 %ld_arg_${var}_bc21 0 1 2 3\n"
	"%ld_arg_${var}_vec3 = OpVectorShuffle %v4f16 %ld_arg_${var}_bc30 %ld_arg_${var}_bc31 0 1 2 3\n"
	"%ld_arg_${var}_mat = OpCompositeConstruct %m4x4f16 %ld_arg_${var}_vec0 %ld_arg_${var}_vec1 %ld_arg_${var}_vec2 %ld_arg_${var}_vec3\n"
	"OpReturnValue %ld_arg_${var}_mat\n"
	"OpFunctionEnd\n";
402 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// an %f16 value and an %i32 index and returns nothing.
//
// Two halfs share one 32-bit word. The value is inserted into lane
// (index & %c_u32_1) of a zeroed %v2f16, the other half of the word is filled
// with ones, and the result is combined into word (index / %c_u32_2) of
// member 0 of %${var} with OpAtomicAnd, so the neighbouring half is left
// untouched. ${var} is a template placeholder for the accessed variable.
const string storeScalarF16AsUint =
	// This version is sensitive to the initial value in the output buffer.
	// The infrastructure sets all output buffer bits to one before invoking
	// the shader, so this version uses an atomic AND to generate the correct
	// zeroes.
	"%st_fn_${var} = OpFunction %void None %void_f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_and_low = OpBitwiseAnd %u32 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_zero_vec = OpBitcast %v2f16 %c_u32_0\n"
	"%st_fn_${var}_insert = OpVectorInsertDynamic %v2f16 %st_fn_${var}_zero_vec %st_fn_${var}_param1 %st_fn_${var}_and_low\n"
	"%st_fn_${var}_odd = OpIEqual %bool %st_fn_${var}_and_low %c_u32_1\n"
	// Or 16 bits of ones into the half that was not populated with the result.
	"%st_fn_${var}_sel = OpSelect %u32 %st_fn_${var}_odd %c_u32_low_ones %c_u32_high_ones\n"
	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_insert\n"
	"%st_fn_${var}_or = OpBitwiseOr %u32 %st_fn_${var}_cast %st_fn_${var}_sel\n"
	"%st_fn_${var}_conv = OpBitcast %u32 %st_fn_${var}_param2\n"
	"%st_fn_${var}_div = OpUDiv %u32 %st_fn_${var}_conv %c_u32_2\n"
	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_div\n"
	// OpAtomicAnd operands: pointer, scope id, memory-semantics id, value.
	"%st_fn_${var}_and = OpAtomicAnd %u32 %st_fn_${var}_gep %c_u32_1 %c_u32_0 %st_fn_${var}_or\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
426 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %v2f16 value and an %i32 index and returns nothing.
//
// A two-component half vector fits exactly in one 32-bit word, so the value
// is bitcast to %u32 and stored to [member 0][index] of %${var}. ${var} is a
// template placeholder for the accessed variable's name.
const string storeV2F16AsUint =
	"%st_fn_${var} = OpFunction %void None %void_v2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_cast = OpBitcast %u32 %st_fn_${var}_param1\n"
	"%st_fn_${var}_gep = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2\n"
	"OpStore %st_fn_${var}_gep %st_fn_${var}_cast\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
437 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %v3f16 value and an %i32 index and returns nothing.
//
// The vector is split into two %v2f16 pieces that are bitcast to %u32 and
// stored to words 0 and 1 under [member 0][index] of %${var}. The second
// piece uses shuffle indices 2 and 3; index 3 selects from the second shuffle
// operand (the same vector), so the fourth half written is filler data.
// ${var} is a template placeholder for the accessed variable's name.
const string storeV3F16AsUints =
	// Since we allocate a vec4 worth of values, this case can be treated
	// almost the same as a vec4 case. We will store some extra data that
	// should not be compared.
	"%st_fn_${var} = OpFunction %void None %void_v3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
456 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %v4f16 value and an %i32 index and returns nothing.
//
// The vector is split into its (0,1) and (2,3) halves; each %v2f16 piece is
// bitcast to %u32 and stored to words 0 and 1 under [member 0][index] of
// %${var}. ${var} is a template placeholder for the accessed variable's name.
const string storeV4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_v4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %v4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_shuffle0 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 0 1\n"
	"%st_fn_${var}_shuffle1 = OpVectorShuffle %v2f16 %st_fn_${var}_param1 %st_fn_${var}_param1 2 3\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_shuffle0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_shuffle1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
472 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %m2x2f16 value and an %i32 index and returns nothing.
//
// Each two-component column is one 32-bit word: columns 0 and 1 are
// extracted, bitcast to %u32 and stored to words 0 and 1 under
// [member 0][index] of %${var}. ${var} is a template placeholder.
const string storeM2x2F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m2x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
488 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %m2x3f16 value and an %i32 index and returns nothing.
//
// Each %v3f16 column is split into two %v2f16 pieces, bitcast to %u32 and
// stored to two consecutive words (words 0-1 for column 0, words 2-3 for
// column 1) under [member 0][index] of %${var}. ${var} is a template
// placeholder for the accessed variable's name.
const string storeM2x3F16AsUints =
	// In the extracted elements for 01 and 11 the second element doesn't
	// matter.
	"%st_fn_${var} = OpFunction %void None %void_m2x3f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x3f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1\n"
	// Suffix = <column><word within column>.
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
516 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %m2x4f16 value and an %i32 index and returns nothing.
//
// Each %v4f16 column is split into its (0,1) and (2,3) halves, bitcast to
// %u32 and stored to two consecutive words (words 0-1 for column 0, words
// 2-3 for column 1) under [member 0][index] of %${var}. ${var} is a template
// placeholder for the accessed variable's name.
const string storeM2x4F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m2x4f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m2x4f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1\n"
	// Suffix = <column><word within column>.
	"%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1\n"
	"%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3\n"
	"%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1\n"
	"%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3\n"
	"%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00\n"
	"%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01\n"
	"%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10\n"
	"%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11\n"
	"%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3\n"
	"OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00\n"
	"OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01\n"
	"OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10\n"
	"OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
542 
// SPIR-V assembly template defining the function %st_fn_${var}, which takes
// a %m3x2f16 value and an %i32 index and returns nothing.
//
// Each two-component column is one 32-bit word: columns 0, 1 and 2 are
// extracted, bitcast to %u32 and stored to words 0, 1 and 2 under
// [member 0][index] of %${var}. ${var} is a template placeholder.
const string storeM3x2F16AsUints =
	"%st_fn_${var} = OpFunction %void None %void_m3x2f16_i32_fn\n"
	"%st_fn_${var}_param1 = OpFunctionParameter %m3x2f16\n"
	"%st_fn_${var}_param2 = OpFunctionParameter %i32\n"
	"%st_fn_${var}_entry = OpLabel\n"
	"%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0\n"
	"%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1\n"
	"%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2\n"
	"%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0\n"
	"%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1\n"
	"%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2\n"
	"%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0\n"
	"%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1\n"
	"%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2\n"
	"OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0\n"
	"OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1\n"
	"OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2\n"
	"OpReturn\n"
	"OpFunctionEnd\n";
562 
// SPIR-V assembly template defining %st_fn_${var}, a helper that takes an
// %m3x3f16 matrix and an %i32 index. Each %v3f16 column is split into two
// %v2f16 halves with OpVectorShuffle, every half is bitcast to a %u32 and
// the six words are stored into %${var} at [0][index][0..5].
// The second half uses shuffle components "2 3": component 3 addresses the
// first component of the second shuffle operand, so the upper 16 bits of
// that word are filler whose value does not matter.
// "${var}" is a placeholder that has to be substituted before assembly.
const string storeM3x3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m3x3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m3x3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1
%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20
%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4
%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20
OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21
OpReturn
OpFunctionEnd
)";
598 
// SPIR-V assembly template defining %st_fn_${var}, a helper that takes an
// %m3x4f16 matrix and an %i32 index. Each %v4f16 column is split into two
// %v2f16 halves (components 0-1 and 2-3), every half is bitcast to a %u32
// and the six words are stored into %${var} at [0][index][0..5].
// "${var}" is a placeholder that has to be substituted before assembly.
const string storeM3x4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m3x4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m3x4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1
%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20
%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4
%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20
OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21
OpReturn
OpFunctionEnd
)";
633 
// SPIR-V assembly template defining %st_fn_${var}, a helper that takes an
// %m4x2f16 matrix and an %i32 index. Each of the four %v2f16 columns is
// bitcast to a single %u32 and stored into %${var} at [0][index][column].
// "${var}" is a placeholder that has to be substituted before assembly.
const string storeM4x2F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m4x2f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m4x2f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 2
%st_fn_${var}_ex3 = OpCompositeExtract %v2f16 %st_fn_${var}_param1 3
%st_fn_${var}_bc0 = OpBitcast %u32 %st_fn_${var}_ex0
%st_fn_${var}_bc1 = OpBitcast %u32 %st_fn_${var}_ex1
%st_fn_${var}_bc2 = OpBitcast %u32 %st_fn_${var}_ex2
%st_fn_${var}_bc3 = OpBitcast %u32 %st_fn_${var}_ex3
%st_fn_${var}_gep0 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep1 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep2 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep3 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
OpStore %st_fn_${var}_gep0 %st_fn_${var}_bc0
OpStore %st_fn_${var}_gep1 %st_fn_${var}_bc1
OpStore %st_fn_${var}_gep2 %st_fn_${var}_bc2
OpStore %st_fn_${var}_gep3 %st_fn_${var}_bc3
OpReturn
OpFunctionEnd
)";
657 
// SPIR-V assembly template defining %st_fn_${var}, a helper that takes an
// %m4x3f16 matrix and an %i32 index. Each %v3f16 column is split into two
// %v2f16 halves with OpVectorShuffle, every half is bitcast to a %u32 and
// the eight words are stored into %${var} at [0][index][0..7].
// The second half uses shuffle components "2 3": component 3 addresses the
// first component of the second shuffle operand, so the upper 16 bits of
// that word are filler whose value does not matter.
// "${var}" is a placeholder that has to be substituted before assembly.
const string storeM4x3F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m4x3f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m4x3f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 2
%st_fn_${var}_ex3 = OpCompositeExtract %v3f16 %st_fn_${var}_param1 3
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1
%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3
%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1
%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20
%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21
%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30
%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4
%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5
%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6
%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20
OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21
OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30
OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31
OpReturn
OpFunctionEnd
)";
702 
// SPIR-V assembly template defining %st_fn_${var}, a helper that takes an
// %m4x4f16 matrix and an %i32 index. Each %v4f16 column is split into two
// %v2f16 halves (components 0-1 and 2-3), every half is bitcast to a %u32
// and the eight words are stored into %${var} at [0][index][0..7].
// "${var}" is a placeholder that has to be substituted before assembly.
const string storeM4x4F16AsUints = R"(%st_fn_${var} = OpFunction %void None %void_m4x4f16_i32_fn
%st_fn_${var}_param1 = OpFunctionParameter %m4x4f16
%st_fn_${var}_param2 = OpFunctionParameter %i32
%st_fn_${var}_entry = OpLabel
%st_fn_${var}_ex0 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 0
%st_fn_${var}_ex1 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 1
%st_fn_${var}_ex2 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 2
%st_fn_${var}_ex3 = OpCompositeExtract %v4f16 %st_fn_${var}_param1 3
%st_fn_${var}_ele00 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 0 1
%st_fn_${var}_ele01 = OpVectorShuffle %v2f16 %st_fn_${var}_ex0 %st_fn_${var}_ex0 2 3
%st_fn_${var}_ele10 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 0 1
%st_fn_${var}_ele11 = OpVectorShuffle %v2f16 %st_fn_${var}_ex1 %st_fn_${var}_ex1 2 3
%st_fn_${var}_ele20 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 0 1
%st_fn_${var}_ele21 = OpVectorShuffle %v2f16 %st_fn_${var}_ex2 %st_fn_${var}_ex2 2 3
%st_fn_${var}_ele30 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 0 1
%st_fn_${var}_ele31 = OpVectorShuffle %v2f16 %st_fn_${var}_ex3 %st_fn_${var}_ex3 2 3
%st_fn_${var}_bc00 = OpBitcast %u32 %st_fn_${var}_ele00
%st_fn_${var}_bc01 = OpBitcast %u32 %st_fn_${var}_ele01
%st_fn_${var}_bc10 = OpBitcast %u32 %st_fn_${var}_ele10
%st_fn_${var}_bc11 = OpBitcast %u32 %st_fn_${var}_ele11
%st_fn_${var}_bc20 = OpBitcast %u32 %st_fn_${var}_ele20
%st_fn_${var}_bc21 = OpBitcast %u32 %st_fn_${var}_ele21
%st_fn_${var}_bc30 = OpBitcast %u32 %st_fn_${var}_ele30
%st_fn_${var}_bc31 = OpBitcast %u32 %st_fn_${var}_ele31
%st_fn_${var}_gep00 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_0
%st_fn_${var}_gep01 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_1
%st_fn_${var}_gep10 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_2
%st_fn_${var}_gep11 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_3
%st_fn_${var}_gep20 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_4
%st_fn_${var}_gep21 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_5
%st_fn_${var}_gep30 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_6
%st_fn_${var}_gep31 = OpAccessChain %up_u32 %${var} %c_u32_0 %st_fn_${var}_param2 %c_u32_7
OpStore %st_fn_${var}_gep00 %st_fn_${var}_bc00
OpStore %st_fn_${var}_gep01 %st_fn_${var}_bc01
OpStore %st_fn_${var}_gep10 %st_fn_${var}_bc10
OpStore %st_fn_${var}_gep11 %st_fn_${var}_bc11
OpStore %st_fn_${var}_gep20 %st_fn_${var}_bc20
OpStore %st_fn_${var}_gep21 %st_fn_${var}_bc21
OpStore %st_fn_${var}_gep30 %st_fn_${var}_bc30
OpStore %st_fn_${var}_gep31 %st_fn_${var}_bc31
OpReturn
OpFunctionEnd
)";
746 
747 template<typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)748 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
749 {
750 	T* const typedPtr = (T*)dst;
751 	for (int ndx = 0; ndx < numValues; ndx++)
752 		typedPtr[offset + ndx] = de::randomScalar<T>(rnd, minValue, maxValue);
753 }
754 
755 // Filter is a function that returns true if a value should pass, false otherwise.
756 template<typename T, typename FilterT>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,FilterT filter,int offset=0)757 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, FilterT filter, int offset = 0)
758 {
759 	T* const typedPtr = (T*)dst;
760 	T value;
761 	for (int ndx = 0; ndx < numValues; ndx++)
762 	{
763 		do
764 			value = de::randomScalar<T>(rnd, minValue, maxValue);
765 		while (!filter(value));
766 
767 		typedPtr[offset + ndx] = value;
768 	}
769 }
770 
771 // Gets a 64-bit integer with a more logarithmic distribution
randomInt64LogDistributed(de::Random & rnd)772 deInt64 randomInt64LogDistributed (de::Random& rnd)
773 {
774 	deInt64 val = rnd.getUint64();
775 	val &= (1ull << rnd.getInt(1, 63)) - 1;
776 	if (rnd.getBool())
777 		val = -val;
778 	return val;
779 }
780 
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues)781 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues)
782 {
783 	for (int ndx = 0; ndx < numValues; ndx++)
784 		dst[ndx] = randomInt64LogDistributed(rnd);
785 }
786 
787 template<typename FilterT>
fillRandomInt64sLogDistributed(de::Random & rnd,vector<deInt64> & dst,int numValues,FilterT filter)788 static void fillRandomInt64sLogDistributed (de::Random& rnd, vector<deInt64>& dst, int numValues, FilterT filter)
789 {
790 	for (int ndx = 0; ndx < numValues; ndx++)
791 	{
792 		deInt64 value;
793 		do {
794 			value = randomInt64LogDistributed(rnd);
795 		} while (!filter(value));
796 		dst[ndx] = value;
797 	}
798 }
799 
// Predicate: true for zero and positive values.
inline bool filterNonNegative (const deInt64 value)
{
	return !(value < 0);
}
804 
// Predicate: true for strictly positive values.
inline bool filterPositive (const deInt64 value)
{
	return 0 < value;
}
809 
// Predicate: true for every value except zero.
inline bool filterNotZero (const deInt64 value)
{
	return !(value == 0);
}
814 
floorAll(vector<float> & values)815 static void floorAll (vector<float>& values)
816 {
817 	for (size_t i = 0; i < values.size(); i++)
818 		values[i] = deFloatFloor(values[i]);
819 }
820 
floorAll(vector<Vec4> & values)821 static void floorAll (vector<Vec4>& values)
822 {
823 	for (size_t i = 0; i < values.size(); i++)
824 		values[i] = floor(values[i]);
825 }
826 
// Pairs a test case name with the parameter text associated with that case.
struct CaseParameter
{
	const char*		name;	// Stored as a raw pointer, not copied; the pointee must outlive this object.
	string			param;	// Owned copy of the parameter text.

	CaseParameter (const char* case_, const string& param_)
		: name	(case_)
		, param	(param_)
	{
	}
};
834 
835 // Assembly code used for testing LocalSize, OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
836 //
837 // #version 430
838 //
839 // layout(std140, set = 0, binding = 0) readonly buffer Input {
840 //   float elements[];
841 // } input_data;
842 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
843 //   float elements[];
844 // } output_data;
845 //
846 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
847 //
848 // void main() {
849 //   uint x = gl_GlobalInvocationID.x;
850 //   output_data.elements[x] = -input_data.elements[x];
851 // }
852 
getAsmForLocalSizeTest(bool useLiteralLocalSize,bool useLiteralLocalSizeId,bool useSpecConstantWorkgroupSize,IVec3 workGroupSize,deUint32 ndx)853 static string getAsmForLocalSizeTest(bool useLiteralLocalSize, bool useLiteralLocalSizeId, bool useSpecConstantWorkgroupSize, IVec3 workGroupSize, deUint32 ndx)
854 {
855 	std::ostringstream out;
856 	out << "OpCapability Shader\n"
857 		   "OpMemoryModel Logical GLSL450\n";
858 
859 	if (useLiteralLocalSizeId)
860 	{
861 		out << "OpEntryPoint GLCompute %main \"main\" %id %indata %outdata\n"
862 			   "OpExecutionModeId %main LocalSizeId %const_0 %const_1 %const_2\n";
863 	}
864 	else
865 	{
866 		out << "OpEntryPoint GLCompute %main \"main\" %id\n";
867 
868 		if (useLiteralLocalSize)
869 		{
870 			out << "OpExecutionMode %main LocalSize "
871 				<< workGroupSize.x() << " " << workGroupSize.y() << " " << workGroupSize.z() << "\n";
872 		}
873 	}
874 
875 	out << "OpSource GLSL 430\n"
876 		   "OpName %main           \"main\"\n"
877 		   "OpName %id             \"gl_GlobalInvocationID\"\n"
878 		   "OpDecorate %id BuiltIn GlobalInvocationId\n";
879 
880 	if (useSpecConstantWorkgroupSize)
881 	{
882 		out << "OpDecorate %spec_0 SpecId 100\n"
883 			   "OpDecorate %spec_1 SpecId 101\n"
884 			   "OpDecorate %spec_2 SpecId 102\n"
885 			   "OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize\n";
886 	}
887 
888 	if (useLiteralLocalSizeId)
889 	{
890 		out << getComputeAsmInputOutputBufferTraits("Block")
891 			<< getComputeAsmCommonTypes("StorageBuffer")
892 			<< getComputeAsmInputOutputBuffer("StorageBuffer")
893 			<< "%const_0  = OpConstant %u32 " << workGroupSize.x() << "\n"
894 			   "%const_1  = OpConstant %u32 " << workGroupSize.y() << "\n"
895 			   "%const_2  = OpConstant %u32 " << workGroupSize.z() << "\n";
896 	}
897 	else
898 	{
899 		out << getComputeAsmInputOutputBufferTraits()
900 			<< getComputeAsmCommonTypes()
901 			<< getComputeAsmInputOutputBuffer();
902 	}
903 
904 	out << "%id        = OpVariable %uvec3ptr Input\n"
905 		   "%zero      = OpConstant %i32 0 \n";
906 
907 	if (useSpecConstantWorkgroupSize)
908 	{
909 		out << "%spec_0   = OpSpecConstant %u32 "<< workGroupSize.x() << "\n"
910 			   "%spec_1   = OpSpecConstant %u32 "<< workGroupSize.y() << "\n"
911 			   "%spec_2   = OpSpecConstant %u32 "<< workGroupSize.z() << "\n"
912 			   "%gl_WorkGroupSize = OpSpecConstantComposite %uvec3 %spec_0 %spec_1 %spec_2\n";
913 	}
914 
915 	out << "%main      = OpFunction %void None %voidf\n"
916 		   "%label     = OpLabel\n"
917 		   "%idval     = OpLoad %uvec3 %id\n"
918 		   "%ndx       = OpCompositeExtract %u32 %idval " << ndx << "\n"
919 
920 		   "%inloc     = OpAccessChain %f32ptr %indata %zero %ndx\n"
921 		   "%inval     = OpLoad %f32 %inloc\n"
922 		   "%neg       = OpFNegate %f32 %inval\n"
923 		   "%outloc    = OpAccessChain %f32ptr %outdata %zero %ndx\n"
924 		   "             OpStore %outloc %neg\n"
925 		   "             OpReturn\n"
926 		   "             OpFunctionEnd\n";
927 
928 	return out.str();
929 }
930 
createLocalSizeGroup(tcu::TestContext & testCtx,bool useLocalSizeId)931 tcu::TestCaseGroup* createLocalSizeGroup(tcu::TestContext& testCtx, bool useLocalSizeId)
932 {
933 	const char*		groupName[]{ "localsize", "localsize_id" };
934 
935 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, groupName[useLocalSizeId], ""));
936 	ComputeShaderSpec				spec;
937 	de::Random						rnd				(deStringHash(group->getName()));
938 	const deUint32					numElements		= 64u;
939 	vector<float>					positiveFloats	(numElements, 0);
940 	vector<float>					negativeFloats	(numElements, 0);
941 
942 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
943 
944 	for (size_t ndx = 0; ndx < numElements; ++ndx)
945 		negativeFloats[ndx] = -positiveFloats[ndx];
946 
947 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
948 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
949 
950 	if (useLocalSizeId)
951 	{
952 		spec.spirvVersion = SPIRV_VERSION_1_5;
953 		spec.extensions.push_back("VK_KHR_maintenance4");
954 	}
955 
956 	spec.numWorkGroups = IVec3(numElements, 1, 1);
957 
958 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, 1), 0u);
959 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize", "", spec));
960 
961 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, 1), 0u);
962 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize", "", spec));
963 
964 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
965 	{
966 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, 1), 0u);
967 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize", "", spec));
968 	}
969 
970 	spec.numWorkGroups = IVec3(1, 1, 1);
971 
972 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(numElements, 1, 1), 0u);
973 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_x", "", spec));
974 
975 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(numElements, 1, 1), 0u);
976 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_x", "", spec));
977 
978 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
979 	{
980 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(numElements, 1, 1), 0u);
981 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_x", "", spec));
982 	}
983 
984 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, numElements, 1), 1u);
985 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_y", "", spec));
986 
987 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, numElements, 1), 1u);
988 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_y", "", spec));
989 
990 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
991 	{
992 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, numElements, 1), 1u);
993 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_y", "", spec));
994 	}
995 
996 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, false, IVec3(1, 1, numElements), 2u);
997 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_localsize_z", "", spec));
998 
999 	spec.assembly = getAsmForLocalSizeTest(true, useLocalSizeId, true, IVec3(1, 1, numElements), 2u);
1000 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "literal_and_specid_localsize_z", "", spec));
1001 
1002 	if (!useLocalSizeId)	// dont repeat this test when useLocalSizeId is true
1003 	{
1004 		spec.assembly = getAsmForLocalSizeTest(false, false, true, IVec3(1, 1, numElements), 2u);
1005 		group->addChild(new SpvAsmComputeShaderCase(testCtx, "specid_localsize_z", "", spec));
1006 	}
1007 
1008 	return group.release();
1009 }
1010 
createOpNopGroup(tcu::TestContext & testCtx)1011 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
1012 {
1013 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
1014 	ComputeShaderSpec				spec;
1015 	de::Random						rnd				(deStringHash(group->getName()));
1016 	const int						numElements		= 100;
1017 	vector<float>					positiveFloats	(numElements, 0);
1018 	vector<float>					negativeFloats	(numElements, 0);
1019 
1020 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1021 
1022 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1023 		negativeFloats[ndx] = -positiveFloats[ndx];
1024 
1025 	spec.assembly =
1026 		string(getComputeAsmShaderPreamble()) +
1027 
1028 		"OpSource GLSL 430\n"
1029 		"OpName %main           \"main\"\n"
1030 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1031 
1032 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1033 
1034 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1035 
1036 		+ string(getComputeAsmInputOutputBuffer()) +
1037 
1038 		"%id        = OpVariable %uvec3ptr Input\n"
1039 		"%zero      = OpConstant %i32 0\n"
1040 
1041 		"%main      = OpFunction %void None %voidf\n"
1042 		"%label     = OpLabel\n"
1043 		"%idval     = OpLoad %uvec3 %id\n"
1044 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1045 
1046 		"             OpNop\n" // Inside a function body
1047 
1048 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1049 		"%inval     = OpLoad %f32 %inloc\n"
1050 		"%neg       = OpFNegate %f32 %inval\n"
1051 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1052 		"             OpStore %outloc %neg\n"
1053 		"             OpReturn\n"
1054 		"             OpFunctionEnd\n";
1055 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1056 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1057 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1058 
1059 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
1060 
1061 	return group.release();
1062 }
1063 
createUnusedVariableComputeTests(tcu::TestContext & testCtx)1064 tcu::TestCaseGroup* createUnusedVariableComputeTests (tcu::TestContext& testCtx)
1065 {
1066 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "unused_variables", "Compute shaders with unused variables"));
1067 	de::Random						rnd				(deStringHash(group->getName()));
1068 	const int						numElements		= 100;
1069 	vector<float>					positiveFloats	(numElements, 0);
1070 	vector<float>					negativeFloats	(numElements, 0);
1071 
1072 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1073 
1074 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1075 		negativeFloats[ndx] = -positiveFloats[ndx];
1076 
1077 	const VariableLocation			testLocations[] =
1078 	{
1079 		// Set		Binding
1080 		{ 0,		5			},
1081 		{ 5,		5			},
1082 	};
1083 
1084 	for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
1085 	{
1086 		const VariableLocation& location = testLocations[locationNdx];
1087 
1088 		// Unused variable.
1089 		{
1090 			ComputeShaderSpec				spec;
1091 
1092 			spec.assembly =
1093 				string(getComputeAsmShaderPreamble()) +
1094 
1095 				"OpDecorate %id BuiltIn GlobalInvocationId\n"
1096 
1097 				+ getUnusedDecorations(location)
1098 
1099 				+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1100 
1101 				+ getUnusedTypesAndConstants()
1102 
1103 				+ string(getComputeAsmInputOutputBuffer())
1104 
1105 				+ getUnusedBuffer() +
1106 
1107 				"%id        = OpVariable %uvec3ptr Input\n"
1108 				"%zero      = OpConstant %i32 0\n"
1109 
1110 				"%main      = OpFunction %void None %voidf\n"
1111 				"%label     = OpLabel\n"
1112 				"%idval     = OpLoad %uvec3 %id\n"
1113 				"%x         = OpCompositeExtract %u32 %idval 0\n"
1114 
1115 				"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1116 				"%inval     = OpLoad %f32 %inloc\n"
1117 				"%neg       = OpFNegate %f32 %inval\n"
1118 				"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1119 				"             OpStore %outloc %neg\n"
1120 				"             OpReturn\n"
1121 				"             OpFunctionEnd\n";
1122 			spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1123 			spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1124 			spec.numWorkGroups = IVec3(numElements, 1, 1);
1125 
1126 			std::string testName		= "variable_" + location.toString();
1127 			std::string testDescription	= "Unused variable test with " + location.toDescription();
1128 
1129 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1130 		}
1131 
1132 		// Unused function.
1133 		{
1134 			ComputeShaderSpec				spec;
1135 
1136 			spec.assembly =
1137 				string(getComputeAsmShaderPreamble("", "", "", getUnusedEntryPoint())) +
1138 
1139 				"OpDecorate %id BuiltIn GlobalInvocationId\n"
1140 
1141 				+ getUnusedDecorations(location)
1142 
1143 				+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes())
1144 
1145 				+ getUnusedTypesAndConstants() +
1146 
1147 				"%c_i32_0 = OpConstant %i32 0\n"
1148 				"%c_i32_1 = OpConstant %i32 1\n"
1149 
1150 				+ string(getComputeAsmInputOutputBuffer())
1151 
1152 				+ getUnusedBuffer() +
1153 
1154 				"%id        = OpVariable %uvec3ptr Input\n"
1155 				"%zero      = OpConstant %i32 0\n"
1156 
1157 				"%main      = OpFunction %void None %voidf\n"
1158 				"%label     = OpLabel\n"
1159 				"%idval     = OpLoad %uvec3 %id\n"
1160 				"%x         = OpCompositeExtract %u32 %idval 0\n"
1161 
1162 				"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1163 				"%inval     = OpLoad %f32 %inloc\n"
1164 				"%neg       = OpFNegate %f32 %inval\n"
1165 				"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1166 				"             OpStore %outloc %neg\n"
1167 				"             OpReturn\n"
1168 				"             OpFunctionEnd\n"
1169 
1170 				+ getUnusedFunctionBody();
1171 
1172 			spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1173 			spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1174 			spec.numWorkGroups = IVec3(numElements, 1, 1);
1175 
1176 			std::string testName		= "function_" + location.toString();
1177 			std::string testDescription	= "Unused function test with " + location.toDescription();
1178 
1179 			group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), testDescription.c_str(), spec));
1180 		}
1181 	}
1182 
1183 	return group.release();
1184 }
1185 
1186 template<bool nanSupported>
compareFUnord(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)1187 bool compareFUnord (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
1188 {
1189 	if (outputAllocs.size() != 1)
1190 		return false;
1191 
1192 	vector<deUint8>	input1Bytes;
1193 	vector<deUint8>	input2Bytes;
1194 	vector<deUint8>	expectedBytes;
1195 
1196 	inputs[0].getBytes(input1Bytes);
1197 	inputs[1].getBytes(input2Bytes);
1198 	expectedOutputs[0].getBytes(expectedBytes);
1199 
1200 	const deInt32* const	expectedOutputAsInt		= reinterpret_cast<const deInt32*>(&expectedBytes.front());
1201 	const deInt32* const	outputAsInt				= static_cast<const deInt32*>(outputAllocs[0]->getHostPtr());
1202 	const float* const		input1AsFloat			= reinterpret_cast<const float*>(&input1Bytes.front());
1203 	const float* const		input2AsFloat			= reinterpret_cast<const float*>(&input2Bytes.front());
1204 	bool returnValue								= true;
1205 
1206 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(deInt32); ++idx)
1207 	{
1208 		if (!nanSupported && (tcu::Float32(input1AsFloat[idx]).isNaN() || tcu::Float32(input2AsFloat[idx]).isNaN()))
1209 			continue;
1210 
1211 		if (outputAsInt[idx] != expectedOutputAsInt[idx])
1212 		{
1213 			log << TestLog::Message << "ERROR: Sub-case failed. inputs: " << input1AsFloat[idx] << "," << input2AsFloat[idx] << " output: " << outputAsInt[idx]<< " expected output: " << expectedOutputAsInt[idx] << TestLog::EndMessage;
1214 			returnValue = false;
1215 		}
1216 	}
1217 	return returnValue;
1218 }
1219 
1220 typedef VkBool32 (*compareFuncType) (float, float);
1221 
1222 struct OpFUnordCase
1223 {
1224 	const char*		name;
1225 	const char*		opCode;
1226 	compareFuncType	compareFunc;
1227 
OpFUnordCasevkt::SpirVAssembly::__anon043fb9e60111::OpFUnordCase1228 					OpFUnordCase			(const char* _name, const char* _opCode, compareFuncType _compareFunc)
1229 						: name				(_name)
1230 						, opCode			(_opCode)
1231 						, compareFunc		(_compareFunc) {}
1232 };
1233 
// Appends one OpFUnordCase to a vector named `cases` that must be in scope at the expansion site.
// NAME becomes both the case name (stringified) and part of a local helper struct name, OPCODE is
// the SPIR-V opcode string and OPERATOR is the C++ comparison operator used as host reference.
#define ADD_OPFUNORD_CASE(NAME, OPCODE, OPERATOR) \
do { \
	struct compare_##NAME \
	{ \
		static VkBool32 compare (float lhs, float rhs) \
		{ \
			if (lhs OPERATOR rhs) \
				return VK_TRUE; \
			return VK_FALSE; \
		} \
	}; \
	cases.push_back(OpFUnordCase(#NAME, OPCODE, &compare_##NAME::compare)); \
} while (deGetFalse())
1239 
createOpFUnordGroup(tcu::TestContext & testCtx,const bool testWithNan)1240 tcu::TestCaseGroup* createOpFUnordGroup (tcu::TestContext& testCtx, const bool testWithNan)
1241 {
1242 	const string					nan				= testWithNan ? "_nan" : "";
1243 	const string					groupName		= "opfunord" + nan;
1244 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpFUnord* opcodes"));
1245 	de::Random						rnd				(deStringHash(group->getName()));
1246 	const int						numElements		= 100;
1247 	vector<OpFUnordCase>			cases;
1248 	string							extensions		= testWithNan ? "OpExtension \"SPV_KHR_float_controls\"\n" : "";
1249 	string							capabilities	= testWithNan ? "OpCapability SignedZeroInfNanPreserve\n" : "";
1250 	string							exeModes		= testWithNan ? "OpExecutionMode %main SignedZeroInfNanPreserve 32\n" : "";
1251 	const StringTemplate			shaderTemplate	(
1252 		string(getComputeAsmShaderPreamble(capabilities, extensions, exeModes)) +
1253 		"OpSource GLSL 430\n"
1254 		"OpName %main           \"main\"\n"
1255 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1256 
1257 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1258 
1259 		"OpDecorate %buf BufferBlock\n"
1260 		"OpDecorate %buf2 BufferBlock\n"
1261 		"OpDecorate %indata1 DescriptorSet 0\n"
1262 		"OpDecorate %indata1 Binding 0\n"
1263 		"OpDecorate %indata2 DescriptorSet 0\n"
1264 		"OpDecorate %indata2 Binding 1\n"
1265 		"OpDecorate %outdata DescriptorSet 0\n"
1266 		"OpDecorate %outdata Binding 2\n"
1267 		"OpDecorate %f32arr ArrayStride 4\n"
1268 		"OpDecorate %i32arr ArrayStride 4\n"
1269 		"OpMemberDecorate %buf 0 Offset 0\n"
1270 		"OpMemberDecorate %buf2 0 Offset 0\n"
1271 
1272 		+ string(getComputeAsmCommonTypes()) +
1273 
1274 		"%buf        = OpTypeStruct %f32arr\n"
1275 		"%bufptr     = OpTypePointer Uniform %buf\n"
1276 		"%indata1    = OpVariable %bufptr Uniform\n"
1277 		"%indata2    = OpVariable %bufptr Uniform\n"
1278 
1279 		"%buf2       = OpTypeStruct %i32arr\n"
1280 		"%buf2ptr    = OpTypePointer Uniform %buf2\n"
1281 		"%outdata    = OpVariable %buf2ptr Uniform\n"
1282 
1283 		"%id        = OpVariable %uvec3ptr Input\n"
1284 		"%zero      = OpConstant %i32 0\n"
1285 		"%consti1   = OpConstant %i32 1\n"
1286 		"%constf1   = OpConstant %f32 1.0\n"
1287 
1288 		"%main      = OpFunction %void None %voidf\n"
1289 		"%label     = OpLabel\n"
1290 		"%idval     = OpLoad %uvec3 %id\n"
1291 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1292 
1293 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
1294 		"%inval1    = OpLoad %f32 %inloc1\n"
1295 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
1296 		"%inval2    = OpLoad %f32 %inloc2\n"
1297 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1298 
1299 		"%result    = ${OPCODE} %bool %inval1 %inval2\n"
1300 		"%int_res   = OpSelect %i32 %result %consti1 %zero\n"
1301 		"             OpStore %outloc %int_res\n"
1302 
1303 		"             OpReturn\n"
1304 		"             OpFunctionEnd\n");
1305 
1306 	ADD_OPFUNORD_CASE(equal, "OpFUnordEqual", ==);
1307 	ADD_OPFUNORD_CASE(less, "OpFUnordLessThan", <);
1308 	ADD_OPFUNORD_CASE(lessequal, "OpFUnordLessThanEqual", <=);
1309 	ADD_OPFUNORD_CASE(greater, "OpFUnordGreaterThan", >);
1310 	ADD_OPFUNORD_CASE(greaterequal, "OpFUnordGreaterThanEqual", >=);
1311 	ADD_OPFUNORD_CASE(notequal, "OpFUnordNotEqual", !=);
1312 
1313 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1314 	{
1315 		map<string, string>			specializations;
1316 		ComputeShaderSpec			spec;
1317 		const float					NaN				= std::numeric_limits<float>::quiet_NaN();
1318 		vector<float>				inputFloats1	(numElements, 0);
1319 		vector<float>				inputFloats2	(numElements, 0);
1320 		vector<deInt32>				expectedInts	(numElements, 0);
1321 
1322 		specializations["OPCODE"]	= cases[caseNdx].opCode;
1323 		spec.assembly				= shaderTemplate.specialize(specializations);
1324 
1325 		fillRandomScalars(rnd, 1.f, 100.f, &inputFloats1[0], numElements);
1326 		for (size_t ndx = 0; ndx < numElements; ++ndx)
1327 		{
1328 			switch (ndx % 6)
1329 			{
1330 				case 0:		inputFloats2[ndx] = inputFloats1[ndx] + 1.0f; break;
1331 				case 1:		inputFloats2[ndx] = inputFloats1[ndx] - 1.0f; break;
1332 				case 2:		inputFloats2[ndx] = inputFloats1[ndx]; break;
1333 				case 3:		inputFloats2[ndx] = NaN; break;
1334 				case 4:		inputFloats2[ndx] = inputFloats1[ndx];	inputFloats1[ndx] = NaN; break;
1335 				case 5:		inputFloats2[ndx] = NaN;				inputFloats1[ndx] = NaN; break;
1336 			}
1337 			expectedInts[ndx] = tcu::Float32(inputFloats1[ndx]).isNaN() || tcu::Float32(inputFloats2[ndx]).isNaN() || cases[caseNdx].compareFunc(inputFloats1[ndx], inputFloats2[ndx]);
1338 		}
1339 
1340 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1341 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1342 		spec.outputs.push_back(BufferSp(new Int32Buffer(expectedInts)));
1343 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
1344 		spec.verifyIO		= testWithNan ? &compareFUnord<true> : &compareFUnord<false>;
1345 
1346 		if (testWithNan)
1347 		{
1348 			spec.extensions.push_back("VK_KHR_shader_float_controls");
1349 			spec.requestedVulkanFeatures.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat32 = DE_TRUE;
1350 		}
1351 
1352 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1353 	}
1354 
1355 	return group.release();
1356 }
1357 
1358 struct OpAtomicCase
1359 {
1360 	const char*		name;
1361 	const char*		assembly;
1362 	const char*		retValAssembly;
1363 	OpAtomicType	opAtomic;
1364 	deInt32			numOutputElements;
1365 
OpAtomicCasevkt::SpirVAssembly::__anon043fb9e60111::OpAtomicCase1366 					OpAtomicCase(const char* _name, const char* _assembly, const char* _retValAssembly, OpAtomicType _opAtomic, deInt32 _numOutputElements)
1367 						: name				(_name)
1368 						, assembly			(_assembly)
1369 						, retValAssembly	(_retValAssembly)
1370 						, opAtomic			(_opAtomic)
1371 						, numOutputElements	(_numOutputElements) {}
1372 };
1373 
createOpAtomicGroup(tcu::TestContext & testCtx,bool useStorageBuffer,int numElements=65535,bool verifyReturnValues=false,bool volatileAtomic=false)1374 tcu::TestCaseGroup* createOpAtomicGroup (tcu::TestContext& testCtx, bool useStorageBuffer, int numElements = 65535, bool verifyReturnValues = false, bool volatileAtomic = false)
1375 {
1376 	std::string						groupName			("opatomic");
1377 	if (useStorageBuffer)
1378 		groupName += "_storage_buffer";
1379 	if (verifyReturnValues)
1380 		groupName += "_return_values";
1381 	if (volatileAtomic)
1382 		groupName += "_volatile";
1383 	de::MovePtr<tcu::TestCaseGroup>	group				(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Test the OpAtomic* opcodes"));
1384 	vector<OpAtomicCase>			cases;
1385 
1386 	const StringTemplate			shaderTemplate	(
1387 
1388 		string("OpCapability Shader\n") +
1389 		(volatileAtomic ? "OpCapability VulkanMemoryModelKHR\n" : "") +
1390 		(useStorageBuffer ? "OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n" : "") +
1391 		(volatileAtomic ? "OpExtension \"SPV_KHR_vulkan_memory_model\"\n" : "") +
1392 		(volatileAtomic ? "OpMemoryModel Logical VulkanKHR\n" : "OpMemoryModel Logical GLSL450\n") +
1393 		"OpEntryPoint GLCompute %main \"main\" %id\n"
1394 		"OpExecutionMode %main LocalSize 1 1 1\n" +
1395 
1396 		"OpSource GLSL 430\n"
1397 		"OpName %main           \"main\"\n"
1398 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1399 
1400 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1401 
1402 		"OpDecorate %buf ${BLOCK_DECORATION}\n"
1403 		"OpDecorate %indata DescriptorSet 0\n"
1404 		"OpDecorate %indata Binding 0\n"
1405 		"OpDecorate %i32arr ArrayStride 4\n"
1406 		"OpMemberDecorate %buf 0 Offset 0\n"
1407 
1408 		"OpDecorate %sumbuf ${BLOCK_DECORATION}\n"
1409 		"OpDecorate %sum DescriptorSet 0\n"
1410 		"OpDecorate %sum Binding 1\n"
1411 		"OpMemberDecorate %sumbuf 0 Offset 0\n"
1412 
1413 		"${RETVAL_BUF_DECORATE}"
1414 
1415 		+ getComputeAsmCommonTypes("${BLOCK_POINTER_TYPE}") +
1416 
1417 		"%buf       = OpTypeStruct %i32arr\n"
1418 		"%bufptr    = OpTypePointer ${BLOCK_POINTER_TYPE} %buf\n"
1419 		"%indata    = OpVariable %bufptr ${BLOCK_POINTER_TYPE}\n"
1420 
1421 		"%sumbuf    = OpTypeStruct %i32arr\n"
1422 		"%sumbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %sumbuf\n"
1423 		"%sum       = OpVariable %sumbufptr ${BLOCK_POINTER_TYPE}\n"
1424 
1425 		"${RETVAL_BUF_DECL}"
1426 
1427 		"%id        = OpVariable %uvec3ptr Input\n"
1428 		"%minusone  = OpConstant %i32 -1\n"
1429 		"%zero      = OpConstant %i32 0\n"
1430 		"%one       = OpConstant %u32 1\n"
1431 		"%two       = OpConstant %i32 2\n"
1432 		"%five      = OpConstant %i32 5\n"
1433 		"%volbit    = OpConstant %i32 32768\n"
1434 
1435 		"%main      = OpFunction %void None %voidf\n"
1436 		"%label     = OpLabel\n"
1437 		"%idval     = OpLoad %uvec3 %id\n"
1438 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1439 
1440 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1441 		"%inval     = OpLoad %i32 %inloc\n"
1442 
1443 		"%outloc    = OpAccessChain %i32ptr %sum %zero ${INDEX}\n"
1444 		"${INSTRUCTION}"
1445 		"${RETVAL_ASSEMBLY}"
1446 
1447 		"             OpReturn\n"
1448 		"             OpFunctionEnd\n");
1449 
1450 	#define ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS) \
1451 	do { \
1452 		DE_ASSERT((NUM_OUTPUT_ELEMENTS) == 1 || (NUM_OUTPUT_ELEMENTS) == numElements); \
1453 		cases.push_back(OpAtomicCase(#NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, NUM_OUTPUT_ELEMENTS)); \
1454 	} while (deGetFalse())
1455 	#define ADD_OPATOMIC_CASE_1(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, 1)
1456 	#define ADD_OPATOMIC_CASE_N(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC) ADD_OPATOMIC_CASE(NAME, ASSEMBLY, RETVAL_ASSEMBLY, OPATOMIC, numElements)
1457 
1458 	ADD_OPATOMIC_CASE_1(iadd,	"%retv      = OpAtomicIAdd %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1459 								"             OpStore %retloc %retv\n", OPATOMIC_IADD );
1460 	ADD_OPATOMIC_CASE_1(isub,	"%retv      = OpAtomicISub %i32 %outloc ${SCOPE} ${SEMANTICS} %inval\n",
1461 								"             OpStore %retloc %retv\n", OPATOMIC_ISUB );
1462 	ADD_OPATOMIC_CASE_1(iinc,	"%retv      = OpAtomicIIncrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1463 								"             OpStore %retloc %retv\n", OPATOMIC_IINC );
1464 	ADD_OPATOMIC_CASE_1(idec,	"%retv      = OpAtomicIDecrement %i32 %outloc ${SCOPE} ${SEMANTICS}\n",
1465 								"             OpStore %retloc %retv\n", OPATOMIC_IDEC );
1466 	if (!verifyReturnValues)
1467 	{
1468 		ADD_OPATOMIC_CASE_N(load,	"%inval2    = OpAtomicLoad %i32 %inloc ${SCOPE} ${SEMANTICS}\n"
1469 									"             OpStore %outloc %inval2\n", "", OPATOMIC_LOAD );
1470 		ADD_OPATOMIC_CASE_N(store,	"             OpAtomicStore %outloc ${SCOPE} ${SEMANTICS} %inval\n", "", OPATOMIC_STORE );
1471 	}
1472 
1473 	ADD_OPATOMIC_CASE_N(compex, "%even      = OpSMod %i32 %inval %two\n"
1474 								"             OpStore %outloc %even\n"
1475 								"%retv      = OpAtomicCompareExchange %i32 %outloc ${SCOPE} ${SEMANTICS} ${SEMANTICS} %minusone %zero\n",
1476 								"			  OpStore %retloc %retv\n", OPATOMIC_COMPEX );
1477 
1478 
1479 	#undef ADD_OPATOMIC_CASE
1480 	#undef ADD_OPATOMIC_CASE_1
1481 	#undef ADD_OPATOMIC_CASE_N
1482 
1483 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1484 	{
1485 		map<string, string>			specializations;
1486 		ComputeShaderSpec			spec;
1487 		vector<deInt32>				inputInts		(numElements, 0);
1488 		vector<deInt32>				expected		(cases[caseNdx].numOutputElements, -1);
1489 
1490 		if (volatileAtomic)
1491 		{
1492 			spec.extensions.push_back("VK_KHR_vulkan_memory_model");
1493 			spec.requestedVulkanFeatures.extVulkanMemoryModel.vulkanMemoryModel = true;
1494 
1495 			// volatile, queuefamily scope
1496 			specializations["SEMANTICS"] = "%volbit";
1497 			specializations["SCOPE"] = "%five";
1498 		}
1499 		else
1500 		{
1501 			// non-volatile, device scope
1502 			specializations["SEMANTICS"] = "%zero";
1503 			specializations["SCOPE"] = "%one";
1504 		}
1505 		specializations["INDEX"]				= (cases[caseNdx].numOutputElements == 1) ? "%zero" : "%x";
1506 		specializations["INSTRUCTION"]			= cases[caseNdx].assembly;
1507 		specializations["BLOCK_DECORATION"]		= useStorageBuffer ? "Block" : "BufferBlock";
1508 		specializations["BLOCK_POINTER_TYPE"]	= useStorageBuffer ? "StorageBuffer" : "Uniform";
1509 
1510 		if (verifyReturnValues)
1511 		{
1512 			const StringTemplate blockDecoration	(
1513 				"\n"
1514 				"OpDecorate %retbuf ${BLOCK_DECORATION}\n"
1515 				"OpDecorate %ret DescriptorSet 0\n"
1516 				"OpDecorate %ret Binding 2\n"
1517 				"OpMemberDecorate %retbuf 0 Offset 0\n\n");
1518 
1519 			const StringTemplate blockDeclaration	(
1520 				"\n"
1521 				"%retbuf    = OpTypeStruct %i32arr\n"
1522 				"%retbufptr = OpTypePointer ${BLOCK_POINTER_TYPE} %retbuf\n"
1523 				"%ret       = OpVariable %retbufptr ${BLOCK_POINTER_TYPE}\n\n");
1524 
1525 			specializations["RETVAL_ASSEMBLY"] =
1526 				"%retloc    = OpAccessChain %i32ptr %ret %zero %x\n"
1527 				+ std::string(cases[caseNdx].retValAssembly);
1528 
1529 			specializations["RETVAL_BUF_DECORATE"]	= blockDecoration.specialize(specializations);
1530 			specializations["RETVAL_BUF_DECL"]		= blockDeclaration.specialize(specializations);
1531 		}
1532 		else
1533 		{
1534 			specializations["RETVAL_ASSEMBLY"]		= "";
1535 			specializations["RETVAL_BUF_DECORATE"]	= "";
1536 			specializations["RETVAL_BUF_DECL"]		= "";
1537 		}
1538 
1539 		spec.assembly							= shaderTemplate.specialize(specializations);
1540 
1541 		// Specialize one more time, to catch things that were in a template parameter
1542 		const StringTemplate					assemblyTemplate(spec.assembly);
1543 		spec.assembly							= assemblyTemplate.specialize(specializations);
1544 
1545 		if (useStorageBuffer)
1546 			spec.extensions.push_back("VK_KHR_storage_buffer_storage_class");
1547 
1548 		spec.inputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_INPUT)));
1549 		spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_EXPECTED)));
1550 		if (verifyReturnValues)
1551 			spec.outputs.push_back(BufferSp(new OpAtomicBuffer(numElements, cases[caseNdx].numOutputElements, cases[caseNdx].opAtomic, BUFFERTYPE_ATOMIC_RET)));
1552 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1553 
1554 		if (verifyReturnValues)
1555 		{
1556 			switch (cases[caseNdx].opAtomic)
1557 			{
1558 				case OPATOMIC_IADD:
1559 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IADD>;
1560 					break;
1561 				case OPATOMIC_ISUB:
1562 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_ISUB>;
1563 					break;
1564 				case OPATOMIC_IINC:
1565 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IINC>;
1566 					break;
1567 				case OPATOMIC_IDEC:
1568 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_IDEC>;
1569 					break;
1570 				case OPATOMIC_COMPEX:
1571 					spec.verifyIO = OpAtomicBuffer::compareWithRetvals<OPATOMIC_COMPEX>;
1572 					break;
1573 				default:
1574 					DE_FATAL("Unsupported OpAtomic type for return value verification");
1575 			}
1576 		}
1577 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1578 	}
1579 
1580 	return group.release();
1581 }
1582 
createOpLineGroup(tcu::TestContext & testCtx)1583 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
1584 {
1585 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
1586 	ComputeShaderSpec				spec;
1587 	de::Random						rnd				(deStringHash(group->getName()));
1588 	const int						numElements		= 100;
1589 	vector<float>					positiveFloats	(numElements, 0);
1590 	vector<float>					negativeFloats	(numElements, 0);
1591 
1592 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1593 
1594 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1595 		negativeFloats[ndx] = -positiveFloats[ndx];
1596 
1597 	spec.assembly =
1598 		string(getComputeAsmShaderPreamble()) +
1599 
1600 		"%fname1 = OpString \"negateInputs.comp\"\n"
1601 		"%fname2 = OpString \"negateInputs\"\n"
1602 
1603 		"OpSource GLSL 430\n"
1604 		"OpName %main           \"main\"\n"
1605 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1606 
1607 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1608 
1609 		+ string(getComputeAsmInputOutputBufferTraits()) +
1610 
1611 		"OpLine %fname1 0 0\n" // At the earliest possible position
1612 
1613 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1614 
1615 		"OpLine %fname1 0 1\n" // Multiple OpLines in sequence
1616 		"OpLine %fname2 1 0\n" // Different filenames
1617 		"OpLine %fname1 1000 100000\n"
1618 
1619 		"%id        = OpVariable %uvec3ptr Input\n"
1620 		"%zero      = OpConstant %i32 0\n"
1621 
1622 		"OpLine %fname1 1 1\n" // Before a function
1623 
1624 		"%main      = OpFunction %void None %voidf\n"
1625 		"%label     = OpLabel\n"
1626 
1627 		"OpLine %fname1 1 1\n" // In a function
1628 
1629 		"%idval     = OpLoad %uvec3 %id\n"
1630 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1631 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1632 		"%inval     = OpLoad %f32 %inloc\n"
1633 		"%neg       = OpFNegate %f32 %inval\n"
1634 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1635 		"             OpStore %outloc %neg\n"
1636 		"             OpReturn\n"
1637 		"             OpFunctionEnd\n";
1638 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1639 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1640 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1641 
1642 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
1643 
1644 	return group.release();
1645 }
1646 
veryfiBinaryShader(const ProgramBinary & binary)1647 bool veryfiBinaryShader (const ProgramBinary& binary)
1648 {
1649 	const size_t	paternCount			= 3u;
1650 	bool paternsCheck[paternCount]		=
1651 	{
1652 		false, false, false
1653 	};
1654 	const string patersns[paternCount]	=
1655 	{
1656 		"VULKAN CTS",
1657 		"Negative values",
1658 		"Date: 2017/09/21"
1659 	};
1660 	size_t			paternNdx		= 0u;
1661 
1662 	for (size_t ndx = 0u; ndx < binary.getSize(); ++ndx)
1663 	{
1664 		if (false == paternsCheck[paternNdx] &&
1665 			patersns[paternNdx][0] == static_cast<char>(binary.getBinary()[ndx]) &&
1666 			deMemoryEqual((const char*)&binary.getBinary()[ndx], &patersns[paternNdx][0], patersns[paternNdx].length()))
1667 		{
1668 			paternsCheck[paternNdx]= true;
1669 			paternNdx++;
1670 			if (paternNdx == paternCount)
1671 				break;
1672 		}
1673 	}
1674 
1675 	for (size_t ndx = 0u; ndx < paternCount; ++ndx)
1676 	{
1677 		if (!paternsCheck[ndx])
1678 			return false;
1679 	}
1680 
1681 	return true;
1682 }
1683 
createOpModuleProcessedGroup(tcu::TestContext & testCtx)1684 tcu::TestCaseGroup* createOpModuleProcessedGroup (tcu::TestContext& testCtx)
1685 {
1686 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "Test the OpModuleProcessed instruction"));
1687 	ComputeShaderSpec				spec;
1688 	de::Random						rnd				(deStringHash(group->getName()));
1689 	const int						numElements		= 10;
1690 	vector<float>					positiveFloats	(numElements, 0);
1691 	vector<float>					negativeFloats	(numElements, 0);
1692 
1693 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1694 
1695 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1696 		negativeFloats[ndx] = -positiveFloats[ndx];
1697 
1698 	spec.assembly =
1699 		string(getComputeAsmShaderPreamble()) +
1700 		"%fname = OpString \"negateInputs.comp\"\n"
1701 
1702 		"OpSource GLSL 430\n"
1703 		"OpName %main           \"main\"\n"
1704 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1705 		"OpModuleProcessed \"VULKAN CTS\"\n"					//OpModuleProcessed;
1706 		"OpModuleProcessed \"Negative values\"\n"
1707 		"OpModuleProcessed \"Date: 2017/09/21\"\n"
1708 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1709 
1710 		+ string(getComputeAsmInputOutputBufferTraits())
1711 
1712 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1713 
1714 		"OpLine %fname 0 1\n"
1715 
1716 		"OpLine %fname 1000 1\n"
1717 
1718 		"%id        = OpVariable %uvec3ptr Input\n"
1719 		"%zero      = OpConstant %i32 0\n"
1720 		"%main      = OpFunction %void None %voidf\n"
1721 
1722 		"%label     = OpLabel\n"
1723 		"%idval     = OpLoad %uvec3 %id\n"
1724 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1725 
1726 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1727 		"%inval     = OpLoad %f32 %inloc\n"
1728 		"%neg       = OpFNegate %f32 %inval\n"
1729 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1730 		"             OpStore %outloc %neg\n"
1731 		"             OpReturn\n"
1732 		"             OpFunctionEnd\n";
1733 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1734 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1735 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1736 	spec.verifyBinary = veryfiBinaryShader;
1737 	spec.spirvVersion = SPIRV_VERSION_1_3;
1738 
1739 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpModuleProcessed Tests", spec));
1740 
1741 	return group.release();
1742 }
1743 
createOpNoLineGroup(tcu::TestContext & testCtx)1744 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
1745 {
1746 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
1747 	ComputeShaderSpec				spec;
1748 	de::Random						rnd				(deStringHash(group->getName()));
1749 	const int						numElements		= 100;
1750 	vector<float>					positiveFloats	(numElements, 0);
1751 	vector<float>					negativeFloats	(numElements, 0);
1752 
1753 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1754 
1755 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1756 		negativeFloats[ndx] = -positiveFloats[ndx];
1757 
1758 	spec.assembly =
1759 		string(getComputeAsmShaderPreamble()) +
1760 
1761 		"%fname = OpString \"negateInputs.comp\"\n"
1762 
1763 		"OpSource GLSL 430\n"
1764 		"OpName %main           \"main\"\n"
1765 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1766 
1767 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1768 
1769 		+ string(getComputeAsmInputOutputBufferTraits()) +
1770 
1771 		"OpNoLine\n" // At the earliest possible position, without preceding OpLine
1772 
1773 		+ string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
1774 
1775 		"OpLine %fname 0 1\n"
1776 		"OpNoLine\n" // Immediately following a preceding OpLine
1777 
1778 		"OpLine %fname 1000 1\n"
1779 
1780 		"%id        = OpVariable %uvec3ptr Input\n"
1781 		"%zero      = OpConstant %i32 0\n"
1782 
1783 		"OpNoLine\n" // Contents after the previous OpLine
1784 
1785 		"%main      = OpFunction %void None %voidf\n"
1786 		"%label     = OpLabel\n"
1787 		"%idval     = OpLoad %uvec3 %id\n"
1788 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1789 
1790 		"OpNoLine\n" // Multiple OpNoLine
1791 		"OpNoLine\n"
1792 		"OpNoLine\n"
1793 
1794 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1795 		"%inval     = OpLoad %f32 %inloc\n"
1796 		"%neg       = OpFNegate %f32 %inval\n"
1797 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1798 		"             OpStore %outloc %neg\n"
1799 		"             OpReturn\n"
1800 		"             OpFunctionEnd\n";
1801 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1802 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1803 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1804 
1805 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
1806 
1807 	return group.release();
1808 }
1809 
1810 // Compare instruction for the contraction compute case.
1811 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1812 bool compareNoContractCase(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1813 {
1814 	if (outputAllocs.size() != 1)
1815 		return false;
1816 
1817 	// Only size is needed because we are not comparing the exact values.
1818 	size_t byteSize = expectedOutputs[0].getByteSize();
1819 
1820 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
1821 
1822 	for(size_t i = 0; i < byteSize / sizeof(float); ++i) {
1823 		if (outputAsFloat[i] != 0.f &&
1824 			outputAsFloat[i] != -ldexp(1, -24)) {
1825 			return false;
1826 		}
1827 	}
1828 
1829 	return true;
1830 }
1831 
createNoContractionGroup(tcu::TestContext & testCtx)1832 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
1833 {
1834 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
1835 	vector<CaseParameter>			cases;
1836 	const int						numElements		= 100;
1837 	vector<float>					inputFloats1	(numElements, 0);
1838 	vector<float>					inputFloats2	(numElements, 0);
1839 	vector<float>					outputFloats	(numElements, 0);
1840 	const StringTemplate			shaderTemplate	(
1841 		string(getComputeAsmShaderPreamble()) +
1842 
1843 		"OpName %main           \"main\"\n"
1844 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1845 
1846 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1847 
1848 		"${DECORATION}\n"
1849 
1850 		"OpDecorate %buf BufferBlock\n"
1851 		"OpDecorate %indata1 DescriptorSet 0\n"
1852 		"OpDecorate %indata1 Binding 0\n"
1853 		"OpDecorate %indata2 DescriptorSet 0\n"
1854 		"OpDecorate %indata2 Binding 1\n"
1855 		"OpDecorate %outdata DescriptorSet 0\n"
1856 		"OpDecorate %outdata Binding 2\n"
1857 		"OpDecorate %f32arr ArrayStride 4\n"
1858 		"OpMemberDecorate %buf 0 Offset 0\n"
1859 
1860 		+ string(getComputeAsmCommonTypes()) +
1861 
1862 		"%buf        = OpTypeStruct %f32arr\n"
1863 		"%bufptr     = OpTypePointer Uniform %buf\n"
1864 		"%indata1    = OpVariable %bufptr Uniform\n"
1865 		"%indata2    = OpVariable %bufptr Uniform\n"
1866 		"%outdata    = OpVariable %bufptr Uniform\n"
1867 
1868 		"%id         = OpVariable %uvec3ptr Input\n"
1869 		"%zero       = OpConstant %i32 0\n"
1870 		"%c_f_m1     = OpConstant %f32 -1.\n"
1871 
1872 		"%main       = OpFunction %void None %voidf\n"
1873 		"%label      = OpLabel\n"
1874 		"%idval      = OpLoad %uvec3 %id\n"
1875 		"%x          = OpCompositeExtract %u32 %idval 0\n"
1876 		"%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
1877 		"%inval1     = OpLoad %f32 %inloc1\n"
1878 		"%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
1879 		"%inval2     = OpLoad %f32 %inloc2\n"
1880 		"%mul        = OpFMul %f32 %inval1 %inval2\n"
1881 		"%add        = OpFAdd %f32 %mul %c_f_m1\n"
1882 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1883 		"              OpStore %outloc %add\n"
1884 		"              OpReturn\n"
1885 		"              OpFunctionEnd\n");
1886 
1887 	cases.push_back(CaseParameter("multiplication",	"OpDecorate %mul NoContraction"));
1888 	cases.push_back(CaseParameter("addition",		"OpDecorate %add NoContraction"));
1889 	cases.push_back(CaseParameter("both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
1890 
1891 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1892 	{
1893 		inputFloats1[ndx]	= 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
1894 		inputFloats2[ndx]	= 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
1895 		// Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
1896 		// conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
1897 		// So the final result will be 0.f or 0x1p-24.
1898 		// If the operation is combined into a precise fused multiply-add, then the result would be
1899 		// 2^-46 (0xa8800000).
1900 		outputFloats[ndx]	= 0.f;
1901 	}
1902 
1903 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1904 	{
1905 		map<string, string>		specializations;
1906 		ComputeShaderSpec		spec;
1907 
1908 		specializations["DECORATION"] = cases[caseNdx].param;
1909 		spec.assembly = shaderTemplate.specialize(specializations);
1910 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1911 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1912 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1913 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1914 		// Check against the two possible answers based on rounding mode.
1915 		spec.verifyIO = &compareNoContractCase;
1916 
1917 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1918 	}
1919 	return group.release();
1920 }
1921 
compareFRem(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)1922 bool compareFRem(const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
1923 {
1924 	if (outputAllocs.size() != 1)
1925 		return false;
1926 
1927 	vector<deUint8>	expectedBytes;
1928 	expectedOutputs[0].getBytes(expectedBytes);
1929 
1930 	const float*	expectedOutputAsFloat	= reinterpret_cast<const float*>(&expectedBytes.front());
1931 	const float*	outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
1932 
1933 	for (size_t idx = 0; idx < expectedBytes.size() / sizeof(float); ++idx)
1934 	{
1935 		const float f0 = expectedOutputAsFloat[idx];
1936 		const float f1 = outputAsFloat[idx];
1937 		// \todo relative error needs to be fairly high because FRem may be implemented as
1938 		// (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
1939 		if (deFloatAbs((f1 - f0) / f0) > 0.02)
1940 			return false;
1941 	}
1942 
1943 	return true;
1944 }
1945 
createOpFRemGroup(tcu::TestContext & testCtx)1946 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
1947 {
1948 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
1949 	ComputeShaderSpec				spec;
1950 	de::Random						rnd				(deStringHash(group->getName()));
1951 	const int						numElements		= 200;
1952 	vector<float>					inputFloats1	(numElements, 0);
1953 	vector<float>					inputFloats2	(numElements, 0);
1954 	vector<float>					outputFloats	(numElements, 0);
1955 
1956 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
1957 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
1958 
1959 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1960 	{
1961 		// Guard against divisors near zero.
1962 		if (std::fabs(inputFloats2[ndx]) < 1e-3)
1963 			inputFloats2[ndx] = 8.f;
1964 
1965 		// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
1966 		outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
1967 	}
1968 
1969 	spec.assembly =
1970 		string(getComputeAsmShaderPreamble()) +
1971 
1972 		"OpName %main           \"main\"\n"
1973 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1974 
1975 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1976 
1977 		"OpDecorate %buf BufferBlock\n"
1978 		"OpDecorate %indata1 DescriptorSet 0\n"
1979 		"OpDecorate %indata1 Binding 0\n"
1980 		"OpDecorate %indata2 DescriptorSet 0\n"
1981 		"OpDecorate %indata2 Binding 1\n"
1982 		"OpDecorate %outdata DescriptorSet 0\n"
1983 		"OpDecorate %outdata Binding 2\n"
1984 		"OpDecorate %f32arr ArrayStride 4\n"
1985 		"OpMemberDecorate %buf 0 Offset 0\n"
1986 
1987 		+ string(getComputeAsmCommonTypes()) +
1988 
1989 		"%buf        = OpTypeStruct %f32arr\n"
1990 		"%bufptr     = OpTypePointer Uniform %buf\n"
1991 		"%indata1    = OpVariable %bufptr Uniform\n"
1992 		"%indata2    = OpVariable %bufptr Uniform\n"
1993 		"%outdata    = OpVariable %bufptr Uniform\n"
1994 
1995 		"%id        = OpVariable %uvec3ptr Input\n"
1996 		"%zero      = OpConstant %i32 0\n"
1997 
1998 		"%main      = OpFunction %void None %voidf\n"
1999 		"%label     = OpLabel\n"
2000 		"%idval     = OpLoad %uvec3 %id\n"
2001 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2002 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2003 		"%inval1    = OpLoad %f32 %inloc1\n"
2004 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2005 		"%inval2    = OpLoad %f32 %inloc2\n"
2006 		"%rem       = OpFRem %f32 %inval1 %inval2\n"
2007 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2008 		"             OpStore %outloc %rem\n"
2009 		"             OpReturn\n"
2010 		"             OpFunctionEnd\n";
2011 
2012 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2013 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2014 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2015 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2016 	spec.verifyIO = &compareFRem;
2017 
2018 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2019 
2020 	return group.release();
2021 }
2022 
compareNMin(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2023 bool compareNMin (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2024 {
2025 	if (outputAllocs.size() != 1)
2026 		return false;
2027 
2028 	const BufferSp&			expectedOutput			(expectedOutputs[0].getBuffer());
2029 	std::vector<deUint8>	data;
2030 	expectedOutput->getBytes(data);
2031 
2032 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2033 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2034 
2035 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2036 	{
2037 		const float f0 = expectedOutputAsFloat[idx];
2038 		const float f1 = outputAsFloat[idx];
2039 
2040 		// For NMin, we accept NaN as output if both inputs were NaN.
2041 		// Otherwise the NaN is the wrong choise, as on architectures that
2042 		// do not handle NaN, those are huge values.
2043 		if (!(tcu::Float32(f1).isNaN() && tcu::Float32(f0).isNaN()) && deFloatAbs(f1 - f0) > 0.00001f)
2044 			return false;
2045 	}
2046 
2047 	return true;
2048 }
2049 
createOpNMinGroup(tcu::TestContext & testCtx)2050 tcu::TestCaseGroup* createOpNMinGroup (tcu::TestContext& testCtx)
2051 {
2052 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnmin", "Test the OpNMin instruction"));
2053 	ComputeShaderSpec				spec;
2054 	de::Random						rnd				(deStringHash(group->getName()));
2055 	const int						numElements		= 200;
2056 	vector<float>					inputFloats1	(numElements, 0);
2057 	vector<float>					inputFloats2	(numElements, 0);
2058 	vector<float>					outputFloats	(numElements, 0);
2059 
2060 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2061 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2062 
2063 	// Make the first case a full-NAN case.
2064 	inputFloats1[0] = TCU_NAN;
2065 	inputFloats2[0] = TCU_NAN;
2066 
2067 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2068 	{
2069 		// By default, pick the smallest
2070 		outputFloats[ndx] = std::min(inputFloats1[ndx], inputFloats2[ndx]);
2071 
2072 		// Make half of the cases NaN cases
2073 		if ((ndx & 1) == 0)
2074 		{
2075 			// Alternate between the NaN operand
2076 			if ((ndx & 2) == 0)
2077 			{
2078 				outputFloats[ndx] = inputFloats2[ndx];
2079 				inputFloats1[ndx] = TCU_NAN;
2080 			}
2081 			else
2082 			{
2083 				outputFloats[ndx] = inputFloats1[ndx];
2084 				inputFloats2[ndx] = TCU_NAN;
2085 			}
2086 		}
2087 	}
2088 
2089 	spec.assembly =
2090 		"OpCapability Shader\n"
2091 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2092 		"OpMemoryModel Logical GLSL450\n"
2093 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2094 		"OpExecutionMode %main LocalSize 1 1 1\n"
2095 
2096 		"OpName %main           \"main\"\n"
2097 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2098 
2099 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2100 
2101 		"OpDecorate %buf BufferBlock\n"
2102 		"OpDecorate %indata1 DescriptorSet 0\n"
2103 		"OpDecorate %indata1 Binding 0\n"
2104 		"OpDecorate %indata2 DescriptorSet 0\n"
2105 		"OpDecorate %indata2 Binding 1\n"
2106 		"OpDecorate %outdata DescriptorSet 0\n"
2107 		"OpDecorate %outdata Binding 2\n"
2108 		"OpDecorate %f32arr ArrayStride 4\n"
2109 		"OpMemberDecorate %buf 0 Offset 0\n"
2110 
2111 		+ string(getComputeAsmCommonTypes()) +
2112 
2113 		"%buf        = OpTypeStruct %f32arr\n"
2114 		"%bufptr     = OpTypePointer Uniform %buf\n"
2115 		"%indata1    = OpVariable %bufptr Uniform\n"
2116 		"%indata2    = OpVariable %bufptr Uniform\n"
2117 		"%outdata    = OpVariable %bufptr Uniform\n"
2118 
2119 		"%id        = OpVariable %uvec3ptr Input\n"
2120 		"%zero      = OpConstant %i32 0\n"
2121 
2122 		"%main      = OpFunction %void None %voidf\n"
2123 		"%label     = OpLabel\n"
2124 		"%idval     = OpLoad %uvec3 %id\n"
2125 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2126 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2127 		"%inval1    = OpLoad %f32 %inloc1\n"
2128 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2129 		"%inval2    = OpLoad %f32 %inloc2\n"
2130 		"%rem       = OpExtInst %f32 %std450 NMin %inval1 %inval2\n"
2131 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2132 		"             OpStore %outloc %rem\n"
2133 		"             OpReturn\n"
2134 		"             OpFunctionEnd\n";
2135 
2136 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2137 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2138 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2139 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2140 	spec.verifyIO = &compareNMin;
2141 
2142 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2143 
2144 	return group.release();
2145 }
2146 
compareNMax(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2147 bool compareNMax (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2148 {
2149 	if (outputAllocs.size() != 1)
2150 		return false;
2151 
2152 	const BufferSp&			expectedOutput			= expectedOutputs[0].getBuffer();
2153 	std::vector<deUint8>	data;
2154 	expectedOutput->getBytes(data);
2155 
2156 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2157 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2158 
2159 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float); ++idx)
2160 	{
2161 		const float f0 = expectedOutputAsFloat[idx];
2162 		const float f1 = outputAsFloat[idx];
2163 
2164 		// For NMax, NaN is considered acceptable result, since in
2165 		// architectures that do not handle NaNs, those are huge values.
2166 		if (!tcu::Float32(f1).isNaN() && deFloatAbs(f1 - f0) > 0.00001f)
2167 			return false;
2168 	}
2169 
2170 	return true;
2171 }
2172 
createOpNMaxGroup(tcu::TestContext & testCtx)2173 tcu::TestCaseGroup* createOpNMaxGroup (tcu::TestContext& testCtx)
2174 {
2175 	de::MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(testCtx, "opnmax", "Test the OpNMax instruction"));
2176 	ComputeShaderSpec				spec;
2177 	de::Random						rnd				(deStringHash(group->getName()));
2178 	const int						numElements		= 200;
2179 	vector<float>					inputFloats1	(numElements, 0);
2180 	vector<float>					inputFloats2	(numElements, 0);
2181 	vector<float>					outputFloats	(numElements, 0);
2182 
2183 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2184 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2185 
2186 	// Make the first case a full-NAN case.
2187 	inputFloats1[0] = TCU_NAN;
2188 	inputFloats2[0] = TCU_NAN;
2189 
2190 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2191 	{
2192 		// By default, pick the biggest
2193 		outputFloats[ndx] = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2194 
2195 		// Make half of the cases NaN cases
2196 		if ((ndx & 1) == 0)
2197 		{
2198 			// Alternate between the NaN operand
2199 			if ((ndx & 2) == 0)
2200 			{
2201 				outputFloats[ndx] = inputFloats2[ndx];
2202 				inputFloats1[ndx] = TCU_NAN;
2203 			}
2204 			else
2205 			{
2206 				outputFloats[ndx] = inputFloats1[ndx];
2207 				inputFloats2[ndx] = TCU_NAN;
2208 			}
2209 		}
2210 	}
2211 
2212 	spec.assembly =
2213 		"OpCapability Shader\n"
2214 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2215 		"OpMemoryModel Logical GLSL450\n"
2216 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2217 		"OpExecutionMode %main LocalSize 1 1 1\n"
2218 
2219 		"OpName %main           \"main\"\n"
2220 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2221 
2222 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2223 
2224 		"OpDecorate %buf BufferBlock\n"
2225 		"OpDecorate %indata1 DescriptorSet 0\n"
2226 		"OpDecorate %indata1 Binding 0\n"
2227 		"OpDecorate %indata2 DescriptorSet 0\n"
2228 		"OpDecorate %indata2 Binding 1\n"
2229 		"OpDecorate %outdata DescriptorSet 0\n"
2230 		"OpDecorate %outdata Binding 2\n"
2231 		"OpDecorate %f32arr ArrayStride 4\n"
2232 		"OpMemberDecorate %buf 0 Offset 0\n"
2233 
2234 		+ string(getComputeAsmCommonTypes()) +
2235 
2236 		"%buf        = OpTypeStruct %f32arr\n"
2237 		"%bufptr     = OpTypePointer Uniform %buf\n"
2238 		"%indata1    = OpVariable %bufptr Uniform\n"
2239 		"%indata2    = OpVariable %bufptr Uniform\n"
2240 		"%outdata    = OpVariable %bufptr Uniform\n"
2241 
2242 		"%id        = OpVariable %uvec3ptr Input\n"
2243 		"%zero      = OpConstant %i32 0\n"
2244 
2245 		"%main      = OpFunction %void None %voidf\n"
2246 		"%label     = OpLabel\n"
2247 		"%idval     = OpLoad %uvec3 %id\n"
2248 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2249 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2250 		"%inval1    = OpLoad %f32 %inloc1\n"
2251 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2252 		"%inval2    = OpLoad %f32 %inloc2\n"
2253 		"%rem       = OpExtInst %f32 %std450 NMax %inval1 %inval2\n"
2254 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2255 		"             OpStore %outloc %rem\n"
2256 		"             OpReturn\n"
2257 		"             OpFunctionEnd\n";
2258 
2259 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2260 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2261 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2262 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2263 	spec.verifyIO = &compareNMax;
2264 
2265 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2266 
2267 	return group.release();
2268 }
2269 
compareNClamp(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)2270 bool compareNClamp (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
2271 {
2272 	if (outputAllocs.size() != 1)
2273 		return false;
2274 
2275 	const BufferSp&			expectedOutput			= expectedOutputs[0].getBuffer();
2276 	std::vector<deUint8>	data;
2277 	expectedOutput->getBytes(data);
2278 
2279 	const float* const		expectedOutputAsFloat	= reinterpret_cast<const float*>(&data.front());
2280 	const float* const		outputAsFloat			= static_cast<const float*>(outputAllocs[0]->getHostPtr());
2281 
2282 	for (size_t idx = 0; idx < expectedOutput->getByteSize() / sizeof(float) / 2; ++idx)
2283 	{
2284 		const float e0 = expectedOutputAsFloat[idx * 2];
2285 		const float e1 = expectedOutputAsFloat[idx * 2 + 1];
2286 		const float res = outputAsFloat[idx];
2287 
2288 		// For NClamp, we have two possible outcomes based on
2289 		// whether NaNs are handled or not.
2290 		// If either min or max value is NaN, the result is undefined,
2291 		// so this test doesn't stress those. If the clamped value is
2292 		// NaN, and NaNs are handled, the result is min; if NaNs are not
2293 		// handled, they are big values that result in max.
2294 		// If all three parameters are NaN, the result should be NaN.
2295 		if (!((tcu::Float32(e0).isNaN() && tcu::Float32(res).isNaN()) ||
2296 			 (deFloatAbs(e0 - res) < 0.00001f) ||
2297 			 (deFloatAbs(e1 - res) < 0.00001f)))
2298 			return false;
2299 	}
2300 
2301 	return true;
2302 }
2303 
createOpNClampGroup(tcu::TestContext & testCtx)2304 tcu::TestCaseGroup* createOpNClampGroup (tcu::TestContext& testCtx)
2305 {
2306 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnclamp", "Test the OpNClamp instruction"));
2307 	ComputeShaderSpec				spec;
2308 	de::Random						rnd				(deStringHash(group->getName()));
2309 	const int						numElements		= 200;
2310 	vector<float>					inputFloats1	(numElements, 0);
2311 	vector<float>					inputFloats2	(numElements, 0);
2312 	vector<float>					inputFloats3	(numElements, 0);
2313 	vector<float>					outputFloats	(numElements * 2, 0);
2314 
2315 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
2316 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats2[0], numElements);
2317 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats3[0], numElements);
2318 
2319 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2320 	{
2321 		// Results are only defined if max value is bigger than min value.
2322 		if (inputFloats2[ndx] > inputFloats3[ndx])
2323 		{
2324 			float t = inputFloats2[ndx];
2325 			inputFloats2[ndx] = inputFloats3[ndx];
2326 			inputFloats3[ndx] = t;
2327 		}
2328 
2329 		// By default, do the clamp, setting both possible answers
2330 		float defaultRes = std::min(std::max(inputFloats1[ndx], inputFloats2[ndx]), inputFloats3[ndx]);
2331 
2332 		float maxResA = std::max(inputFloats1[ndx], inputFloats2[ndx]);
2333 		float maxResB = maxResA;
2334 
2335 		// Alternate between the NaN cases
2336 		if (ndx & 1)
2337 		{
2338 			inputFloats1[ndx] = TCU_NAN;
2339 			// If NaN is handled, the result should be same as the clamp minimum.
2340 			// If NaN is not handled, the result should clamp to the clamp maximum.
2341 			maxResA = inputFloats2[ndx];
2342 			maxResB = inputFloats3[ndx];
2343 		}
2344 		else
2345 		{
2346 			// Not a NaN case - only one legal result.
2347 			maxResA = defaultRes;
2348 			maxResB = defaultRes;
2349 		}
2350 
2351 		outputFloats[ndx * 2] = maxResA;
2352 		outputFloats[ndx * 2 + 1] = maxResB;
2353 	}
2354 
2355 	// Make the first case a full-NAN case.
2356 	inputFloats1[0] = TCU_NAN;
2357 	inputFloats2[0] = TCU_NAN;
2358 	inputFloats3[0] = TCU_NAN;
2359 	outputFloats[0] = TCU_NAN;
2360 	outputFloats[1] = TCU_NAN;
2361 
2362 	spec.assembly =
2363 		"OpCapability Shader\n"
2364 		"%std450	= OpExtInstImport \"GLSL.std.450\"\n"
2365 		"OpMemoryModel Logical GLSL450\n"
2366 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2367 		"OpExecutionMode %main LocalSize 1 1 1\n"
2368 
2369 		"OpName %main           \"main\"\n"
2370 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2371 
2372 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2373 
2374 		"OpDecorate %buf BufferBlock\n"
2375 		"OpDecorate %indata1 DescriptorSet 0\n"
2376 		"OpDecorate %indata1 Binding 0\n"
2377 		"OpDecorate %indata2 DescriptorSet 0\n"
2378 		"OpDecorate %indata2 Binding 1\n"
2379 		"OpDecorate %indata3 DescriptorSet 0\n"
2380 		"OpDecorate %indata3 Binding 2\n"
2381 		"OpDecorate %outdata DescriptorSet 0\n"
2382 		"OpDecorate %outdata Binding 3\n"
2383 		"OpDecorate %f32arr ArrayStride 4\n"
2384 		"OpMemberDecorate %buf 0 Offset 0\n"
2385 
2386 		+ string(getComputeAsmCommonTypes()) +
2387 
2388 		"%buf        = OpTypeStruct %f32arr\n"
2389 		"%bufptr     = OpTypePointer Uniform %buf\n"
2390 		"%indata1    = OpVariable %bufptr Uniform\n"
2391 		"%indata2    = OpVariable %bufptr Uniform\n"
2392 		"%indata3    = OpVariable %bufptr Uniform\n"
2393 		"%outdata    = OpVariable %bufptr Uniform\n"
2394 
2395 		"%id        = OpVariable %uvec3ptr Input\n"
2396 		"%zero      = OpConstant %i32 0\n"
2397 
2398 		"%main      = OpFunction %void None %voidf\n"
2399 		"%label     = OpLabel\n"
2400 		"%idval     = OpLoad %uvec3 %id\n"
2401 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2402 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
2403 		"%inval1    = OpLoad %f32 %inloc1\n"
2404 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
2405 		"%inval2    = OpLoad %f32 %inloc2\n"
2406 		"%inloc3    = OpAccessChain %f32ptr %indata3 %zero %x\n"
2407 		"%inval3    = OpLoad %f32 %inloc3\n"
2408 		"%rem       = OpExtInst %f32 %std450 NClamp %inval1 %inval2 %inval3\n"
2409 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2410 		"             OpStore %outloc %rem\n"
2411 		"             OpReturn\n"
2412 		"             OpFunctionEnd\n";
2413 
2414 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
2415 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2416 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
2417 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2418 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2419 	spec.verifyIO = &compareNClamp;
2420 
2421 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
2422 
2423 	return group.release();
2424 }
2425 
createOpSRemComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2426 tcu::TestCaseGroup* createOpSRemComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2427 {
2428 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem", "Test the OpSRem instruction"));
2429 	de::Random						rnd				(deStringHash(group->getName()));
2430 	const int						numElements		= 200;
2431 
2432 	const struct CaseParams
2433 	{
2434 		const char*		name;
2435 		const char*		failMessage;		// customized status message
2436 		qpTestResult	failResult;			// override status on failure
2437 		int				op1Min, op1Max;		// operand ranges
2438 		int				op2Min, op2Max;
2439 	} cases[] =
2440 	{
2441 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
2442 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
2443 	};
2444 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2445 
2446 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2447 	{
2448 		const CaseParams&	params		= cases[caseNdx];
2449 		ComputeShaderSpec	spec;
2450 		vector<deInt32>		inputInts1	(numElements, 0);
2451 		vector<deInt32>		inputInts2	(numElements, 0);
2452 		vector<deInt32>		outputInts	(numElements, 0);
2453 
2454 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2455 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2456 
2457 		for (int ndx = 0; ndx < numElements; ++ndx)
2458 		{
2459 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2460 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2461 		}
2462 
2463 		spec.assembly =
2464 			string(getComputeAsmShaderPreamble()) +
2465 
2466 			"OpName %main           \"main\"\n"
2467 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2468 
2469 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2470 
2471 			"OpDecorate %buf BufferBlock\n"
2472 			"OpDecorate %indata1 DescriptorSet 0\n"
2473 			"OpDecorate %indata1 Binding 0\n"
2474 			"OpDecorate %indata2 DescriptorSet 0\n"
2475 			"OpDecorate %indata2 Binding 1\n"
2476 			"OpDecorate %outdata DescriptorSet 0\n"
2477 			"OpDecorate %outdata Binding 2\n"
2478 			"OpDecorate %i32arr ArrayStride 4\n"
2479 			"OpMemberDecorate %buf 0 Offset 0\n"
2480 
2481 			+ string(getComputeAsmCommonTypes()) +
2482 
2483 			"%buf        = OpTypeStruct %i32arr\n"
2484 			"%bufptr     = OpTypePointer Uniform %buf\n"
2485 			"%indata1    = OpVariable %bufptr Uniform\n"
2486 			"%indata2    = OpVariable %bufptr Uniform\n"
2487 			"%outdata    = OpVariable %bufptr Uniform\n"
2488 
2489 			"%id        = OpVariable %uvec3ptr Input\n"
2490 			"%zero      = OpConstant %i32 0\n"
2491 
2492 			"%main      = OpFunction %void None %voidf\n"
2493 			"%label     = OpLabel\n"
2494 			"%idval     = OpLoad %uvec3 %id\n"
2495 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2496 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2497 			"%inval1    = OpLoad %i32 %inloc1\n"
2498 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2499 			"%inval2    = OpLoad %i32 %inloc2\n"
2500 			"%rem       = OpSRem %i32 %inval1 %inval2\n"
2501 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2502 			"             OpStore %outloc %rem\n"
2503 			"             OpReturn\n"
2504 			"             OpFunctionEnd\n";
2505 
2506 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
2507 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
2508 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
2509 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2510 		spec.failResult			= params.failResult;
2511 		spec.failMessage		= params.failMessage;
2512 
2513 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2514 	}
2515 
2516 	return group.release();
2517 }
2518 
createOpSRemComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2519 tcu::TestCaseGroup* createOpSRemComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2520 {
2521 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsrem64", "Test the 64-bit OpSRem instruction"));
2522 	de::Random						rnd				(deStringHash(group->getName()));
2523 	const int						numElements		= 200;
2524 
2525 	const struct CaseParams
2526 	{
2527 		const char*		name;
2528 		const char*		failMessage;		// customized status message
2529 		qpTestResult	failResult;			// override status on failure
2530 		bool			positive;
2531 	} cases[] =
2532 	{
2533 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
2534 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
2535 	};
2536 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2537 
2538 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2539 	{
2540 		const CaseParams&	params		= cases[caseNdx];
2541 		ComputeShaderSpec	spec;
2542 		vector<deInt64>		inputInts1	(numElements, 0);
2543 		vector<deInt64>		inputInts2	(numElements, 0);
2544 		vector<deInt64>		outputInts	(numElements, 0);
2545 
2546 		if (params.positive)
2547 		{
2548 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2549 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2550 		}
2551 		else
2552 		{
2553 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2554 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2555 		}
2556 
2557 		for (int ndx = 0; ndx < numElements; ++ndx)
2558 		{
2559 			// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
2560 			outputInts[ndx] = inputInts1[ndx] % inputInts2[ndx];
2561 		}
2562 
2563 		spec.assembly =
2564 			"OpCapability Int64\n"
2565 
2566 			+ string(getComputeAsmShaderPreamble()) +
2567 
2568 			"OpName %main           \"main\"\n"
2569 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2570 
2571 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2572 
2573 			"OpDecorate %buf BufferBlock\n"
2574 			"OpDecorate %indata1 DescriptorSet 0\n"
2575 			"OpDecorate %indata1 Binding 0\n"
2576 			"OpDecorate %indata2 DescriptorSet 0\n"
2577 			"OpDecorate %indata2 Binding 1\n"
2578 			"OpDecorate %outdata DescriptorSet 0\n"
2579 			"OpDecorate %outdata Binding 2\n"
2580 			"OpDecorate %i64arr ArrayStride 8\n"
2581 			"OpMemberDecorate %buf 0 Offset 0\n"
2582 
2583 			+ string(getComputeAsmCommonTypes())
2584 			+ string(getComputeAsmCommonInt64Types()) +
2585 
2586 			"%buf        = OpTypeStruct %i64arr\n"
2587 			"%bufptr     = OpTypePointer Uniform %buf\n"
2588 			"%indata1    = OpVariable %bufptr Uniform\n"
2589 			"%indata2    = OpVariable %bufptr Uniform\n"
2590 			"%outdata    = OpVariable %bufptr Uniform\n"
2591 
2592 			"%id        = OpVariable %uvec3ptr Input\n"
2593 			"%zero      = OpConstant %i64 0\n"
2594 
2595 			"%main      = OpFunction %void None %voidf\n"
2596 			"%label     = OpLabel\n"
2597 			"%idval     = OpLoad %uvec3 %id\n"
2598 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2599 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2600 			"%inval1    = OpLoad %i64 %inloc1\n"
2601 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2602 			"%inval2    = OpLoad %i64 %inloc2\n"
2603 			"%rem       = OpSRem %i64 %inval1 %inval2\n"
2604 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2605 			"             OpStore %outloc %rem\n"
2606 			"             OpReturn\n"
2607 			"             OpFunctionEnd\n";
2608 
2609 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
2610 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
2611 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
2612 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2613 		spec.failResult			= params.failResult;
2614 		spec.failMessage		= params.failMessage;
2615 
2616 		spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2617 
2618 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2619 	}
2620 
2621 	return group.release();
2622 }
2623 
createOpSModComputeGroup(tcu::TestContext & testCtx,qpTestResult negFailResult)2624 tcu::TestCaseGroup* createOpSModComputeGroup (tcu::TestContext& testCtx, qpTestResult negFailResult)
2625 {
2626 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod", "Test the OpSMod instruction"));
2627 	de::Random						rnd				(deStringHash(group->getName()));
2628 	const int						numElements		= 200;
2629 
2630 	const struct CaseParams
2631 	{
2632 		const char*		name;
2633 		const char*		failMessage;		// customized status message
2634 		qpTestResult	failResult;			// override status on failure
2635 		int				op1Min, op1Max;		// operand ranges
2636 		int				op2Min, op2Max;
2637 	} cases[] =
2638 	{
2639 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	0,		65536,	0,		100 },
2640 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			-65536,	65536,	-100,	100 },	// see below
2641 	};
2642 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2643 
2644 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2645 	{
2646 		const CaseParams&	params		= cases[caseNdx];
2647 
2648 		ComputeShaderSpec	spec;
2649 		vector<deInt32>		inputInts1	(numElements, 0);
2650 		vector<deInt32>		inputInts2	(numElements, 0);
2651 		vector<deInt32>		outputInts	(numElements, 0);
2652 
2653 		fillRandomScalars(rnd, params.op1Min, params.op1Max, &inputInts1[0], numElements);
2654 		fillRandomScalars(rnd, params.op2Min, params.op2Max, &inputInts2[0], numElements, filterNotZero);
2655 
2656 		for (int ndx = 0; ndx < numElements; ++ndx)
2657 		{
2658 			deInt32 rem = inputInts1[ndx] % inputInts2[ndx];
2659 			if (rem == 0)
2660 			{
2661 				outputInts[ndx] = 0;
2662 			}
2663 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2664 			{
2665 				// They have the same sign
2666 				outputInts[ndx] = rem;
2667 			}
2668 			else
2669 			{
2670 				// They have opposite sign.  The remainder operation takes the
2671 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2672 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2673 				// the result has the correct sign and that it is still
2674 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
2675 				//
2676 				// See also http://mathforum.org/library/drmath/view/52343.html
2677 				outputInts[ndx] = rem + inputInts2[ndx];
2678 			}
2679 		}
2680 
2681 		spec.assembly =
2682 			string(getComputeAsmShaderPreamble()) +
2683 
2684 			"OpName %main           \"main\"\n"
2685 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2686 
2687 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2688 
2689 			"OpDecorate %buf BufferBlock\n"
2690 			"OpDecorate %indata1 DescriptorSet 0\n"
2691 			"OpDecorate %indata1 Binding 0\n"
2692 			"OpDecorate %indata2 DescriptorSet 0\n"
2693 			"OpDecorate %indata2 Binding 1\n"
2694 			"OpDecorate %outdata DescriptorSet 0\n"
2695 			"OpDecorate %outdata Binding 2\n"
2696 			"OpDecorate %i32arr ArrayStride 4\n"
2697 			"OpMemberDecorate %buf 0 Offset 0\n"
2698 
2699 			+ string(getComputeAsmCommonTypes()) +
2700 
2701 			"%buf        = OpTypeStruct %i32arr\n"
2702 			"%bufptr     = OpTypePointer Uniform %buf\n"
2703 			"%indata1    = OpVariable %bufptr Uniform\n"
2704 			"%indata2    = OpVariable %bufptr Uniform\n"
2705 			"%outdata    = OpVariable %bufptr Uniform\n"
2706 
2707 			"%id        = OpVariable %uvec3ptr Input\n"
2708 			"%zero      = OpConstant %i32 0\n"
2709 
2710 			"%main      = OpFunction %void None %voidf\n"
2711 			"%label     = OpLabel\n"
2712 			"%idval     = OpLoad %uvec3 %id\n"
2713 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2714 			"%inloc1    = OpAccessChain %i32ptr %indata1 %zero %x\n"
2715 			"%inval1    = OpLoad %i32 %inloc1\n"
2716 			"%inloc2    = OpAccessChain %i32ptr %indata2 %zero %x\n"
2717 			"%inval2    = OpLoad %i32 %inloc2\n"
2718 			"%rem       = OpSMod %i32 %inval1 %inval2\n"
2719 			"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
2720 			"             OpStore %outloc %rem\n"
2721 			"             OpReturn\n"
2722 			"             OpFunctionEnd\n";
2723 
2724 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts1)));
2725 		spec.inputs.push_back	(BufferSp(new Int32Buffer(inputInts2)));
2726 		spec.outputs.push_back	(BufferSp(new Int32Buffer(outputInts)));
2727 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2728 		spec.failResult			= params.failResult;
2729 		spec.failMessage		= params.failMessage;
2730 
2731 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2732 	}
2733 
2734 	return group.release();
2735 }
2736 
createOpSModComputeGroup64(tcu::TestContext & testCtx,qpTestResult negFailResult)2737 tcu::TestCaseGroup* createOpSModComputeGroup64 (tcu::TestContext& testCtx, qpTestResult negFailResult)
2738 {
2739 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsmod64", "Test the OpSMod instruction"));
2740 	de::Random						rnd				(deStringHash(group->getName()));
2741 	const int						numElements		= 200;
2742 
2743 	const struct CaseParams
2744 	{
2745 		const char*		name;
2746 		const char*		failMessage;		// customized status message
2747 		qpTestResult	failResult;			// override status on failure
2748 		bool			positive;
2749 	} cases[] =
2750 	{
2751 		{ "positive",	"Output doesn't match with expected",				QP_TEST_RESULT_FAIL,	true },
2752 		{ "all",		"Inconsistent results, but within specification",	negFailResult,			false },	// see below
2753 	};
2754 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
2755 
2756 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
2757 	{
2758 		const CaseParams&	params		= cases[caseNdx];
2759 
2760 		ComputeShaderSpec	spec;
2761 		vector<deInt64>		inputInts1	(numElements, 0);
2762 		vector<deInt64>		inputInts2	(numElements, 0);
2763 		vector<deInt64>		outputInts	(numElements, 0);
2764 
2765 
2766 		if (params.positive)
2767 		{
2768 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements, filterNonNegative);
2769 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterPositive);
2770 		}
2771 		else
2772 		{
2773 			fillRandomInt64sLogDistributed(rnd, inputInts1, numElements);
2774 			fillRandomInt64sLogDistributed(rnd, inputInts2, numElements, filterNotZero);
2775 		}
2776 
2777 		for (int ndx = 0; ndx < numElements; ++ndx)
2778 		{
2779 			deInt64 rem = inputInts1[ndx] % inputInts2[ndx];
2780 			if (rem == 0)
2781 			{
2782 				outputInts[ndx] = 0;
2783 			}
2784 			else if ((inputInts1[ndx] >= 0) == (inputInts2[ndx] >= 0))
2785 			{
2786 				// They have the same sign
2787 				outputInts[ndx] = rem;
2788 			}
2789 			else
2790 			{
2791 				// They have opposite sign.  The remainder operation takes the
2792 				// sign inputInts1[ndx] but OpSMod is supposed to take ths sign
2793 				// of inputInts2[ndx].  Adding inputInts2[ndx] will ensure that
2794 				// the result has the correct sign and that it is still
2795 				// congruent to inputInts1[ndx] modulo inputInts2[ndx]
2796 				//
2797 				// See also http://mathforum.org/library/drmath/view/52343.html
2798 				outputInts[ndx] = rem + inputInts2[ndx];
2799 			}
2800 		}
2801 
2802 		spec.assembly =
2803 			"OpCapability Int64\n"
2804 
2805 			+ string(getComputeAsmShaderPreamble()) +
2806 
2807 			"OpName %main           \"main\"\n"
2808 			"OpName %id             \"gl_GlobalInvocationID\"\n"
2809 
2810 			"OpDecorate %id BuiltIn GlobalInvocationId\n"
2811 
2812 			"OpDecorate %buf BufferBlock\n"
2813 			"OpDecorate %indata1 DescriptorSet 0\n"
2814 			"OpDecorate %indata1 Binding 0\n"
2815 			"OpDecorate %indata2 DescriptorSet 0\n"
2816 			"OpDecorate %indata2 Binding 1\n"
2817 			"OpDecorate %outdata DescriptorSet 0\n"
2818 			"OpDecorate %outdata Binding 2\n"
2819 			"OpDecorate %i64arr ArrayStride 8\n"
2820 			"OpMemberDecorate %buf 0 Offset 0\n"
2821 
2822 			+ string(getComputeAsmCommonTypes())
2823 			+ string(getComputeAsmCommonInt64Types()) +
2824 
2825 			"%buf        = OpTypeStruct %i64arr\n"
2826 			"%bufptr     = OpTypePointer Uniform %buf\n"
2827 			"%indata1    = OpVariable %bufptr Uniform\n"
2828 			"%indata2    = OpVariable %bufptr Uniform\n"
2829 			"%outdata    = OpVariable %bufptr Uniform\n"
2830 
2831 			"%id        = OpVariable %uvec3ptr Input\n"
2832 			"%zero      = OpConstant %i64 0\n"
2833 
2834 			"%main      = OpFunction %void None %voidf\n"
2835 			"%label     = OpLabel\n"
2836 			"%idval     = OpLoad %uvec3 %id\n"
2837 			"%x         = OpCompositeExtract %u32 %idval 0\n"
2838 			"%inloc1    = OpAccessChain %i64ptr %indata1 %zero %x\n"
2839 			"%inval1    = OpLoad %i64 %inloc1\n"
2840 			"%inloc2    = OpAccessChain %i64ptr %indata2 %zero %x\n"
2841 			"%inval2    = OpLoad %i64 %inloc2\n"
2842 			"%rem       = OpSMod %i64 %inval1 %inval2\n"
2843 			"%outloc    = OpAccessChain %i64ptr %outdata %zero %x\n"
2844 			"             OpStore %outloc %rem\n"
2845 			"             OpReturn\n"
2846 			"             OpFunctionEnd\n";
2847 
2848 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts1)));
2849 		spec.inputs.push_back	(BufferSp(new Int64Buffer(inputInts2)));
2850 		spec.outputs.push_back	(BufferSp(new Int64Buffer(outputInts)));
2851 		spec.numWorkGroups		= IVec3(numElements, 1, 1);
2852 		spec.failResult			= params.failResult;
2853 		spec.failMessage		= params.failMessage;
2854 
2855 		spec.requestedVulkanFeatures.coreFeatures.shaderInt64 = VK_TRUE;
2856 
2857 		group->addChild(new SpvAsmComputeShaderCase(testCtx, params.name, "", spec));
2858 	}
2859 
2860 	return group.release();
2861 }
2862 
2863 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)2864 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
2865 {
2866 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
2867 	de::Random						rnd				(deStringHash(group->getName()));
2868 	const int						numElements		= 100;
2869 
2870 	// The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
2871 	ComputeShaderSpec				spec1;
2872 	vector<Vec4>					inputFloats1	(numElements);
2873 	vector<Vec4>					outputFloats1	(numElements);
2874 
2875 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
2876 
2877 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2878 	floorAll(inputFloats1);
2879 
2880 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2881 		outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
2882 
2883 	spec1.assembly =
2884 		string(getComputeAsmShaderPreamble()) +
2885 
2886 		"OpName %main           \"main\"\n"
2887 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2888 
2889 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2890 		"OpDecorate %vec4arr ArrayStride 16\n"
2891 
2892 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2893 
2894 		"%vec4       = OpTypeVector %f32 4\n"
2895 		"%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
2896 		"%vec4ptr_f  = OpTypePointer Function %vec4\n"
2897 		"%vec4arr    = OpTypeRuntimeArray %vec4\n"
2898 		"%buf        = OpTypeStruct %vec4arr\n"
2899 		"%bufptr     = OpTypePointer Uniform %buf\n"
2900 		"%indata     = OpVariable %bufptr Uniform\n"
2901 		"%outdata    = OpVariable %bufptr Uniform\n"
2902 
2903 		"%id         = OpVariable %uvec3ptr Input\n"
2904 		"%zero       = OpConstant %i32 0\n"
2905 		"%c_f_0      = OpConstant %f32 0.\n"
2906 		"%c_f_0_5    = OpConstant %f32 0.5\n"
2907 		"%c_f_1_5    = OpConstant %f32 1.5\n"
2908 		"%c_f_2_5    = OpConstant %f32 2.5\n"
2909 		"%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
2910 
2911 		"%main       = OpFunction %void None %voidf\n"
2912 		"%label      = OpLabel\n"
2913 		"%v_vec4     = OpVariable %vec4ptr_f Function\n"
2914 		"%idval      = OpLoad %uvec3 %id\n"
2915 		"%x          = OpCompositeExtract %u32 %idval 0\n"
2916 		"%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
2917 		"%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
2918 		"              OpCopyMemory %v_vec4 %inloc\n"
2919 		"%v_vec4_val = OpLoad %vec4 %v_vec4\n"
2920 		"%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
2921 		"              OpStore %outloc %add\n"
2922 		"              OpReturn\n"
2923 		"              OpFunctionEnd\n";
2924 
2925 	spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
2926 	spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
2927 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
2928 
2929 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
2930 
2931 	// The following case copies a float[100] variable from the input buffer to the output buffer.
2932 	ComputeShaderSpec				spec2;
2933 	vector<float>					inputFloats2	(numElements);
2934 	vector<float>					outputFloats2	(numElements);
2935 
2936 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
2937 
2938 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2939 		outputFloats2[ndx] = inputFloats2[ndx];
2940 
2941 	spec2.assembly =
2942 		string(getComputeAsmShaderPreamble()) +
2943 
2944 		"OpName %main           \"main\"\n"
2945 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2946 
2947 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2948 		"OpDecorate %f32arr100 ArrayStride 4\n"
2949 
2950 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
2951 
2952 		"%hundred        = OpConstant %u32 100\n"
2953 		"%f32arr100      = OpTypeArray %f32 %hundred\n"
2954 		"%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
2955 		"%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
2956 		"%buf            = OpTypeStruct %f32arr100\n"
2957 		"%bufptr         = OpTypePointer Uniform %buf\n"
2958 		"%indata         = OpVariable %bufptr Uniform\n"
2959 		"%outdata        = OpVariable %bufptr Uniform\n"
2960 
2961 		"%id             = OpVariable %uvec3ptr Input\n"
2962 		"%zero           = OpConstant %i32 0\n"
2963 
2964 		"%main           = OpFunction %void None %voidf\n"
2965 		"%label          = OpLabel\n"
2966 		"%var            = OpVariable %f32arr100ptr_f Function\n"
2967 		"%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
2968 		"%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
2969 		"                  OpCopyMemory %var %inarr\n"
2970 		"                  OpCopyMemory %outarr %var\n"
2971 		"                  OpReturn\n"
2972 		"                  OpFunctionEnd\n";
2973 
2974 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
2975 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
2976 	spec2.numWorkGroups = IVec3(1, 1, 1);
2977 
2978 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
2979 
2980 	// The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
2981 	ComputeShaderSpec				spec3;
2982 	vector<float>					inputFloats3	(16);
2983 	vector<float>					outputFloats3	(16);
2984 
2985 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
2986 
2987 	for (size_t ndx = 0; ndx < 16; ++ndx)
2988 		outputFloats3[ndx] = inputFloats3[ndx];
2989 
2990 	spec3.assembly =
2991 		string(getComputeAsmShaderPreamble()) +
2992 
2993 		"OpName %main           \"main\"\n"
2994 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2995 
2996 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2997 		//"OpMemberDecorate %buf 0 Offset 0\n"  - exists in getComputeAsmInputOutputBufferTraits
2998 		"OpMemberDecorate %buf 1 Offset 16\n"
2999 		"OpMemberDecorate %buf 2 Offset 32\n"
3000 		"OpMemberDecorate %buf 3 Offset 48\n"
3001 
3002 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3003 
3004 		"%vec4      = OpTypeVector %f32 4\n"
3005 		"%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
3006 		"%bufptr    = OpTypePointer Uniform %buf\n"
3007 		"%indata    = OpVariable %bufptr Uniform\n"
3008 		"%outdata   = OpVariable %bufptr Uniform\n"
3009 		"%vec4stptr = OpTypePointer Function %buf\n"
3010 
3011 		"%id        = OpVariable %uvec3ptr Input\n"
3012 		"%zero      = OpConstant %i32 0\n"
3013 
3014 		"%main      = OpFunction %void None %voidf\n"
3015 		"%label     = OpLabel\n"
3016 		"%var       = OpVariable %vec4stptr Function\n"
3017 		"             OpCopyMemory %var %indata\n"
3018 		"             OpCopyMemory %outdata %var\n"
3019 		"             OpReturn\n"
3020 		"             OpFunctionEnd\n";
3021 
3022 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3023 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
3024 	spec3.numWorkGroups = IVec3(1, 1, 1);
3025 
3026 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
3027 
3028 	// The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
3029 	ComputeShaderSpec				spec4;
3030 	vector<float>					inputFloats4	(numElements);
3031 	vector<float>					outputFloats4	(numElements);
3032 
3033 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
3034 
3035 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3036 		outputFloats4[ndx] = -inputFloats4[ndx];
3037 
3038 	spec4.assembly =
3039 		string(getComputeAsmShaderPreamble()) +
3040 
3041 		"OpName %main           \"main\"\n"
3042 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3043 
3044 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3045 
3046 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
3047 
3048 		"%f32ptr_f  = OpTypePointer Function %f32\n"
3049 		"%id        = OpVariable %uvec3ptr Input\n"
3050 		"%zero      = OpConstant %i32 0\n"
3051 
3052 		"%main      = OpFunction %void None %voidf\n"
3053 		"%label     = OpLabel\n"
3054 		"%var       = OpVariable %f32ptr_f Function\n"
3055 		"%idval     = OpLoad %uvec3 %id\n"
3056 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3057 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3058 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3059 		"             OpCopyMemory %var %inloc\n"
3060 		"%val       = OpLoad %f32 %var\n"
3061 		"%neg       = OpFNegate %f32 %val\n"
3062 		"             OpStore %outloc %neg\n"
3063 		"             OpReturn\n"
3064 		"             OpFunctionEnd\n";
3065 
3066 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3067 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
3068 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
3069 
3070 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
3071 
3072 	return group.release();
3073 }
3074 
createOpCopyObjectGroup(tcu::TestContext & testCtx)3075 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
3076 {
3077 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
3078 	ComputeShaderSpec				spec;
3079 	de::Random						rnd				(deStringHash(group->getName()));
3080 	const int						numElements		= 100;
3081 	vector<float>					inputFloats		(numElements, 0);
3082 	vector<float>					outputFloats	(numElements, 0);
3083 
3084 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
3085 
3086 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3087 	floorAll(inputFloats);
3088 
3089 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3090 		outputFloats[ndx] = inputFloats[ndx] + 7.5f;
3091 
3092 	spec.assembly =
3093 		string(getComputeAsmShaderPreamble()) +
3094 
3095 		"OpName %main           \"main\"\n"
3096 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3097 
3098 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3099 
3100 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3101 
3102 		"%fmat     = OpTypeMatrix %fvec3 3\n"
3103 		"%three    = OpConstant %u32 3\n"
3104 		"%farr     = OpTypeArray %f32 %three\n"
3105 		"%fst      = OpTypeStruct %f32 %f32\n"
3106 
3107 		+ string(getComputeAsmInputOutputBuffer()) +
3108 
3109 		"%id            = OpVariable %uvec3ptr Input\n"
3110 		"%zero          = OpConstant %i32 0\n"
3111 		"%c_f           = OpConstant %f32 1.5\n"
3112 		"%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
3113 		"%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
3114 		"%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
3115 		"%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
3116 
3117 		"%main          = OpFunction %void None %voidf\n"
3118 		"%label         = OpLabel\n"
3119 		"%c_f_copy      = OpCopyObject %f32   %c_f\n"
3120 		"%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
3121 		"%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
3122 		"%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
3123 		"%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
3124 		"%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
3125 		"%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
3126 		"%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
3127 		"%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
3128 		// Add up. 1.5 * 5 = 7.5.
3129 		"%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
3130 		"%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
3131 		"%add3          = OpFAdd %f32 %add2     %farr_elem\n"
3132 		"%add4          = OpFAdd %f32 %add3     %fst_elem\n"
3133 
3134 		"%idval         = OpLoad %uvec3 %id\n"
3135 		"%x             = OpCompositeExtract %u32 %idval 0\n"
3136 		"%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
3137 		"%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
3138 		"%inval         = OpLoad %f32 %inloc\n"
3139 		"%add           = OpFAdd %f32 %add4 %inval\n"
3140 		"                 OpStore %outloc %add\n"
3141 		"                 OpReturn\n"
3142 		"                 OpFunctionEnd\n";
3143 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3144 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3145 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3146 
3147 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
3148 
3149 	return group.release();
3150 }
// Assembly code used by createOpUnreachableGroup() (defined further below) for testing OpUnreachable is based on GLSL source code:
3152 //
3153 // #version 430
3154 //
3155 // layout(std140, set = 0, binding = 0) readonly buffer Input {
3156 //   float elements[];
3157 // } input_data;
3158 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
3159 //   float elements[];
3160 // } output_data;
3161 //
3162 // void not_called_func() {
3163 //   // place OpUnreachable here
3164 // }
3165 //
3166 // uint modulo4(uint val) {
3167 //   switch (val % uint(4)) {
3168 //     case 0:  return 3;
3169 //     case 1:  return 2;
3170 //     case 2:  return 1;
3171 //     case 3:  return 0;
3172 //     default: return 100; // place OpUnreachable here
3173 //   }
3174 // }
3175 //
3176 // uint const5() {
3177 //   return 5;
3178 //   // place OpUnreachable here
3179 // }
3180 //
3181 // void main() {
3182 //   uint x = gl_GlobalInvocationID.x;
3183 //   if (const5() > modulo4(1000)) {
3184 //     output_data.elements[x] = -input_data.elements[x];
3185 //   } else {
3186 //     // place OpUnreachable here
3187 //     output_data.elements[x] = input_data.elements[x];
3188 //   }
3189 // }
3190 
addOpUnreachableAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3191 void addOpUnreachableAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3192 {
3193 	static const char dataDir[] = "spirv_assembly/instruction/compute/unreachable";
3194 
3195 	struct Case
3196 	{
3197 		string	name;
3198 		string	desc;
3199 	};
3200 
3201 	static const Case cases[] =
3202 	{
3203 		{ "unreachable-switch-merge-in-loop",	"Test containing an unreachable switch merge block inside an infinite loop"	},
3204 	};
3205 
3206 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3207 	{
3208 		const string fileName = cases[i].name + ".amber";
3209 		group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3210 	}
3211 }
3212 
addOpSwitchAmberTests(tcu::TestCaseGroup & group,tcu::TestContext & testCtx)3213 void addOpSwitchAmberTests(tcu::TestCaseGroup& group, tcu::TestContext& testCtx)
3214 {
3215 	static const char dataDir[] = "spirv_assembly/instruction/compute/switch";
3216 
3217 	struct Case
3218 	{
3219 		string	name;
3220 		string	desc;
3221 	};
3222 
3223 	static const Case cases[] =
3224 	{
3225 		{ "switch-case-to-merge-block",	"Test switch containing a case that jumps directly to the merge block"	},
3226 	};
3227 
3228 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3229 	{
3230 		const string fileName = cases[i].name + ".amber";
3231 		group.addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3232 	}
3233 }
3234 
createOpArrayLengthComputeGroup(tcu::TestContext & testCtx)3235 tcu::TestCaseGroup* createOpArrayLengthComputeGroup (tcu::TestContext& testCtx)
3236 {
3237 	de::MovePtr<tcu::TestCaseGroup>	group		(new tcu::TestCaseGroup(testCtx, "oparraylength", "Test the OpArrayLength instruction"));
3238 	static const char				dataDir[]	= "spirv_assembly/instruction/compute/arraylength";
3239 
3240 	struct Case
3241 	{
3242 		string	name;
3243 		string	desc;
3244 	};
3245 
3246 	static const Case cases[] =
3247 	{
3248 		{ "array-stride-larger-than-element-size",	"Test using an unsized array with stride larger than the element size"	}
3249 	};
3250 
3251 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
3252 	{
3253 		const string fileName = cases[i].name + ".amber";
3254 		group->addChild(cts_amber::createAmberTestCase(testCtx, cases[i].name.c_str(), cases[i].desc.c_str(), dataDir, fileName));
3255 	}
3256 
3257 	return group.release();
3258 }
3259 
createOpUnreachableGroup(tcu::TestContext & testCtx)3260 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
3261 {
3262 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
3263 	ComputeShaderSpec				spec;
3264 	de::Random						rnd				(deStringHash(group->getName()));
3265 	const int						numElements		= 100;
3266 	vector<float>					positiveFloats	(numElements, 0);
3267 	vector<float>					negativeFloats	(numElements, 0);
3268 
3269 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3270 
3271 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3272 		negativeFloats[ndx] = -positiveFloats[ndx];
3273 
3274 	spec.assembly =
3275 		string(getComputeAsmShaderPreamble()) +
3276 
3277 		"OpSource GLSL 430\n"
3278 		"OpName %main            \"main\"\n"
3279 		"OpName %func_not_called_func \"not_called_func(\"\n"
3280 		"OpName %func_modulo4         \"modulo4(u1;\"\n"
3281 		"OpName %func_const5          \"const5(\"\n"
3282 		"OpName %id                   \"gl_GlobalInvocationID\"\n"
3283 
3284 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3285 
3286 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3287 
3288 		"%u32ptr    = OpTypePointer Function %u32\n"
3289 		"%uintfuint = OpTypeFunction %u32 %u32ptr\n"
3290 		"%unitf     = OpTypeFunction %u32\n"
3291 
3292 		"%id        = OpVariable %uvec3ptr Input\n"
3293 		"%zero      = OpConstant %u32 0\n"
3294 		"%one       = OpConstant %u32 1\n"
3295 		"%two       = OpConstant %u32 2\n"
3296 		"%three     = OpConstant %u32 3\n"
3297 		"%four      = OpConstant %u32 4\n"
3298 		"%five      = OpConstant %u32 5\n"
3299 		"%hundred   = OpConstant %u32 100\n"
3300 		"%thousand  = OpConstant %u32 1000\n"
3301 
3302 		+ string(getComputeAsmInputOutputBuffer()) +
3303 
3304 		// Main()
3305 		"%main   = OpFunction %void None %voidf\n"
3306 		"%main_entry  = OpLabel\n"
3307 		"%v_thousand  = OpVariable %u32ptr Function %thousand\n"
3308 		"%idval       = OpLoad %uvec3 %id\n"
3309 		"%x           = OpCompositeExtract %u32 %idval 0\n"
3310 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
3311 		"%inval       = OpLoad %f32 %inloc\n"
3312 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
3313 		"%ret_const5  = OpFunctionCall %u32 %func_const5\n"
3314 		"%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
3315 		"%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
3316 		"               OpSelectionMerge %if_end None\n"
3317 		"               OpBranchConditional %cmp_gt %if_true %if_false\n"
3318 		"%if_true     = OpLabel\n"
3319 		"%negate      = OpFNegate %f32 %inval\n"
3320 		"               OpStore %outloc %negate\n"
3321 		"               OpBranch %if_end\n"
3322 		"%if_false    = OpLabel\n"
3323 		"               OpUnreachable\n" // Unreachable else branch for if statement
3324 		"%if_end      = OpLabel\n"
3325 		"               OpReturn\n"
3326 		"               OpFunctionEnd\n"
3327 
3328 		// not_called_function()
3329 		"%func_not_called_func  = OpFunction %void None %voidf\n"
3330 		"%not_called_func_entry = OpLabel\n"
3331 		"                         OpUnreachable\n" // Unreachable entry block in not called static function
3332 		"                         OpFunctionEnd\n"
3333 
3334 		// modulo4()
3335 		"%func_modulo4  = OpFunction %u32 None %uintfuint\n"
3336 		"%valptr        = OpFunctionParameter %u32ptr\n"
3337 		"%modulo4_entry = OpLabel\n"
3338 		"%val           = OpLoad %u32 %valptr\n"
3339 		"%modulo        = OpUMod %u32 %val %four\n"
3340 		"                 OpSelectionMerge %switch_merge None\n"
3341 		"                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
3342 		"%case0         = OpLabel\n"
3343 		"                 OpReturnValue %three\n"
3344 		"%case1         = OpLabel\n"
3345 		"                 OpReturnValue %two\n"
3346 		"%case2         = OpLabel\n"
3347 		"                 OpReturnValue %one\n"
3348 		"%case3         = OpLabel\n"
3349 		"                 OpReturnValue %zero\n"
3350 		"%default       = OpLabel\n"
3351 		"                 OpUnreachable\n" // Unreachable default case for switch statement
3352 		"%switch_merge  = OpLabel\n"
3353 		"                 OpUnreachable\n" // Unreachable merge block for switch statement
3354 		"                 OpFunctionEnd\n"
3355 
3356 		// const5()
3357 		"%func_const5  = OpFunction %u32 None %unitf\n"
3358 		"%const5_entry = OpLabel\n"
3359 		"                OpReturnValue %five\n"
3360 		"%unreachable  = OpLabel\n"
3361 		"                OpUnreachable\n" // Unreachable block in function
3362 		"                OpFunctionEnd\n";
3363 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3364 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3365 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3366 
3367 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
3368 
3369 	addOpUnreachableAmberTests(*group, testCtx);
3370 
3371 	return group.release();
3372 }
3373 
3374 // Assembly code used for testing decoration group is based on GLSL source code:
3375 //
3376 // #version 430
3377 //
3378 // layout(std140, set = 0, binding = 0) readonly buffer Input0 {
3379 //   float elements[];
3380 // } input_data0;
3381 // layout(std140, set = 0, binding = 1) readonly buffer Input1 {
3382 //   float elements[];
3383 // } input_data1;
3384 // layout(std140, set = 0, binding = 2) readonly buffer Input2 {
3385 //   float elements[];
3386 // } input_data2;
3387 // layout(std140, set = 0, binding = 3) readonly buffer Input3 {
3388 //   float elements[];
3389 // } input_data3;
3390 // layout(std140, set = 0, binding = 4) readonly buffer Input4 {
3391 //   float elements[];
3392 // } input_data4;
3393 // layout(std140, set = 0, binding = 5) writeonly buffer Output {
3394 //   float elements[];
3395 // } output_data;
3396 //
3397 // void main() {
3398 //   uint x = gl_GlobalInvocationID.x;
3399 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
3400 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)3401 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
3402 {
3403 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
3404 	ComputeShaderSpec				spec;
3405 	de::Random						rnd				(deStringHash(group->getName()));
3406 	const int						numElements		= 100;
3407 	vector<float>					inputFloats0	(numElements, 0);
3408 	vector<float>					inputFloats1	(numElements, 0);
3409 	vector<float>					inputFloats2	(numElements, 0);
3410 	vector<float>					inputFloats3	(numElements, 0);
3411 	vector<float>					inputFloats4	(numElements, 0);
3412 	vector<float>					outputFloats	(numElements, 0);
3413 
3414 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
3415 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
3416 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
3417 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
3418 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
3419 
3420 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3421 	floorAll(inputFloats0);
3422 	floorAll(inputFloats1);
3423 	floorAll(inputFloats2);
3424 	floorAll(inputFloats3);
3425 	floorAll(inputFloats4);
3426 
3427 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3428 		outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
3429 
3430 	spec.assembly =
3431 		string(getComputeAsmShaderPreamble()) +
3432 
3433 		"OpSource GLSL 430\n"
3434 		"OpName %main \"main\"\n"
3435 		"OpName %id \"gl_GlobalInvocationID\"\n"
3436 
3437 		// Not using group decoration on variable.
3438 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3439 		// Not using group decoration on type.
3440 		"OpDecorate %f32arr ArrayStride 4\n"
3441 
3442 		"OpDecorate %groups BufferBlock\n"
3443 		"OpDecorate %groupm Offset 0\n"
3444 		"%groups = OpDecorationGroup\n"
3445 		"%groupm = OpDecorationGroup\n"
3446 
3447 		// Group decoration on multiple structs.
3448 		"OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
3449 		// Group decoration on multiple struct members.
3450 		"OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
3451 
3452 		"OpDecorate %group1 DescriptorSet 0\n"
3453 		"OpDecorate %group3 DescriptorSet 0\n"
3454 		"OpDecorate %group3 NonWritable\n"
3455 		"OpDecorate %group3 Restrict\n"
3456 		"%group0 = OpDecorationGroup\n"
3457 		"%group1 = OpDecorationGroup\n"
3458 		"%group3 = OpDecorationGroup\n"
3459 
3460 		// Applying the same decoration group multiple times.
3461 		"OpGroupDecorate %group1 %outdata\n"
3462 		"OpGroupDecorate %group1 %outdata\n"
3463 		"OpGroupDecorate %group1 %outdata\n"
3464 		"OpDecorate %outdata DescriptorSet 0\n"
3465 		"OpDecorate %outdata Binding 5\n"
3466 		// Applying decoration group containing nothing.
3467 		"OpGroupDecorate %group0 %indata0\n"
3468 		"OpDecorate %indata0 DescriptorSet 0\n"
3469 		"OpDecorate %indata0 Binding 0\n"
3470 		// Applying decoration group containing one decoration.
3471 		"OpGroupDecorate %group1 %indata1\n"
3472 		"OpDecorate %indata1 Binding 1\n"
3473 		// Applying decoration group containing multiple decorations.
3474 		"OpGroupDecorate %group3 %indata2 %indata3\n"
3475 		"OpDecorate %indata2 Binding 2\n"
3476 		"OpDecorate %indata3 Binding 3\n"
3477 		// Applying multiple decoration groups (with overlapping).
3478 		"OpGroupDecorate %group0 %indata4\n"
3479 		"OpGroupDecorate %group1 %indata4\n"
3480 		"OpGroupDecorate %group3 %indata4\n"
3481 		"OpDecorate %indata4 Binding 4\n"
3482 
3483 		+ string(getComputeAsmCommonTypes()) +
3484 
3485 		"%id   = OpVariable %uvec3ptr Input\n"
3486 		"%zero = OpConstant %i32 0\n"
3487 
3488 		"%outbuf    = OpTypeStruct %f32arr\n"
3489 		"%outbufptr = OpTypePointer Uniform %outbuf\n"
3490 		"%outdata   = OpVariable %outbufptr Uniform\n"
3491 		"%inbuf0    = OpTypeStruct %f32arr\n"
3492 		"%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
3493 		"%indata0   = OpVariable %inbuf0ptr Uniform\n"
3494 		"%inbuf1    = OpTypeStruct %f32arr\n"
3495 		"%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
3496 		"%indata1   = OpVariable %inbuf1ptr Uniform\n"
3497 		"%inbuf2    = OpTypeStruct %f32arr\n"
3498 		"%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
3499 		"%indata2   = OpVariable %inbuf2ptr Uniform\n"
3500 		"%inbuf3    = OpTypeStruct %f32arr\n"
3501 		"%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
3502 		"%indata3   = OpVariable %inbuf3ptr Uniform\n"
3503 		"%inbuf4    = OpTypeStruct %f32arr\n"
3504 		"%inbufptr  = OpTypePointer Uniform %inbuf4\n"
3505 		"%indata4   = OpVariable %inbufptr Uniform\n"
3506 
3507 		"%main   = OpFunction %void None %voidf\n"
3508 		"%label  = OpLabel\n"
3509 		"%idval  = OpLoad %uvec3 %id\n"
3510 		"%x      = OpCompositeExtract %u32 %idval 0\n"
3511 		"%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
3512 		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
3513 		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
3514 		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
3515 		"%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
3516 		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
3517 		"%inval0 = OpLoad %f32 %inloc0\n"
3518 		"%inval1 = OpLoad %f32 %inloc1\n"
3519 		"%inval2 = OpLoad %f32 %inloc2\n"
3520 		"%inval3 = OpLoad %f32 %inloc3\n"
3521 		"%inval4 = OpLoad %f32 %inloc4\n"
3522 		"%add0   = OpFAdd %f32 %inval0 %inval1\n"
3523 		"%add1   = OpFAdd %f32 %add0 %inval2\n"
3524 		"%add2   = OpFAdd %f32 %add1 %inval3\n"
3525 		"%add    = OpFAdd %f32 %add2 %inval4\n"
3526 		"          OpStore %outloc %add\n"
3527 		"          OpReturn\n"
3528 		"          OpFunctionEnd\n";
3529 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
3530 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
3531 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
3532 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
3533 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
3534 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3535 	spec.numWorkGroups = IVec3(numElements, 1, 1);
3536 
3537 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
3538 
3539 	return group.release();
3540 }
3541 
// Tag identifying which scalar type a SpecConstantValue holds.
// The tag selects the matching member of SpecConstantValue::ValueUnion.
enum SpecConstantType
{
	// Integer types, signed then unsigned, by increasing width.
	SC_INT8		= 0,
	SC_UINT8	= 1,
	SC_INT16	= 2,
	SC_UINT16	= 3,
	SC_INT32	= 4,
	SC_UINT32	= 5,
	SC_INT64	= 6,
	SC_UINT64	= 7,

	// Floating point types, by increasing width.
	SC_FLOAT16	= 8,
	SC_FLOAT32	= 9,
	SC_FLOAT64	= 10
};
3556 
3557 struct SpecConstantValue
3558 {
3559 	SpecConstantType type;
3560 	union ValueUnion {
3561 		deInt8			i8;
3562 		deUint8			u8;
3563 		deInt16			i16;
3564 		deUint16		u16;
3565 		deInt32			i32;
3566 		deUint32		u32;
3567 		deInt64			i64;
3568 		deUint64		u64;
3569 		tcu::Float16	f16;
3570 		tcu::Float32	f32;
3571 		tcu::Float64	f64;
3572 
ValueUnion(deInt8 v)3573 		ValueUnion (deInt8			v) : i8(v)	{}
ValueUnion(deUint8 v)3574 		ValueUnion (deUint8			v) : u8(v)	{}
ValueUnion(deInt16 v)3575 		ValueUnion (deInt16			v) : i16(v)	{}
ValueUnion(deUint16 v)3576 		ValueUnion (deUint16		v) : u16(v)	{}
ValueUnion(deInt32 v)3577 		ValueUnion (deInt32			v) : i32(v)	{}
ValueUnion(deUint32 v)3578 		ValueUnion (deUint32		v) : u32(v)	{}
ValueUnion(deInt64 v)3579 		ValueUnion (deInt64			v) : i64(v)	{}
ValueUnion(deUint64 v)3580 		ValueUnion (deUint64		v) : u64(v)	{}
ValueUnion(tcu::Float16 v)3581 		ValueUnion (tcu::Float16	v) : f16(v)	{}
ValueUnion(tcu::Float32 v)3582 		ValueUnion (tcu::Float32	v) : f32(v)	{}
ValueUnion(tcu::Float64 v)3583 		ValueUnion (tcu::Float64	v) : f64(v)	{}
3584 	} value;
3585 
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3586 	SpecConstantValue (deInt8			v) : type(SC_INT8)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3587 	SpecConstantValue (deUint8			v) : type(SC_UINT8)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3588 	SpecConstantValue (deInt16			v) : type(SC_INT16)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3589 	SpecConstantValue (deUint16			v) : type(SC_UINT16)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3590 	SpecConstantValue (deInt32			v) : type(SC_INT32)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3591 	SpecConstantValue (deUint32			v) : type(SC_UINT32)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3592 	SpecConstantValue (deInt64			v) : type(SC_INT64)		, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3593 	SpecConstantValue (deUint64			v) : type(SC_UINT64)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3594 	SpecConstantValue (tcu::Float16		v) : type(SC_FLOAT16)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3595 	SpecConstantValue (tcu::Float32		v) : type(SC_FLOAT32)	, value(v) {}
SpecConstantValuevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3596 	SpecConstantValue (tcu::Float64		v) : type(SC_FLOAT64)	, value(v) {}
3597 
appendTovkt::SpirVAssembly::__anon043fb9e60111::SpecConstantValue3598 	void appendTo(vkt::SpirVAssembly::SpecConstants& specConstants)
3599 	{
3600 		switch (type)
3601 		{
3602 		case SC_INT8:		specConstants.append(value.i8);		break;
3603 		case SC_UINT8:		specConstants.append(value.u8);		break;
3604 		case SC_INT16:		specConstants.append(value.i16);	break;
3605 		case SC_UINT16:		specConstants.append(value.u16);	break;
3606 		case SC_INT32:		specConstants.append(value.i32);	break;
3607 		case SC_UINT32:		specConstants.append(value.u32);	break;
3608 		case SC_INT64:		specConstants.append(value.i64);	break;
3609 		case SC_UINT64:		specConstants.append(value.u64);	break;
3610 		case SC_FLOAT16:	specConstants.append(value.f16);	break;
3611 		case SC_FLOAT32:	specConstants.append(value.f32);	break;
3612 		case SC_FLOAT64:	specConstants.append(value.f64);	break;
3613 		default:
3614 			DE_ASSERT(false);
3615 		}
3616 	}
3617 };
3618 
// Per-case option bits; individual bits are combined with bitwise OR and
// tested with bitwise AND.
enum CaseFlagBits
{
	FLAG_NONE		= 0x00,	// No options.
	FLAG_CONVERT	= 0x01,	// Result is converted to a 32-bit integer before use.
	FLAG_I8			= 0x02,	// Case uses 8-bit integers.
	FLAG_I16		= 0x04,	// Case uses 16-bit integers.
	FLAG_I64		= 0x08,	// Case uses 64-bit integers.
	FLAG_F16		= 0x10,	// Case uses 16-bit floats.
	FLAG_F64		= 0x20	// Case uses 64-bit floats.
};
// Bitmask type holding a bitwise OR of CaseFlagBits values.
using CaseFlags = deUint32;
3630 
3631 struct SpecConstantTwoValCase
3632 {
3633 	const std::string	caseName;
3634 	const std::string	scDefinition0;
3635 	const std::string	scDefinition1;
3636 	const std::string	scResultType;
3637 	const std::string	scOperation;
3638 	SpecConstantValue	scActualValue0;
3639 	SpecConstantValue	scActualValue1;
3640 	const std::string	resultOperation;
3641 	vector<deInt32>		expectedOutput;
3642 	CaseFlags			caseFlags;
3643 
SpecConstantTwoValCasevkt::SpirVAssembly::__anon043fb9e60111::SpecConstantTwoValCase3644 						SpecConstantTwoValCase (const std::string& name,
3645 												const std::string& definition0,
3646 												const std::string& definition1,
3647 												const std::string& resultType,
3648 												const std::string& operation,
3649 												SpecConstantValue value0,
3650 												SpecConstantValue value1,
3651 												const std::string& resultOp,
3652 												const vector<deInt32>& output,
3653 												CaseFlags flags = FLAG_NONE)
3654 							: caseName				(name)
3655 							, scDefinition0			(definition0)
3656 							, scDefinition1			(definition1)
3657 							, scResultType			(resultType)
3658 							, scOperation			(operation)
3659 							, scActualValue0		(value0)
3660 							, scActualValue1		(value1)
3661 							, resultOperation		(resultOp)
3662 							, expectedOutput		(output)
3663 							, caseFlags				(flags)
3664 							{}
3665 };
3666 
// Returns SPIR-V assembly declaring the i32 constants %zero, %one, %two and
// %three together with the nested aggregate types %iarr3 (array of three i32),
// %imat3 (array of three %iarr3) and %struct (a struct wrapping one %imat3).
// The text refers to %i32, which the surrounding assembly has to declare.
std::string getSpecConstantOpStructConstantsAndTypes ()
{
	std::string declarations;

	// Integer constants; %three is also used as the array length below.
	declarations += "%zero        = OpConstant %i32 0\n";
	declarations += "%one         = OpConstant %i32 1\n";
	declarations += "%two         = OpConstant %i32 2\n";
	declarations += "%three       = OpConstant %i32 3\n";

	// Aggregate types, innermost first.
	declarations += "%iarr3       = OpTypeArray %i32 %three\n";
	declarations += "%imat3       = OpTypeArray %iarr3 %three\n";
	declarations += "%struct      = OpTypeStruct %imat3\n";

	return declarations;
}
3679 
// Returns SPIR-V assembly defining all-zero constant composites for the
// %iarr3, %imat3 and %struct types: %iarr3_0, %imat3_0 and %struct_0.
// The text refers to %zero and to those three types, which the surrounding
// assembly has to declare first.
std::string getSpecConstantOpStructComposites ()
{
	// One entry per emitted instruction, innermost composite first.
	static const char* const compositeLines[] =
	{
		"%iarr3_0     = OpConstantComposite %iarr3 %zero %zero %zero\n",
		"%imat3_0     = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0\n",
		"%struct_0    = OpConstantComposite %struct %imat3_0\n",
	};

	std::string composites;
	for (const char* const line : compositeLines)
		composites += line;

	return composites;
}
3688 
// Returns SPIR-V assembly made only of OpSpecConstantOp instructions that
// exercise CompositeInsert / CompositeExtract on arrays, nested arrays and a
// struct, then compare extracted components and select the constants
// %mustbe_0, %mustbe_1 and %mustbe_2.
//
// The text refers to %sc_0, %sc_1, %sc_2, the zero composites %iarr3_0,
// %imat3_0, %struct_0, the constants %zero, %one, %two and the types %i32,
// %bool, %iarr3, %imat3, %struct; the surrounding assembly must define them.
std::string getSpecConstantOpStructConstBlock ()
{
	std::string block;

	// First row: (sc_0, sc_1, sc_2).
	block += "%iarr3_a     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_0     0\n";
	block += "%iarr3_b     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_a     1\n";
	block += "%iarr3_c     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_b     2\n";

	// Second row: (sc_1, sc_2, sc_0).
	block += "%iarr3_d     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_0     0\n";
	block += "%iarr3_e     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_d     1\n";
	block += "%iarr3_f     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_e     2\n";

	// Third row: (sc_2, sc_0, sc_1).
	block += "%iarr3_g     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_2        %iarr3_0     0\n";
	block += "%iarr3_h     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_0        %iarr3_g     1\n";
	block += "%iarr3_i     = OpSpecConstantOp %iarr3  CompositeInsert  %sc_1        %iarr3_h     2\n";

	// Matrix assembled from the three rows above.
	block += "%imat3_a     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_c     %imat3_0     0\n";
	block += "%imat3_b     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_f     %imat3_a     1\n";
	block += "%imat3_c     = OpSpecConstantOp %imat3  CompositeInsert  %iarr3_i     %imat3_b     2\n";

	// Matrix stored as the only member of the struct.
	block += "%struct_a    = OpSpecConstantOp %struct CompositeInsert  %imat3_c     %struct_0    0\n";

	// Component pairs extracted from the struct for comparison:
	// [0][0] is sc_0 and [1][0] is sc_1.
	block += "%comp_0_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 0\n";
	block += "%comp_1_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 0\n";

	// [0][1] and [2][2] are both sc_1.
	block += "%comp_0_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 0 1\n";
	block += "%comp_2_2    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 2\n";

	// [2][0] and [1][1] are both sc_2.
	block += "%comp_2_0    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 2 0\n";
	block += "%comp_1_1    = OpSpecConstantOp %i32    CompositeExtract %struct_a    0 1 1\n";

	// Expected comparison results: false (as long as sc_0 and sc_1 differ), true, true.
	block += "%cmpres_0    = OpSpecConstantOp %bool   IEqual %comp_0_0 %comp_1_0\n";
	block += "%cmpres_1    = OpSpecConstantOp %bool   IEqual %comp_0_1 %comp_2_2\n";
	block += "%cmpres_2    = OpSpecConstantOp %bool   IEqual %comp_2_0 %comp_1_1\n";

	// With those results the selections yield 0, 1 and 2 respectively.
	block += "%mustbe_0    = OpSpecConstantOp %i32    Select %cmpres_0 %one %zero\n";
	block += "%mustbe_1    = OpSpecConstantOp %i32    Select %cmpres_1 %one %zero\n";
	block += "%mustbe_2    = OpSpecConstantOp %i32    Select %cmpres_2 %two %one\n";

	return block;
}
3728 
// Returns SPIR-V function-body instructions computing
//   %factor   = (%one - %mustbe_0) * %mustbe_1 * (%mustbe_2 - %one)
//   %sc_final = %factor * %sc_factor
// When %mustbe_0, %mustbe_1 and %mustbe_2 hold 0, 1 and 2, the factor is 1
// and %sc_final equals %sc_factor unchanged.
// The text refers to %i32, %one, %mustbe_0..2 and %sc_factor, which the
// surrounding assembly must define.
std::string getSpecConstantOpStructInstructions ()
{
	// One entry per emitted instruction, in execution order.
	static const char* const instructionLines[] =
	{
		"%subf_a      = OpISub %i32 %one %mustbe_0\n",
		"%subf_b      = OpIMul %i32 %subf_a %mustbe_1\n",
		"%subf_c      = OpISub %i32 %mustbe_2 %one\n",
		"%factor      = OpIMul %i32 %subf_b %subf_c\n",
		"%sc_final    = OpIMul %i32 %factor %sc_factor\n",
	};

	std::string instructions;
	for (const char* const line : instructionLines)
		instructions += line;

	return instructions;
}
3741 
createSpecConstantGroup(tcu::TestContext & testCtx)3742 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
3743 {
3744 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
3745 	vector<SpecConstantTwoValCase>	cases;
3746 	de::Random						rnd				(deStringHash(group->getName()));
3747 	const int						numElements		= 100;
3748 	vector<deInt32>					inputInts		(numElements, 0);
3749 	vector<deInt32>					outputInts1		(numElements, 0);
3750 	vector<deInt32>					outputInts2		(numElements, 0);
3751 	vector<deInt32>					outputInts3		(numElements, 0);
3752 	vector<deInt32>					outputInts4		(numElements, 0);
3753 	vector<deInt32>					outputInts5		(numElements, 0);
3754 	const StringTemplate			shaderTemplate	(
3755 		"${CAPABILITIES:opt}"
3756 		+ string(getComputeAsmShaderPreamble()) +
3757 
3758 		"OpName %main           \"main\"\n"
3759 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3760 
3761 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3762 		"OpDecorate %sc_0  SpecId 0\n"
3763 		"OpDecorate %sc_1  SpecId 1\n"
3764 		"OpDecorate %i32arr ArrayStride 4\n"
3765 
3766 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3767 
3768 		"${OPTYPE_DEFINITIONS:opt}"
3769 		"%buf     = OpTypeStruct %i32arr\n"
3770 		"%bufptr  = OpTypePointer Uniform %buf\n"
3771 		"%indata    = OpVariable %bufptr Uniform\n"
3772 		"%outdata   = OpVariable %bufptr Uniform\n"
3773 
3774 		"%id        = OpVariable %uvec3ptr Input\n"
3775 		"%zero      = OpConstant %i32 0\n"
3776 
3777 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
3778 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
3779 		"%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
3780 
3781 		"%main      = OpFunction %void None %voidf\n"
3782 		"%label     = OpLabel\n"
3783 		"${TYPE_CONVERT:opt}"
3784 		"%idval     = OpLoad %uvec3 %id\n"
3785 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3786 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
3787 		"%inval     = OpLoad %i32 %inloc\n"
3788 		"%final     = ${GEN_RESULT}\n"
3789 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
3790 		"             OpStore %outloc %final\n"
3791 		"             OpReturn\n"
3792 		"             OpFunctionEnd\n");
3793 
3794 	fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
3795 
3796 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3797 	{
3798 		outputInts1[ndx] = inputInts[ndx] + 42;
3799 		outputInts2[ndx] = inputInts[ndx];
3800 		outputInts3[ndx] = inputInts[ndx] - 11200;
3801 		outputInts4[ndx] = inputInts[ndx] + 1;
3802 		outputInts5[ndx] = inputInts[ndx] - 42;
3803 	}
3804 
3805 	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
3806 	const char addSc32ToInput[]		= "OpIAdd %i32 %inval %sc_final32";
3807 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
3808 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
3809 
3810 	cases.push_back(SpecConstantTwoValCase("iadd",						" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",			62,						-20,				addScToInput,		outputInts1));
3811 	cases.push_back(SpecConstantTwoValCase("isub",						" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",			100,					58,					addScToInput,		outputInts1));
3812 	cases.push_back(SpecConstantTwoValCase("imul",						" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",			-2,						-21,				addScToInput,		outputInts1));
3813 	cases.push_back(SpecConstantTwoValCase("sdiv",						" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",			-126,					-3,					addScToInput,		outputInts1));
3814 	cases.push_back(SpecConstantTwoValCase("udiv",						" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",			126,					3,					addScToInput,		outputInts1));
3815 	cases.push_back(SpecConstantTwoValCase("srem",						" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",			7,						3,					addScToInput,		outputInts4));
3816 	cases.push_back(SpecConstantTwoValCase("smod",						" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",			7,						3,					addScToInput,		outputInts4));
3817 	cases.push_back(SpecConstantTwoValCase("umod",						" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",			342,					50,					addScToInput,		outputInts1));
3818 	cases.push_back(SpecConstantTwoValCase("bitwiseand",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",			42,						63,					addScToInput,		outputInts1));
3819 	cases.push_back(SpecConstantTwoValCase("bitwiseor",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",			34,						8,					addScToInput,		outputInts1));
3820 	cases.push_back(SpecConstantTwoValCase("bitwisexor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",			18,						56,					addScToInput,		outputInts1));
3821 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical",			" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					2,					addScToInput,		outputInts1));
3822 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					2,					addScToInput,		outputInts5));
3823 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical",			" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						1,					addScToInput,		outputInts1));
3824 
3825 	// Shifts for other integer sizes.
3826 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftRightLogical    %sc_0 %sc_1",			deInt64{168},			deInt64{2},			addSc32ToInput,		outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3827 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i64",	" %i64 0",		" %i64 0",		"%i64",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt64{-168},			deInt64{2},			addSc32ToInput,		outputInts5, (FLAG_I64 | FLAG_CONVERT)));
3828 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt64{21},			deInt64{1},			addSc32ToInput,		outputInts1, (FLAG_I64 | FLAG_CONVERT)));
3829 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftRightLogical    %sc_0 %sc_1",			deInt16{168},			deInt16{2},			addSc32ToInput,		outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3830 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i16",	" %i16 0",		" %i16 0",		"%i16",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt16{-168},			deInt16{2},			addSc32ToInput,		outputInts5, (FLAG_I16 | FLAG_CONVERT)));
3831 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt16{21},			deInt16{1},			addSc32ToInput,		outputInts1, (FLAG_I16 | FLAG_CONVERT)));
3832 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftRightLogical    %sc_0 %sc_1",			deInt8{84},				deInt8{1},			addSc32ToInput,		outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3833 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_i8",	" %i8 0",		" %i8 0",		"%i8",		"ShiftRightArithmetic %sc_0 %sc_1",			deInt8{-84},			deInt8{1},			addSc32ToInput,		outputInts5, (FLAG_I8 | FLAG_CONVERT)));
3834 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftLeftLogical     %sc_0 %sc_1",			deInt8{21},				deInt8{1},			addSc32ToInput,		outputInts1, (FLAG_I8 | FLAG_CONVERT)));
3835 
3836 	// Shifts for other integer sizes but only in the shift amount.
3837 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					deInt64{2},			addScToInput,		outputInts1, (FLAG_I64)));
3838 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i64"," %i32 0",		" %i64 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					deInt64{2},			addScToInput,		outputInts5, (FLAG_I64)));
3839 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt64{1},			addScToInput,		outputInts1, (FLAG_I64)));
3840 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,					deInt16{2},			addScToInput,		outputInts1, (FLAG_I16)));
3841 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i16"," %i32 0",		" %i16 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-168,					deInt16{2},			addScToInput,		outputInts5, (FLAG_I16)));
3842 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt16{1},			addScToInput,		outputInts1, (FLAG_I16)));
3843 	cases.push_back(SpecConstantTwoValCase("shiftrightlogical_s_i8",	" %i32 0",		" %i8 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			84,						deInt8{1},			addScToInput,		outputInts1, (FLAG_I8)));
3844 	cases.push_back(SpecConstantTwoValCase("shiftrightarithmetic_s_i8",	" %i32 0",		" %i8 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			-84,					deInt8{1},			addScToInput,		outputInts5, (FLAG_I8)));
3845 	cases.push_back(SpecConstantTwoValCase("shiftleftlogical_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,						deInt8{1},			addScToInput,		outputInts1, (FLAG_I8)));
3846 
3847 	cases.push_back(SpecConstantTwoValCase("slessthan",					" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",			-20,					-10,				selectTrueUsingSc,	outputInts2));
3848 	cases.push_back(SpecConstantTwoValCase("ulessthan",					" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",			10,						20,					selectTrueUsingSc,	outputInts2));
3849 	cases.push_back(SpecConstantTwoValCase("sgreaterthan",				" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",			-1000,					50,					selectFalseUsingSc,	outputInts2));
3850 	cases.push_back(SpecConstantTwoValCase("ugreaterthan",				" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",			10,						5,					selectTrueUsingSc,	outputInts2));
3851 	cases.push_back(SpecConstantTwoValCase("slessthanequal",			" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",			-10,					-10,				selectTrueUsingSc,	outputInts2));
3852 	cases.push_back(SpecConstantTwoValCase("ulessthanequal",			" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",			50,						100,				selectTrueUsingSc,	outputInts2));
3853 	cases.push_back(SpecConstantTwoValCase("sgreaterthanequal",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",			-1000,					50,					selectFalseUsingSc,	outputInts2));
3854 	cases.push_back(SpecConstantTwoValCase("ugreaterthanequal",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",			10,						10,					selectTrueUsingSc,	outputInts2));
3855 	cases.push_back(SpecConstantTwoValCase("iequal",					" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",			42,						24,					selectFalseUsingSc,	outputInts2));
3856 	cases.push_back(SpecConstantTwoValCase("inotequal",					" %i32 0",		" %i32 0",		"%bool",	"INotEqual            %sc_0 %sc_1",			42,						24,					selectTrueUsingSc,	outputInts2));
3857 	cases.push_back(SpecConstantTwoValCase("logicaland",				"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",			0,						1,					selectFalseUsingSc,	outputInts2));
3858 	cases.push_back(SpecConstantTwoValCase("logicalor",					"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",			1,						0,					selectTrueUsingSc,	outputInts2));
3859 	cases.push_back(SpecConstantTwoValCase("logicalequal",				"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",			0,						1,					selectFalseUsingSc,	outputInts2));
3860 	cases.push_back(SpecConstantTwoValCase("logicalnotequal",			"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",			1,						0,					selectTrueUsingSc,	outputInts2));
3861 	cases.push_back(SpecConstantTwoValCase("snegate",					" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",				-42,					0,					addScToInput,		outputInts1));
3862 	cases.push_back(SpecConstantTwoValCase("not",						" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,					0,					addScToInput,		outputInts1));
3863 	cases.push_back(SpecConstantTwoValCase("logicalnot",				"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,						0,					selectFalseUsingSc,	outputInts2));
3864 	cases.push_back(SpecConstantTwoValCase("select",					"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,						42,					addScToInput,		outputInts1));
3865 	cases.push_back(SpecConstantTwoValCase("sconvert",					" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",				-11200,					0,					addSc32ToInput,		outputInts3, (FLAG_I16 | FLAG_CONVERT)));
3866 	cases.push_back(SpecConstantTwoValCase("fconvert",					" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",				tcu::Float32{-11200.0},	tcu::Float32{0.0},	addSc32ToInput,		outputInts3, (FLAG_F64 | FLAG_CONVERT)));
3867 	cases.push_back(SpecConstantTwoValCase("fconvert16",				" %f16 0",		" %f16 0",		"%f32",		"FConvert             %sc_0",				tcu::Float16{1.0},		tcu::Float16{0.0},	addSc32ToInput,		outputInts4, (FLAG_F16 | FLAG_CONVERT)));
3868 
3869 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3870 	{
3871 		map<string, string>		specializations;
3872 		ComputeShaderSpec		spec;
3873 
3874 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
3875 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
3876 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
3877 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
3878 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
3879 
3880 		// Special SPIR-V code when using 16-bit integers.
3881 		if (cases[caseNdx].caseFlags & FLAG_I16)
3882 		{
3883 			spec.requestedVulkanFeatures.coreFeatures.shaderInt16	= VK_TRUE;
3884 			specializations["CAPABILITIES"]							+= "OpCapability Int16\n";							// Adds 16-bit integer capability
3885 			specializations["OPTYPE_DEFINITIONS"]					+= "%i16 = OpTypeInt 16 1\n";						// Adds 16-bit integer type
3886 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3887 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpSConvert %i32 %sc_final\n";		// Converts 16-bit integer to 32-bit integer
3888 		}
3889 
3890 		// Special SPIR-V code when using 64-bit integers.
3891 		if (cases[caseNdx].caseFlags & FLAG_I64)
3892 		{
3893 			spec.requestedVulkanFeatures.coreFeatures.shaderInt64	= VK_TRUE;
3894 			specializations["CAPABILITIES"]							+= "OpCapability Int64\n";							// Adds 64-bit integer capability
3895 			specializations["OPTYPE_DEFINITIONS"]					+= "%i64 = OpTypeInt 64 1\n";						// Adds 64-bit integer type
3896 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3897 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpSConvert %i32 %sc_final\n";		// Converts 64-bit integer to 32-bit integer
3898 		}
3899 
3900 		// Special SPIR-V code when using 64-bit floats.
3901 		if (cases[caseNdx].caseFlags & FLAG_F64)
3902 		{
3903 			spec.requestedVulkanFeatures.coreFeatures.shaderFloat64	= VK_TRUE;
3904 			specializations["CAPABILITIES"]							+= "OpCapability Float64\n";						// Adds 64-bit float capability
3905 			specializations["OPTYPE_DEFINITIONS"]					+= "%f64 = OpTypeFloat 64\n";						// Adds 64-bit float type
3906 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3907 				specializations["TYPE_CONVERT"]						+= "%sc_final32 = OpConvertFToS %i32 %sc_final\n";	// Converts 64-bit float to 32-bit integer
3908 		}
3909 
3910 		// Extension needed for float16 and int8.
3911 		if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
3912 			spec.extensions.push_back("VK_KHR_shader_float16_int8");
3913 
3914 		// Special SPIR-V code when using 16-bit floats.
3915 		if (cases[caseNdx].caseFlags & FLAG_F16)
3916 		{
3917 			spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
3918 			specializations["CAPABILITIES"]				+= "OpCapability Float16\n";						// Adds 16-bit float capability
3919 			specializations["OPTYPE_DEFINITIONS"]		+= "%f16 = OpTypeFloat 16\n";						// Adds 16-bit float type
3920 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3921 				specializations["TYPE_CONVERT"]			+= "%sc_final32 = OpConvertFToS %i32 %sc_final\n";	// Converts 16-bit float to 32-bit integer
3922 		}
3923 
3924 		// Special SPIR-V code when using 8-bit integers.
3925 		if (cases[caseNdx].caseFlags & FLAG_I8)
3926 		{
3927 			spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
3928 			specializations["CAPABILITIES"]				+= "OpCapability Int8\n";						// Adds 8-bit integer capability
3929 			specializations["OPTYPE_DEFINITIONS"]		+= "%i8 = OpTypeInt 8 1\n";						// Adds 8-bit integer type
3930 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
3931 				specializations["TYPE_CONVERT"]			+= "%sc_final32 = OpSConvert %i32 %sc_final\n";	// Converts 8-bit integer to 32-bit integer
3932 		}
3933 
3934 		spec.assembly = shaderTemplate.specialize(specializations);
3935 		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
3936 		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
3937 		spec.numWorkGroups = IVec3(numElements, 1, 1);
3938 		cases[caseNdx].scActualValue0.appendTo(spec.specConstants);
3939 		cases[caseNdx].scActualValue1.appendTo(spec.specConstants);
3940 
3941 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName.c_str(), cases[caseNdx].caseName.c_str(), spec));
3942 	}
3943 
3944 	ComputeShaderSpec				spec;
3945 
3946 	spec.assembly =
3947 		string(getComputeAsmShaderPreamble()) +
3948 
3949 		"OpName %main           \"main\"\n"
3950 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3951 
3952 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3953 		"OpDecorate %sc_0  SpecId 0\n"
3954 		"OpDecorate %sc_1  SpecId 1\n"
3955 		"OpDecorate %sc_2  SpecId 2\n"
3956 		"OpDecorate %i32arr ArrayStride 4\n"
3957 
3958 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
3959 
3960 		"%ivec3       = OpTypeVector %i32 3\n"
3961 
3962 		+ getSpecConstantOpStructConstantsAndTypes() +
3963 
3964 		"%buf         = OpTypeStruct %i32arr\n"
3965 		"%bufptr      = OpTypePointer Uniform %buf\n"
3966 		"%indata      = OpVariable %bufptr Uniform\n"
3967 		"%outdata     = OpVariable %bufptr Uniform\n"
3968 
3969 		"%id          = OpVariable %uvec3ptr Input\n"
3970 		"%ivec3_0     = OpConstantComposite %ivec3 %zero %zero %zero\n"
3971 		"%vec3_undef  = OpUndef %ivec3\n"
3972 
3973 		+ getSpecConstantOpStructComposites () +
3974 
3975 		"%sc_0        = OpSpecConstant %i32 0\n"
3976 		"%sc_1        = OpSpecConstant %i32 0\n"
3977 		"%sc_2        = OpSpecConstant %i32 0\n"
3978 
3979 		+ getSpecConstantOpStructConstBlock () +
3980 
3981 		"%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0     0\n"							// (sc_0, 0, 0)
3982 		"%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0     1\n"							// (0, sc_1, 0)
3983 		"%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0     2\n"							// (0, 0, sc_2)
3984 		"%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
3985 		"%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
3986 		"%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
3987 		"%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (0,    sc_0, sc_1)
3988 		"%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
3989 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
3990 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
3991 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
3992 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
3993 		"%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"								// (sc_2 - sc_0) * sc_1
3994 
3995 		"%main      = OpFunction %void None %voidf\n"
3996 		"%label     = OpLabel\n"
3997 
3998 		+ getSpecConstantOpStructInstructions() +
3999 
4000 		"%idval     = OpLoad %uvec3 %id\n"
4001 		"%x         = OpCompositeExtract %u32 %idval 0\n"
4002 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
4003 		"%inval     = OpLoad %i32 %inloc\n"
4004 		"%final     = OpIAdd %i32 %inval %sc_final\n"
4005 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
4006 		"             OpStore %outloc %final\n"
4007 		"             OpReturn\n"
4008 		"             OpFunctionEnd\n";
4009 	spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
4010 	spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
4011 	spec.numWorkGroups = IVec3(numElements, 1, 1);
4012 	spec.specConstants.append<deInt32>(123);
4013 	spec.specConstants.append<deInt32>(56);
4014 	spec.specConstants.append<deInt32>(-77);
4015 
4016 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
4017 
4018 	return group.release();
4019 }
4020 
createOpPhiVartypeTests(de::MovePtr<tcu::TestCaseGroup> & group,tcu::TestContext & testCtx)4021 void createOpPhiVartypeTests (de::MovePtr<tcu::TestCaseGroup>& group, tcu::TestContext& testCtx)
4022 {
4023 	ComputeShaderSpec	specInt;
4024 	ComputeShaderSpec	specFloat;
4025 	ComputeShaderSpec	specFloat16;
4026 	ComputeShaderSpec	specVec3;
4027 	ComputeShaderSpec	specMat4;
4028 	ComputeShaderSpec	specArray;
4029 	ComputeShaderSpec	specStruct;
4030 	de::Random			rnd				(deStringHash(group->getName()));
4031 	const int			numElements		= 100;
4032 	vector<float>		inputFloats		(numElements, 0);
4033 	vector<float>		outputFloats	(numElements, 0);
4034 	vector<deUint32>	inputUints		(numElements, 0);
4035 	vector<deUint32>	outputUints		(numElements, 0);
4036 
4037 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4038 
4039 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4040 	floorAll(inputFloats);
4041 
4042 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4043 	{
4044 		// Just check if the value is positive or not
4045 		outputFloats[ndx] = (inputFloats[ndx] > 0) ? 1.0f : -1.0f;
4046 	}
4047 
4048 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4049 	{
4050 		inputUints[ndx] = tcu::Float16(inputFloats[ndx]).bits();
4051 		outputUints[ndx] = tcu::Float16(outputFloats[ndx]).bits();
4052 	}
4053 
4054 	// All of the tests are of the form:
4055 	//
4056 	// testtype r
4057 	//
4058 	// if (inputdata > 0)
4059 	//   r = 1
4060 	// else
4061 	//   r = -1
4062 	//
4063 	// return (float)r
4064 
4065 	specFloat.assembly =
4066 		string(getComputeAsmShaderPreamble()) +
4067 
4068 		"OpSource GLSL 430\n"
4069 		"OpName %main \"main\"\n"
4070 		"OpName %id \"gl_GlobalInvocationID\"\n"
4071 
4072 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4073 
4074 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4075 
4076 		"%id = OpVariable %uvec3ptr Input\n"
4077 		"%zero       = OpConstant %i32 0\n"
4078 		"%float_0    = OpConstant %f32 0.0\n"
4079 		"%float_1    = OpConstant %f32 1.0\n"
4080 		"%float_n1   = OpConstant %f32 -1.0\n"
4081 
4082 		"%main     = OpFunction %void None %voidf\n"
4083 		"%entry    = OpLabel\n"
4084 		"%idval    = OpLoad %uvec3 %id\n"
4085 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4086 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4087 		"%inval    = OpLoad %f32 %inloc\n"
4088 
4089 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4090 		"            OpSelectionMerge %cm None\n"
4091 		"            OpBranchConditional %comp %tb %fb\n"
4092 		"%tb       = OpLabel\n"
4093 		"            OpBranch %cm\n"
4094 		"%fb       = OpLabel\n"
4095 		"            OpBranch %cm\n"
4096 		"%cm       = OpLabel\n"
4097 		"%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4098 
4099 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4100 		"            OpStore %outloc %res\n"
4101 		"            OpReturn\n"
4102 
4103 		"            OpFunctionEnd\n";
4104 	specFloat.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4105 	specFloat.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4106 	specFloat.numWorkGroups = IVec3(numElements, 1, 1);
4107 
4108 	specFloat16.assembly =
4109 		"OpCapability Shader\n"
4110 		"OpCapability Float16\n"
4111 		"OpMemoryModel Logical GLSL450\n"
4112 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4113 		"OpExecutionMode %main LocalSize 1 1 1\n"
4114 
4115 		"OpSource GLSL 430\n"
4116 		"OpName %main \"main\"\n"
4117 		"OpName %id \"gl_GlobalInvocationID\"\n"
4118 
4119 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4120 
4121 		"OpDecorate %buf BufferBlock\n"
4122 		"OpDecorate %indata DescriptorSet 0\n"
4123 		"OpDecorate %indata Binding 0\n"
4124 		"OpDecorate %outdata DescriptorSet 0\n"
4125 		"OpDecorate %outdata Binding 1\n"
4126 		"OpDecorate %u32arr ArrayStride 4\n"
4127 		"OpMemberDecorate %buf 0 Offset 0\n"
4128 
4129 		+ string(getComputeAsmCommonTypes()) +
4130 
4131 		"%f16      = OpTypeFloat 16\n"
4132 		"%f16vec2  = OpTypeVector %f16 2\n"
4133 		"%fvec2    = OpTypeVector %f32 2\n"
4134 		"%u32ptr   = OpTypePointer Uniform %u32\n"
4135 		"%u32arr   = OpTypeRuntimeArray %u32\n"
4136 		"%f16_0    = OpConstant %f16 0.0\n"
4137 
4138 
4139 		"%buf      = OpTypeStruct %u32arr\n"
4140 		"%bufptr   = OpTypePointer Uniform %buf\n"
4141 		"%indata   = OpVariable %bufptr Uniform\n"
4142 		"%outdata  = OpVariable %bufptr Uniform\n"
4143 
4144 		"%id       = OpVariable %uvec3ptr Input\n"
4145 		"%zero     = OpConstant %i32 0\n"
4146 		"%float_0  = OpConstant %f32 0.0\n"
4147 		"%float_1  = OpConstant %f32 1.0\n"
4148 		"%float_n1 = OpConstant %f32 -1.0\n"
4149 
4150 		"%main     = OpFunction %void None %voidf\n"
4151 		"%entry    = OpLabel\n"
4152 		"%idval    = OpLoad %uvec3 %id\n"
4153 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4154 		"%inloc    = OpAccessChain %u32ptr %indata %zero %x\n"
4155 		"%inval    = OpLoad %u32 %inloc\n"
4156 		"%f16_vec2_inval = OpBitcast %f16vec2 %inval\n"
4157 		"%f16_inval = OpCompositeExtract %f16 %f16_vec2_inval 0\n"
4158 		"%f32_inval = OpFConvert %f32 %f16_inval\n"
4159 
4160 		"%comp     = OpFOrdGreaterThan %bool %f32_inval %float_0\n"
4161 		"            OpSelectionMerge %cm None\n"
4162 		"            OpBranchConditional %comp %tb %fb\n"
4163 		"%tb       = OpLabel\n"
4164 		"            OpBranch %cm\n"
4165 		"%fb       = OpLabel\n"
4166 		"            OpBranch %cm\n"
4167 		"%cm       = OpLabel\n"
4168 		"%res      = OpPhi %f32 %float_1 %tb %float_n1 %fb\n"
4169 		"%f16_res  = OpFConvert %f16 %res\n"
4170 
4171 		"%f16vec2_res = OpCompositeConstruct %f16vec2 %f16_res %f16_0\n"
4172 		"%u32_res  = OpBitcast %u32 %f16vec2_res\n"
4173 
4174 		"%outloc   = OpAccessChain %u32ptr %outdata %zero %x\n"
4175 		"            OpStore %outloc %u32_res\n"
4176 		"            OpReturn\n"
4177 
4178 		"            OpFunctionEnd\n";
4179 
4180 	specFloat16.inputs.push_back(BufferSp(new Uint32Buffer(inputUints)));
4181 	specFloat16.outputs.push_back(BufferSp(new Uint32Buffer(outputUints)));
4182 	specFloat16.numWorkGroups = IVec3(numElements, 1, 1);
4183 	specFloat16.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
4184 
4185 	specMat4.assembly =
4186 		string(getComputeAsmShaderPreamble()) +
4187 
4188 		"OpSource GLSL 430\n"
4189 		"OpName %main \"main\"\n"
4190 		"OpName %id \"gl_GlobalInvocationID\"\n"
4191 
4192 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4193 
4194 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4195 
4196 		"%id = OpVariable %uvec3ptr Input\n"
4197 		"%v4f32      = OpTypeVector %f32 4\n"
4198 		"%mat4v4f32  = OpTypeMatrix %v4f32 4\n"
4199 		"%zero       = OpConstant %i32 0\n"
4200 		"%float_0    = OpConstant %f32 0.0\n"
4201 		"%float_1    = OpConstant %f32 1.0\n"
4202 		"%float_n1   = OpConstant %f32 -1.0\n"
4203 		"%m11        = OpConstantComposite %v4f32 %float_1 %float_0 %float_0 %float_0\n"
4204 		"%m12        = OpConstantComposite %v4f32 %float_0 %float_1 %float_0 %float_0\n"
4205 		"%m13        = OpConstantComposite %v4f32 %float_0 %float_0 %float_1 %float_0\n"
4206 		"%m14        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_1\n"
4207 		"%m1         = OpConstantComposite %mat4v4f32 %m11 %m12 %m13 %m14\n"
4208 		"%m21        = OpConstantComposite %v4f32 %float_n1 %float_0 %float_0 %float_0\n"
4209 		"%m22        = OpConstantComposite %v4f32 %float_0 %float_n1 %float_0 %float_0\n"
4210 		"%m23        = OpConstantComposite %v4f32 %float_0 %float_0 %float_n1 %float_0\n"
4211 		"%m24        = OpConstantComposite %v4f32 %float_0 %float_0 %float_0 %float_n1\n"
4212 		"%m2         = OpConstantComposite %mat4v4f32 %m21 %m22 %m23 %m24\n"
4213 
4214 		"%main     = OpFunction %void None %voidf\n"
4215 		"%entry    = OpLabel\n"
4216 		"%idval    = OpLoad %uvec3 %id\n"
4217 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4218 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4219 		"%inval    = OpLoad %f32 %inloc\n"
4220 
4221 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4222 		"            OpSelectionMerge %cm None\n"
4223 		"            OpBranchConditional %comp %tb %fb\n"
4224 		"%tb       = OpLabel\n"
4225 		"            OpBranch %cm\n"
4226 		"%fb       = OpLabel\n"
4227 		"            OpBranch %cm\n"
4228 		"%cm       = OpLabel\n"
4229 		"%mres     = OpPhi %mat4v4f32 %m1 %tb %m2 %fb\n"
4230 		"%res      = OpCompositeExtract %f32 %mres 2 2\n"
4231 
4232 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4233 		"            OpStore %outloc %res\n"
4234 		"            OpReturn\n"
4235 
4236 		"            OpFunctionEnd\n";
4237 	specMat4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4238 	specMat4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4239 	specMat4.numWorkGroups = IVec3(numElements, 1, 1);
4240 
4241 	specVec3.assembly =
4242 		string(getComputeAsmShaderPreamble()) +
4243 
4244 		"OpSource GLSL 430\n"
4245 		"OpName %main \"main\"\n"
4246 		"OpName %id \"gl_GlobalInvocationID\"\n"
4247 
4248 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4249 
4250 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4251 
4252 		"%id = OpVariable %uvec3ptr Input\n"
4253 		"%zero       = OpConstant %i32 0\n"
4254 		"%float_0    = OpConstant %f32 0.0\n"
4255 		"%float_1    = OpConstant %f32 1.0\n"
4256 		"%float_n1   = OpConstant %f32 -1.0\n"
4257 		"%v1         = OpConstantComposite %fvec3 %float_1 %float_1 %float_1\n"
4258 		"%v2         = OpConstantComposite %fvec3 %float_n1 %float_n1 %float_n1\n"
4259 
4260 		"%main     = OpFunction %void None %voidf\n"
4261 		"%entry    = OpLabel\n"
4262 		"%idval    = OpLoad %uvec3 %id\n"
4263 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4264 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4265 		"%inval    = OpLoad %f32 %inloc\n"
4266 
4267 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4268 		"            OpSelectionMerge %cm None\n"
4269 		"            OpBranchConditional %comp %tb %fb\n"
4270 		"%tb       = OpLabel\n"
4271 		"            OpBranch %cm\n"
4272 		"%fb       = OpLabel\n"
4273 		"            OpBranch %cm\n"
4274 		"%cm       = OpLabel\n"
4275 		"%vres     = OpPhi %fvec3 %v1 %tb %v2 %fb\n"
4276 		"%res      = OpCompositeExtract %f32 %vres 2\n"
4277 
4278 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4279 		"            OpStore %outloc %res\n"
4280 		"            OpReturn\n"
4281 
4282 		"            OpFunctionEnd\n";
4283 	specVec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4284 	specVec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4285 	specVec3.numWorkGroups = IVec3(numElements, 1, 1);
4286 
4287 	specInt.assembly =
4288 		string(getComputeAsmShaderPreamble()) +
4289 
4290 		"OpSource GLSL 430\n"
4291 		"OpName %main \"main\"\n"
4292 		"OpName %id \"gl_GlobalInvocationID\"\n"
4293 
4294 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4295 
4296 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4297 
4298 		"%id = OpVariable %uvec3ptr Input\n"
4299 		"%zero       = OpConstant %i32 0\n"
4300 		"%float_0    = OpConstant %f32 0.0\n"
4301 		"%i1         = OpConstant %i32 1\n"
4302 		"%i2         = OpConstant %i32 -1\n"
4303 
4304 		"%main     = OpFunction %void None %voidf\n"
4305 		"%entry    = OpLabel\n"
4306 		"%idval    = OpLoad %uvec3 %id\n"
4307 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4308 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4309 		"%inval    = OpLoad %f32 %inloc\n"
4310 
4311 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4312 		"            OpSelectionMerge %cm None\n"
4313 		"            OpBranchConditional %comp %tb %fb\n"
4314 		"%tb       = OpLabel\n"
4315 		"            OpBranch %cm\n"
4316 		"%fb       = OpLabel\n"
4317 		"            OpBranch %cm\n"
4318 		"%cm       = OpLabel\n"
4319 		"%ires     = OpPhi %i32 %i1 %tb %i2 %fb\n"
4320 		"%res      = OpConvertSToF %f32 %ires\n"
4321 
4322 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4323 		"            OpStore %outloc %res\n"
4324 		"            OpReturn\n"
4325 
4326 		"            OpFunctionEnd\n";
4327 	specInt.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4328 	specInt.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4329 	specInt.numWorkGroups = IVec3(numElements, 1, 1);
4330 
4331 	specArray.assembly =
4332 		string(getComputeAsmShaderPreamble()) +
4333 
4334 		"OpSource GLSL 430\n"
4335 		"OpName %main \"main\"\n"
4336 		"OpName %id \"gl_GlobalInvocationID\"\n"
4337 
4338 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4339 
4340 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4341 
4342 		"%id = OpVariable %uvec3ptr Input\n"
4343 		"%zero       = OpConstant %i32 0\n"
4344 		"%u7         = OpConstant %u32 7\n"
4345 		"%float_0    = OpConstant %f32 0.0\n"
4346 		"%float_1    = OpConstant %f32 1.0\n"
4347 		"%float_n1   = OpConstant %f32 -1.0\n"
4348 		"%f32a7      = OpTypeArray %f32 %u7\n"
4349 		"%a1         = OpConstantComposite %f32a7 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1 %float_1\n"
4350 		"%a2         = OpConstantComposite %f32a7 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1 %float_n1\n"
4351 		"%main     = OpFunction %void None %voidf\n"
4352 		"%entry    = OpLabel\n"
4353 		"%idval    = OpLoad %uvec3 %id\n"
4354 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4355 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4356 		"%inval    = OpLoad %f32 %inloc\n"
4357 
4358 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4359 		"            OpSelectionMerge %cm None\n"
4360 		"            OpBranchConditional %comp %tb %fb\n"
4361 		"%tb       = OpLabel\n"
4362 		"            OpBranch %cm\n"
4363 		"%fb       = OpLabel\n"
4364 		"            OpBranch %cm\n"
4365 		"%cm       = OpLabel\n"
4366 		"%ares     = OpPhi %f32a7 %a1 %tb %a2 %fb\n"
4367 		"%res      = OpCompositeExtract %f32 %ares 5\n"
4368 
4369 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4370 		"            OpStore %outloc %res\n"
4371 		"            OpReturn\n"
4372 
4373 		"            OpFunctionEnd\n";
4374 	specArray.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4375 	specArray.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4376 	specArray.numWorkGroups = IVec3(numElements, 1, 1);
4377 
4378 	specStruct.assembly =
4379 		string(getComputeAsmShaderPreamble()) +
4380 
4381 		"OpSource GLSL 430\n"
4382 		"OpName %main \"main\"\n"
4383 		"OpName %id \"gl_GlobalInvocationID\"\n"
4384 
4385 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4386 
4387 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4388 
4389 		"%id = OpVariable %uvec3ptr Input\n"
4390 		"%zero       = OpConstant %i32 0\n"
4391 		"%float_0    = OpConstant %f32 0.0\n"
4392 		"%float_1    = OpConstant %f32 1.0\n"
4393 		"%float_n1   = OpConstant %f32 -1.0\n"
4394 
4395 		"%v2f32      = OpTypeVector %f32 2\n"
4396 		"%Data2      = OpTypeStruct %f32 %v2f32\n"
4397 		"%Data       = OpTypeStruct %Data2 %f32\n"
4398 
4399 		"%in1a       = OpConstantComposite %v2f32 %float_1 %float_1\n"
4400 		"%in1b       = OpConstantComposite %Data2 %float_1 %in1a\n"
4401 		"%s1         = OpConstantComposite %Data %in1b %float_1\n"
4402 		"%in2a       = OpConstantComposite %v2f32 %float_n1 %float_n1\n"
4403 		"%in2b       = OpConstantComposite %Data2 %float_n1 %in2a\n"
4404 		"%s2         = OpConstantComposite %Data %in2b %float_n1\n"
4405 
4406 		"%main     = OpFunction %void None %voidf\n"
4407 		"%entry    = OpLabel\n"
4408 		"%idval    = OpLoad %uvec3 %id\n"
4409 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4410 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4411 		"%inval    = OpLoad %f32 %inloc\n"
4412 
4413 		"%comp     = OpFOrdGreaterThan %bool %inval %float_0\n"
4414 		"            OpSelectionMerge %cm None\n"
4415 		"            OpBranchConditional %comp %tb %fb\n"
4416 		"%tb       = OpLabel\n"
4417 		"            OpBranch %cm\n"
4418 		"%fb       = OpLabel\n"
4419 		"            OpBranch %cm\n"
4420 		"%cm       = OpLabel\n"
4421 		"%sres     = OpPhi %Data %s1 %tb %s2 %fb\n"
4422 		"%res      = OpCompositeExtract %f32 %sres 0 0\n"
4423 
4424 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4425 		"            OpStore %outloc %res\n"
4426 		"            OpReturn\n"
4427 
4428 		"            OpFunctionEnd\n";
4429 	specStruct.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4430 	specStruct.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
4431 	specStruct.numWorkGroups = IVec3(numElements, 1, 1);
4432 
4433 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_int", "OpPhi with int variables", specInt));
4434 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float", "OpPhi with float variables", specFloat));
4435 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_float16", "OpPhi with 16bit float variables", specFloat16));
4436 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_vec3", "OpPhi with vec3 variables", specVec3));
4437 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_mat4", "OpPhi with mat4 variables", specMat4));
4438 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_array", "OpPhi with array variables", specArray));
4439 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vartype_struct", "OpPhi with struct variables", specStruct));
4440 }
4441 
// Returns SPIR-V assembly declaring 'count' 32-bit float constants, one per line.
// Constant number i is named %cf<10*i+5> and holds the value <10*i+5>.0
// (%cf5 = 5.0, %cf15 = 15.0, ...). The text is terminated by an empty line.
string generateConstantDefinitions (int count)
{
	std::ostringstream	definitions;

	for (int ndx = 0; ndx < count; ++ndx)
	{
		const int	constValue	= ndx * 10 + 5;

		definitions << "%cf" << constValue << " = OpConstant %f32 " << constValue << ".0\n";
	}

	definitions << "\n";

	return definitions.str();
}
4450 
// Returns the "<literal> <label>" operand list of an OpSwitch with 'count' cases:
// case value i jumps to label %case<i>. Every pair is preceded by a space and the
// list is terminated by a newline.
string generateSwitchCases (int count)
{
	std::ostringstream	operands;
	int					caseNdx	= 0;

	while (caseNdx < count)
	{
		operands << " " << caseNdx << " %case" << caseNdx;
		++caseNdx;
	}

	operands << "\n";

	return operands.str();
}
4459 
// Returns the target blocks for a switch with 'count' cases: each block consists
// of the label %case<i> followed by an unconditional branch to %phi. The text is
// terminated by an empty line.
string generateSwitchTargets (int count)
{
	std::ostringstream	blocks;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		blocks << "%case" << caseNdx;
		blocks << " = OpLabel\n            OpBranch %phi\n";
	}

	blocks << "\n";

	return blocks.str();
}
4468 
// Returns the "<value> <parent block>" operand list of an OpPhi with 'count'
// incoming edges: the value arriving from block %case<i> is the constant
// %cf<10*i+5>. Every pair is preceded by a space and the list is terminated
// by a newline.
string generateOpPhiParams (int count)
{
	std::ostringstream	operands;

	for (int caseNdx = 0; caseNdx < count; ++caseNdx)
	{
		const int	constValue	= caseNdx * 10 + 5;

		operands << " %cf" << constValue << " %case" << caseNdx;
	}

	operands << "\n";

	return operands.str();
}
4477 
// Returns the decimal text representation of 'value'.
string generateIntWidth (int value)
{
	std::ostringstream	text;

	text << value;

	return text.str();
}
4484 
// Returns a copy of 's' in which "ABC" has been inserted after some of the
// characters.
//
// For every character copied, 'add' is added to the running accumulator 'acc'.
// Whenever the accumulator then exceeds 'treshold', an "ABC" is inserted and
// 'treshold' is subtracted from the accumulator. Not inserting after every
// character keeps the result less uniform (and a lot shorter).
//
// 'acc' is an in/out parameter, so its state carries over between successive calls.
string expandOpPhiCase5 (const string& s, int &acc, int add, int treshold)
{
	std::ostringstream	expanded;

	for (const char* cursor = s.c_str(); *cursor != '\0'; ++cursor)
	{
		expanded << *cursor;

		acc += add;
		if (acc <= treshold)
			continue;

		// Accumulator overflowed: inject one more if/else/endif triple here.
		acc -= treshold;
		expanded << "ABC";
	}

	return expanded.str();
}
4507 
4508 // Calculate expected result based on the code string
calcOpPhiCase5(float val,const string & s)4509 float calcOpPhiCase5 (float val, const string& s)
4510 {
4511 	const char*		p		= s.c_str();
4512 	float			x[8];
4513 	bool			b[8];
4514 	const float		tv[8]	= { 0.5f, 1.5f, 3.5f, 7.5f, 15.5f, 31.5f, 63.5f, 127.5f };
4515 	const float		v		= deFloatAbs(val);
4516 	float			res		= 0;
4517 	int				depth	= -1;
4518 	int				skip	= 0;
4519 
4520 	for (int i = 7; i >= 0; --i)
4521 		x[i] = std::fmod((float)v, (float)(2 << i));
4522 	for (int i = 7; i >= 0; --i)
4523 		b[i] = x[i] > tv[i];
4524 
4525 	while (*p)
4526 	{
4527 		if (*p == 'A')
4528 		{
4529 			depth++;
4530 			if (skip == 0 && b[depth])
4531 			{
4532 				res++;
4533 			}
4534 			else
4535 				skip++;
4536 		}
4537 		if (*p == 'B')
4538 		{
4539 			if (skip)
4540 				skip--;
4541 			if (b[depth] || skip)
4542 				skip++;
4543 		}
4544 		if (*p == 'C')
4545 		{
4546 			depth--;
4547 			if (skip)
4548 				skip--;
4549 		}
4550 		p++;
4551 	}
4552 	return res;
4553 }
4554 
// Generates the SPIR-V instructions for the nested if/else structure described by
// the code string 's'. The letters of the code string mean:
//
// A:
//     if (certain bit is set)
//     {
//       result++;
//
// B:
//     } else {
//
// C:
//     }
//
// examples:
// AABCBC leads to if(){r++;if(){r++;}else{}}else{}
// ABABCC leads to if(){r++;}else{if(){r++;}else{}}
// ABCABC leads to if(){r++;}else{}if(){r++;}else{}
//
// Every 'A' opens a selection whose ids are suffixed with the position of that 'A'
// in the code string (%t<n>, %f<n>, %m<n>, %rt<n>, %rm<n>) and which branches on
// %b<depth>. The running result starts as %f32_0 coming from block %entry; the OpPhi
// emitted for the last character of the string is named %res.
//
// The else-branches do not generate new values, so the generator keeps track of the
// value that was live before the matching 'A' in order to feed it into the OpPhi.
string generateOpPhiCase5 (const string& s)
{
	std::stack<int>				openIds;		// ids of the selections that are currently open
	std::stack<std::string>		liveValues;		// id of the running result on each nesting level
	std::stack<std::string>		liveOperands;	// "<value id> <block label>" pair matching liveValues
	std::stack<std::string>		thenOperands;	// OpPhi operand pair produced by the then-branch of each open selection
	std::ostringstream			code;
	const char* const			text		= s.c_str();
	int							depth		= -1;
	int							currId		= 0;

	openIds.push(-1);
	liveValues.push("%f32_0");
	liveOperands.push("%f32_0 %entry");

	for (int pos = 0; text[pos] != '\0'; ++pos)
	{
		switch (text[pos])
		{
			case 'A':
			{
				depth++;
				currId = pos;
				openIds.push(currId);

				code << "\tOpSelectionMerge %m" << currId << " None\n";
				code << "\tOpBranchConditional %b" << depth << " %t" << currId << " %f" << currId << "\n";
				code << "%t" << currId << " = OpLabel\n";
				code << "%rt" << currId << " = OpFAdd %f32 " << liveValues.top() << " %f32_1\n";

				// Inside the then-branch the incremented value is the live one.
				std::ostringstream incremented;
				incremented << "%rt" << currId;
				liveValues.push(incremented.str());
				incremented << " %t" << currId;
				liveOperands.push(incremented.str());
				break;
			}

			case 'B':
			{
				// Remember what the then-branch produced and fall back to the value
				// that was live before the selection, now arriving from the else block.
				thenOperands.push(liveOperands.top());
				liveValues.pop();
				liveOperands.pop();

				code << "\tOpBranch %m" << currId << "\n";
				code << "%f" << currId << " = OpLabel\n";

				std::ostringstream unchanged;
				unchanged << liveValues.top() << " %f" << currId;
				liveOperands.pop();
				liveOperands.push(unchanged.str());
				break;
			}

			case 'C':
			{
				const std::string	elseOperand	= liveOperands.top();
				const bool			isLast		= (text[pos + 1] == '\0');

				code << "\tOpBranch %m" << currId << "\n";
				code << "%m" << currId << " = OpLabel\n";

				if (isLast)
					code << "%res"; // last result goes to %res
				else
					code << "%rm" << currId;

				code << " = OpPhi %f32  " << thenOperands.top() << "  " << elseOperand << "\n";

				// The merged value replaces the live value of the enclosing level.
				std::ostringstream merged;
				merged << "%rm" << currId;
				liveValues.pop();
				liveValues.push(merged.str());
				merged << " %m" << currId;
				liveOperands.pop();
				liveOperands.push(merged.str());

				thenOperands.pop();
				depth--;
				openIds.pop();
				currId = openIds.top();
				break;
			}

			default:
				break;
		}
	}

	return code.str();
}
4653 
createOpPhiGroup(tcu::TestContext & testCtx)4654 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
4655 {
4656 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
4657 	ComputeShaderSpec				spec1;
4658 	ComputeShaderSpec				spec2;
4659 	ComputeShaderSpec				spec3;
4660 	ComputeShaderSpec				spec4;
4661 	ComputeShaderSpec				spec5;
4662 	de::Random						rnd				(deStringHash(group->getName()));
4663 	const int						numElements		= 100;
4664 	vector<float>					inputFloats		(numElements, 0);
4665 	vector<float>					outputFloats1	(numElements, 0);
4666 	vector<float>					outputFloats2	(numElements, 0);
4667 	vector<float>					outputFloats3	(numElements, 0);
4668 	vector<float>					outputFloats4	(numElements, 0);
4669 	vector<float>					outputFloats5	(numElements, 0);
4670 	std::string						codestring		= "ABC";
4671 	const int						test4Width		= 512;
4672 
4673 	// Build case 5 code string. Each iteration makes the hierarchy more complicated.
4674 	// 9 iterations with (7, 24) parameters makes the hierarchy 8 deep with about 1500 lines of
4675 	// shader code.
4676 	for (int i = 0, acc = 0; i < 9; i++)
4677 		codestring = expandOpPhiCase5(codestring, acc, 7, 24);
4678 
4679 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
4680 
4681 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
4682 	floorAll(inputFloats);
4683 
4684 	for (size_t ndx = 0; ndx < numElements; ++ndx)
4685 	{
4686 		switch (ndx % 3)
4687 		{
4688 			case 0:		outputFloats1[ndx] = inputFloats[ndx] + 5.5f;	break;
4689 			case 1:		outputFloats1[ndx] = inputFloats[ndx] + 20.5f;	break;
4690 			case 2:		outputFloats1[ndx] = inputFloats[ndx] + 1.75f;	break;
4691 			default:	break;
4692 		}
4693 		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
4694 		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
4695 
4696 		int index4 = (int)deFloor(deAbs((float)ndx * inputFloats[ndx]));
4697 		outputFloats4[ndx] = (float)(index4 % test4Width) * 10.0f + 5.0f;
4698 
4699 		outputFloats5[ndx] = calcOpPhiCase5(inputFloats[ndx], codestring);
4700 	}
4701 
4702 	spec1.assembly =
4703 		string(getComputeAsmShaderPreamble()) +
4704 
4705 		"OpSource GLSL 430\n"
4706 		"OpName %main \"main\"\n"
4707 		"OpName %id \"gl_GlobalInvocationID\"\n"
4708 
4709 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4710 
4711 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4712 
4713 		"%id = OpVariable %uvec3ptr Input\n"
4714 		"%zero       = OpConstant %i32 0\n"
4715 		"%three      = OpConstant %u32 3\n"
4716 		"%constf5p5  = OpConstant %f32 5.5\n"
4717 		"%constf20p5 = OpConstant %f32 20.5\n"
4718 		"%constf1p75 = OpConstant %f32 1.75\n"
4719 		"%constf8p5  = OpConstant %f32 8.5\n"
4720 		"%constf6p5  = OpConstant %f32 6.5\n"
4721 
4722 		"%main     = OpFunction %void None %voidf\n"
4723 		"%entry    = OpLabel\n"
4724 		"%idval    = OpLoad %uvec3 %id\n"
4725 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4726 		"%selector = OpUMod %u32 %x %three\n"
4727 		"            OpSelectionMerge %phi None\n"
4728 		"            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
4729 
4730 		// Case 1 before OpPhi.
4731 		"%case1    = OpLabel\n"
4732 		"            OpBranch %phi\n"
4733 
4734 		"%default  = OpLabel\n"
4735 		"            OpUnreachable\n"
4736 
4737 		"%phi      = OpLabel\n"
4738 		"%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
4739 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4740 		"%inval    = OpLoad %f32 %inloc\n"
4741 		"%add      = OpFAdd %f32 %inval %operand\n"
4742 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4743 		"            OpStore %outloc %add\n"
4744 		"            OpReturn\n"
4745 
4746 		// Case 0 after OpPhi.
4747 		"%case0    = OpLabel\n"
4748 		"            OpBranch %phi\n"
4749 
4750 
4751 		// Case 2 after OpPhi.
4752 		"%case2    = OpLabel\n"
4753 		"            OpBranch %phi\n"
4754 
4755 		"            OpFunctionEnd\n";
4756 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4757 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
4758 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
4759 
4760 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
4761 
4762 	spec2.assembly =
4763 		string(getComputeAsmShaderPreamble()) +
4764 
4765 		"OpName %main \"main\"\n"
4766 		"OpName %id \"gl_GlobalInvocationID\"\n"
4767 
4768 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4769 
4770 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4771 
4772 		"%id         = OpVariable %uvec3ptr Input\n"
4773 		"%zero       = OpConstant %i32 0\n"
4774 		"%one        = OpConstant %i32 1\n"
4775 		"%three      = OpConstant %i32 3\n"
4776 		"%constf6p5  = OpConstant %f32 6.5\n"
4777 
4778 		"%main       = OpFunction %void None %voidf\n"
4779 		"%entry      = OpLabel\n"
4780 		"%idval      = OpLoad %uvec3 %id\n"
4781 		"%x          = OpCompositeExtract %u32 %idval 0\n"
4782 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4783 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4784 		"%inval      = OpLoad %f32 %inloc\n"
4785 		"              OpBranch %phi\n"
4786 
4787 		"%phi        = OpLabel\n"
4788 		"%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
4789 		"%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
4790 		"%step_next  = OpIAdd %i32 %step %one\n"
4791 		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
4792 		"%still_loop = OpSLessThan %bool %step %three\n"
4793 		"              OpLoopMerge %exit %phi None\n"
4794 		"              OpBranchConditional %still_loop %phi %exit\n"
4795 
4796 		"%exit       = OpLabel\n"
4797 		"              OpStore %outloc %accum\n"
4798 		"              OpReturn\n"
4799 		"              OpFunctionEnd\n";
4800 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4801 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
4802 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
4803 
4804 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
4805 
4806 	spec3.assembly =
4807 		string(getComputeAsmShaderPreamble()) +
4808 
4809 		"OpName %main \"main\"\n"
4810 		"OpName %id \"gl_GlobalInvocationID\"\n"
4811 
4812 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4813 
4814 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4815 
4816 		"%f32ptr_f   = OpTypePointer Function %f32\n"
4817 		"%id         = OpVariable %uvec3ptr Input\n"
4818 		"%true       = OpConstantTrue %bool\n"
4819 		"%false      = OpConstantFalse %bool\n"
4820 		"%zero       = OpConstant %i32 0\n"
4821 		"%constf8p5  = OpConstant %f32 8.5\n"
4822 
4823 		"%main       = OpFunction %void None %voidf\n"
4824 		"%entry      = OpLabel\n"
4825 		"%b          = OpVariable %f32ptr_f Function %constf8p5\n"
4826 		"%idval      = OpLoad %uvec3 %id\n"
4827 		"%x          = OpCompositeExtract %u32 %idval 0\n"
4828 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
4829 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
4830 		"%a_init     = OpLoad %f32 %inloc\n"
4831 		"%b_init     = OpLoad %f32 %b\n"
4832 		"              OpBranch %phi\n"
4833 
4834 		"%phi        = OpLabel\n"
4835 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
4836 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
4837 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
4838 		"              OpLoopMerge %exit %phi None\n"
4839 		"              OpBranchConditional %still_loop %phi %exit\n"
4840 
4841 		"%exit       = OpLabel\n"
4842 		"%sub        = OpFSub %f32 %a_next %b_next\n"
4843 		"              OpStore %outloc %sub\n"
4844 		"              OpReturn\n"
4845 		"              OpFunctionEnd\n";
4846 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4847 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
4848 	spec3.numWorkGroups = IVec3(numElements, 1, 1);
4849 
4850 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
4851 
4852 	spec4.assembly =
4853 		"OpCapability Shader\n"
4854 		"%ext = OpExtInstImport \"GLSL.std.450\"\n"
4855 		"OpMemoryModel Logical GLSL450\n"
4856 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4857 		"OpExecutionMode %main LocalSize 1 1 1\n"
4858 
4859 		"OpSource GLSL 430\n"
4860 		"OpName %main \"main\"\n"
4861 		"OpName %id \"gl_GlobalInvocationID\"\n"
4862 
4863 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4864 
4865 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4866 
4867 		"%id       = OpVariable %uvec3ptr Input\n"
4868 		"%zero     = OpConstant %i32 0\n"
4869 		"%cimod    = OpConstant %u32 " + generateIntWidth(test4Width) + "\n"
4870 
4871 		+ generateConstantDefinitions(test4Width) +
4872 
4873 		"%main     = OpFunction %void None %voidf\n"
4874 		"%entry    = OpLabel\n"
4875 		"%idval    = OpLoad %uvec3 %id\n"
4876 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4877 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4878 		"%inval    = OpLoad %f32 %inloc\n"
4879 		"%xf       = OpConvertUToF %f32 %x\n"
4880 		"%xm       = OpFMul %f32 %xf %inval\n"
4881 		"%xa       = OpExtInst %f32 %ext FAbs %xm\n"
4882 		"%xi       = OpConvertFToU %u32 %xa\n"
4883 		"%selector = OpUMod %u32 %xi %cimod\n"
4884 		"            OpSelectionMerge %phi None\n"
4885 		"            OpSwitch %selector %default "
4886 
4887 		+ generateSwitchCases(test4Width) +
4888 
4889 		"%default  = OpLabel\n"
4890 		"            OpUnreachable\n"
4891 
4892 		+ generateSwitchTargets(test4Width) +
4893 
4894 		"%phi      = OpLabel\n"
4895 		"%result   = OpPhi %f32"
4896 
4897 		+ generateOpPhiParams(test4Width) +
4898 
4899 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4900 		"            OpStore %outloc %result\n"
4901 		"            OpReturn\n"
4902 
4903 		"            OpFunctionEnd\n";
4904 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4905 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
4906 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
4907 
4908 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "wide", "OpPhi with a lot of parameters", spec4));
4909 
4910 	spec5.assembly =
4911 		"OpCapability Shader\n"
4912 		"%ext      = OpExtInstImport \"GLSL.std.450\"\n"
4913 		"OpMemoryModel Logical GLSL450\n"
4914 		"OpEntryPoint GLCompute %main \"main\" %id\n"
4915 		"OpExecutionMode %main LocalSize 1 1 1\n"
4916 		"%code     = OpString \"" + codestring + "\"\n"
4917 
4918 		"OpSource GLSL 430\n"
4919 		"OpName %main \"main\"\n"
4920 		"OpName %id \"gl_GlobalInvocationID\"\n"
4921 
4922 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
4923 
4924 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
4925 
4926 		"%id       = OpVariable %uvec3ptr Input\n"
4927 		"%zero     = OpConstant %i32 0\n"
4928 		"%f32_0    = OpConstant %f32 0.0\n"
4929 		"%f32_0_5  = OpConstant %f32 0.5\n"
4930 		"%f32_1    = OpConstant %f32 1.0\n"
4931 		"%f32_1_5  = OpConstant %f32 1.5\n"
4932 		"%f32_2    = OpConstant %f32 2.0\n"
4933 		"%f32_3_5  = OpConstant %f32 3.5\n"
4934 		"%f32_4    = OpConstant %f32 4.0\n"
4935 		"%f32_7_5  = OpConstant %f32 7.5\n"
4936 		"%f32_8    = OpConstant %f32 8.0\n"
4937 		"%f32_15_5 = OpConstant %f32 15.5\n"
4938 		"%f32_16   = OpConstant %f32 16.0\n"
4939 		"%f32_31_5 = OpConstant %f32 31.5\n"
4940 		"%f32_32   = OpConstant %f32 32.0\n"
4941 		"%f32_63_5 = OpConstant %f32 63.5\n"
4942 		"%f32_64   = OpConstant %f32 64.0\n"
4943 		"%f32_127_5 = OpConstant %f32 127.5\n"
4944 		"%f32_128  = OpConstant %f32 128.0\n"
4945 		"%f32_256  = OpConstant %f32 256.0\n"
4946 
4947 		"%main     = OpFunction %void None %voidf\n"
4948 		"%entry    = OpLabel\n"
4949 		"%idval    = OpLoad %uvec3 %id\n"
4950 		"%x        = OpCompositeExtract %u32 %idval 0\n"
4951 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
4952 		"%inval    = OpLoad %f32 %inloc\n"
4953 
4954 		"%xabs     = OpExtInst %f32 %ext FAbs %inval\n"
4955 		"%x8       = OpFMod %f32 %xabs %f32_256\n"
4956 		"%x7       = OpFMod %f32 %xabs %f32_128\n"
4957 		"%x6       = OpFMod %f32 %xabs %f32_64\n"
4958 		"%x5       = OpFMod %f32 %xabs %f32_32\n"
4959 		"%x4       = OpFMod %f32 %xabs %f32_16\n"
4960 		"%x3       = OpFMod %f32 %xabs %f32_8\n"
4961 		"%x2       = OpFMod %f32 %xabs %f32_4\n"
4962 		"%x1       = OpFMod %f32 %xabs %f32_2\n"
4963 
4964 		"%b7       = OpFOrdGreaterThanEqual %bool %x8 %f32_127_5\n"
4965 		"%b6       = OpFOrdGreaterThanEqual %bool %x7 %f32_63_5\n"
4966 		"%b5       = OpFOrdGreaterThanEqual %bool %x6 %f32_31_5\n"
4967 		"%b4       = OpFOrdGreaterThanEqual %bool %x5 %f32_15_5\n"
4968 		"%b3       = OpFOrdGreaterThanEqual %bool %x4 %f32_7_5\n"
4969 		"%b2       = OpFOrdGreaterThanEqual %bool %x3 %f32_3_5\n"
4970 		"%b1       = OpFOrdGreaterThanEqual %bool %x2 %f32_1_5\n"
4971 		"%b0       = OpFOrdGreaterThanEqual %bool %x1 %f32_0_5\n"
4972 
4973 		+ generateOpPhiCase5(codestring) +
4974 
4975 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
4976 		"            OpStore %outloc %res\n"
4977 		"            OpReturn\n"
4978 
4979 		"            OpFunctionEnd\n";
4980 	spec5.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
4981 	spec5.outputs.push_back(BufferSp(new Float32Buffer(outputFloats5)));
4982 	spec5.numWorkGroups = IVec3(numElements, 1, 1);
4983 
4984 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "nested", "Stress OpPhi with a lot of nesting", spec5));
4985 
4986 	createOpPhiVartypeTests(group, testCtx);
4987 
4988 	return group.release();
4989 }
4990 
4991 // Assembly code used for testing block order is based on GLSL source code:
4992 //
4993 // #version 430
4994 //
4995 // layout(std140, set = 0, binding = 0) readonly buffer Input {
4996 //   float elements[];
4997 // } input_data;
4998 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
4999 //   float elements[];
5000 // } output_data;
5001 //
5002 // void main() {
5003 //   uint x = gl_GlobalInvocationID.x;
5004 //   output_data.elements[x] = input_data.elements[x];
5005 //   if (x > uint(50)) {
5006 //     switch (x % uint(3)) {
5007 //       case 0: output_data.elements[x] += 1.5f; break;
5008 //       case 1: output_data.elements[x] += 42.f; break;
5009 //       case 2: output_data.elements[x] -= 27.f; break;
5010 //       default: break;
5011 //     }
5012 //   } else {
5013 //     output_data.elements[x] = -input_data.elements[x];
5014 //   }
5015 // }
createBlockOrderGroup(tcu::TestContext & testCtx)5016 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
5017 {
5018 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
5019 	ComputeShaderSpec				spec;
5020 	de::Random						rnd				(deStringHash(group->getName()));
5021 	const int						numElements		= 100;
5022 	vector<float>					inputFloats		(numElements, 0);
5023 	vector<float>					outputFloats	(numElements, 0);
5024 
5025 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
5026 
5027 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
5028 	floorAll(inputFloats);
5029 
5030 	for (size_t ndx = 0; ndx <= 50; ++ndx)
5031 		outputFloats[ndx] = -inputFloats[ndx];
5032 
5033 	for (size_t ndx = 51; ndx < numElements; ++ndx)
5034 	{
5035 		switch (ndx % 3)
5036 		{
5037 			case 0:		outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
5038 			case 1:		outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
5039 			case 2:		outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
5040 			default:	break;
5041 		}
5042 	}
5043 
5044 	spec.assembly =
5045 		string(getComputeAsmShaderPreamble()) +
5046 
5047 		"OpSource GLSL 430\n"
5048 		"OpName %main \"main\"\n"
5049 		"OpName %id \"gl_GlobalInvocationID\"\n"
5050 
5051 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5052 
5053 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5054 
5055 		"%u32ptr       = OpTypePointer Function %u32\n"
5056 		"%u32ptr_input = OpTypePointer Input %u32\n"
5057 
5058 		+ string(getComputeAsmInputOutputBuffer()) +
5059 
5060 		"%id        = OpVariable %uvec3ptr Input\n"
5061 		"%zero      = OpConstant %i32 0\n"
5062 		"%const3    = OpConstant %u32 3\n"
5063 		"%const50   = OpConstant %u32 50\n"
5064 		"%constf1p5 = OpConstant %f32 1.5\n"
5065 		"%constf27  = OpConstant %f32 27.0\n"
5066 		"%constf42  = OpConstant %f32 42.0\n"
5067 
5068 		"%main = OpFunction %void None %voidf\n"
5069 
5070 		// entry block.
5071 		"%entry    = OpLabel\n"
5072 
5073 		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
5074 		"%xvar     = OpVariable %u32ptr Function\n"
5075 		"%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
5076 		"%x        = OpLoad %u32 %xptr\n"
5077 		"            OpStore %xvar %x\n"
5078 
5079 		"%cmp      = OpUGreaterThan %bool %x %const50\n"
5080 		"            OpSelectionMerge %if_merge None\n"
5081 		"            OpBranchConditional %cmp %if_true %if_false\n"
5082 
5083 		// False branch for if-statement: placed in the middle of switch cases and before true branch.
5084 		"%if_false = OpLabel\n"
5085 		"%x_f      = OpLoad %u32 %xvar\n"
5086 		"%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
5087 		"%inval_f  = OpLoad %f32 %inloc_f\n"
5088 		"%negate   = OpFNegate %f32 %inval_f\n"
5089 		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
5090 		"            OpStore %outloc_f %negate\n"
5091 		"            OpBranch %if_merge\n"
5092 
5093 		// Merge block for if-statement: placed in the middle of true and false branch.
5094 		"%if_merge = OpLabel\n"
5095 		"            OpReturn\n"
5096 
5097 		// True branch for if-statement: placed in the middle of swtich cases and after the false branch.
5098 		"%if_true  = OpLabel\n"
5099 		"%xval_t   = OpLoad %u32 %xvar\n"
5100 		"%mod      = OpUMod %u32 %xval_t %const3\n"
5101 		"            OpSelectionMerge %switch_merge None\n"
5102 		"            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
5103 
5104 		// Merge block for switch-statement: placed before the case
5105 		// bodies.  But it must follow OpSwitch which dominates it.
5106 		"%switch_merge = OpLabel\n"
5107 		"                OpBranch %if_merge\n"
5108 
5109 		// Case 1 for switch-statement: placed before case 0.
5110 		// It must follow the OpSwitch that dominates it.
5111 		"%case1    = OpLabel\n"
5112 		"%x_1      = OpLoad %u32 %xvar\n"
5113 		"%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
5114 		"%inval_1  = OpLoad %f32 %inloc_1\n"
5115 		"%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
5116 		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
5117 		"            OpStore %outloc_1 %addf42\n"
5118 		"            OpBranch %switch_merge\n"
5119 
5120 		// Case 2 for switch-statement.
5121 		"%case2    = OpLabel\n"
5122 		"%x_2      = OpLoad %u32 %xvar\n"
5123 		"%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
5124 		"%inval_2  = OpLoad %f32 %inloc_2\n"
5125 		"%subf27   = OpFSub %f32 %inval_2 %constf27\n"
5126 		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
5127 		"            OpStore %outloc_2 %subf27\n"
5128 		"            OpBranch %switch_merge\n"
5129 
5130 		// Default case for switch-statement: placed in the middle of normal cases.
5131 		"%default = OpLabel\n"
5132 		"           OpBranch %switch_merge\n"
5133 
5134 		// Case 0 for switch-statement: out of order.
5135 		"%case0    = OpLabel\n"
5136 		"%x_0      = OpLoad %u32 %xvar\n"
5137 		"%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
5138 		"%inval_0  = OpLoad %f32 %inloc_0\n"
5139 		"%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
5140 		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
5141 		"            OpStore %outloc_0 %addf1p5\n"
5142 		"            OpBranch %switch_merge\n"
5143 
5144 		"            OpFunctionEnd\n";
5145 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5146 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5147 	spec.numWorkGroups = IVec3(numElements, 1, 1);
5148 
5149 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
5150 
5151 	return group.release();
5152 }
5153 
createMultipleShaderGroup(tcu::TestContext & testCtx)5154 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
5155 {
5156 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
5157 	ComputeShaderSpec				spec1;
5158 	ComputeShaderSpec				spec2;
5159 	de::Random						rnd				(deStringHash(group->getName()));
5160 	const int						numElements		= 100;
5161 	vector<float>					inputFloats		(numElements, 0);
5162 	vector<float>					outputFloats1	(numElements, 0);
5163 	vector<float>					outputFloats2	(numElements, 0);
5164 	fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
5165 
5166 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5167 	{
5168 		outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
5169 		outputFloats2[ndx] = -inputFloats[ndx];
5170 	}
5171 
5172 	const string assembly(
5173 		"OpCapability Shader\n"
5174 		"OpMemoryModel Logical GLSL450\n"
5175 		"OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
5176 		"OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
5177 		// A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
5178 		"OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
5179 		"OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
5180 		"OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
5181 
5182 		"OpName %comp_main1              \"entrypoint1\"\n"
5183 		"OpName %comp_main2              \"entrypoint2\"\n"
5184 		"OpName %vert_main               \"entrypoint2\"\n"
5185 		"OpName %id                      \"gl_GlobalInvocationID\"\n"
5186 		"OpName %vert_builtin_st         \"gl_PerVertex\"\n"
5187 		"OpName %vertexIndex             \"gl_VertexIndex\"\n"
5188 		"OpName %instanceIndex           \"gl_InstanceIndex\"\n"
5189 		"OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
5190 		"OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
5191 		"OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
5192 
5193 		"OpDecorate %id                      BuiltIn GlobalInvocationId\n"
5194 		"OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
5195 		"OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
5196 		"OpDecorate %vert_builtin_st         Block\n"
5197 		"OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
5198 		"OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
5199 		"OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
5200 
5201 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5202 
5203 		"%zero       = OpConstant %i32 0\n"
5204 		"%one        = OpConstant %u32 1\n"
5205 		"%c_f32_1    = OpConstant %f32 1\n"
5206 
5207 		"%i32inputptr         = OpTypePointer Input %i32\n"
5208 		"%vec4                = OpTypeVector %f32 4\n"
5209 		"%vec4ptr             = OpTypePointer Output %vec4\n"
5210 		"%f32arr1             = OpTypeArray %f32 %one\n"
5211 		"%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
5212 		"%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
5213 		"%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
5214 
5215 		"%id         = OpVariable %uvec3ptr Input\n"
5216 		"%vertexIndex = OpVariable %i32inputptr Input\n"
5217 		"%instanceIndex = OpVariable %i32inputptr Input\n"
5218 		"%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5219 
5220 		// gl_Position = vec4(1.);
5221 		"%vert_main  = OpFunction %void None %voidf\n"
5222 		"%vert_entry = OpLabel\n"
5223 		"%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
5224 		"              OpStore %position %c_vec4_1\n"
5225 		"              OpReturn\n"
5226 		"              OpFunctionEnd\n"
5227 
5228 		// Double inputs.
5229 		"%comp_main1  = OpFunction %void None %voidf\n"
5230 		"%comp1_entry = OpLabel\n"
5231 		"%idval1      = OpLoad %uvec3 %id\n"
5232 		"%x1          = OpCompositeExtract %u32 %idval1 0\n"
5233 		"%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
5234 		"%inval1      = OpLoad %f32 %inloc1\n"
5235 		"%add         = OpFAdd %f32 %inval1 %inval1\n"
5236 		"%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
5237 		"               OpStore %outloc1 %add\n"
5238 		"               OpReturn\n"
5239 		"               OpFunctionEnd\n"
5240 
5241 		// Negate inputs.
5242 		"%comp_main2  = OpFunction %void None %voidf\n"
5243 		"%comp2_entry = OpLabel\n"
5244 		"%idval2      = OpLoad %uvec3 %id\n"
5245 		"%x2          = OpCompositeExtract %u32 %idval2 0\n"
5246 		"%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
5247 		"%inval2      = OpLoad %f32 %inloc2\n"
5248 		"%neg         = OpFNegate %f32 %inval2\n"
5249 		"%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
5250 		"               OpStore %outloc2 %neg\n"
5251 		"               OpReturn\n"
5252 		"               OpFunctionEnd\n");
5253 
5254 	spec1.assembly = assembly;
5255 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5256 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
5257 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
5258 	spec1.entryPoint = "entrypoint1";
5259 
5260 	spec2.assembly = assembly;
5261 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5262 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
5263 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
5264 	spec2.entryPoint = "entrypoint2";
5265 
5266 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
5267 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
5268 
5269 	return group.release();
5270 }
5271 
// Returns a string consisting of num4ByteChars copies of one 4-byte UTF-8 character,
// i.e. a string that is exactly num4ByteChars * 4 bytes long.
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// F0 9F 8C 8D is a four-byte sequence, the longest valid UTF-8 encoding of a character.
	// The bytes are stored in a plain char array on purpose: some Microsoft compilers may
	// otherwise treat the literal as wide (16-bit) characters, and a C++11 UTF-8 literal
	// is avoided to keep older Microsoft compilers working.
	const char		earthAfricaBytes[]	= "\xF0\x9F\x8C\x8D";
	const size_t	bytesPerChar		= sizeof(earthAfricaBytes) - 1u; // Do not count the terminating NUL.

	std::string		repeated;
	repeated.reserve(num4ByteChars * bytesPerChar);

	for (size_t remaining = num4ByteChars; remaining != 0; --remaining)
		repeated.append(earthAfricaBytes, bytesPerChar);

	return repeated;
}
5288 
createOpSourceGroup(tcu::TestContext & testCtx)5289 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
5290 {
5291 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
5292 	vector<CaseParameter>			cases;
5293 	de::Random						rnd				(deStringHash(group->getName()));
5294 	const int						numElements		= 100;
5295 	vector<float>					positiveFloats	(numElements, 0);
5296 	vector<float>					negativeFloats	(numElements, 0);
5297 	const StringTemplate			shaderTemplate	(
5298 		"OpCapability Shader\n"
5299 		"OpMemoryModel Logical GLSL450\n"
5300 
5301 		"OpEntryPoint GLCompute %main \"main\" %id\n"
5302 		"OpExecutionMode %main LocalSize 1 1 1\n"
5303 
5304 		"${SOURCE}\n"
5305 
5306 		"OpName %main           \"main\"\n"
5307 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5308 
5309 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5310 
5311 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5312 
5313 		"%id        = OpVariable %uvec3ptr Input\n"
5314 		"%zero      = OpConstant %i32 0\n"
5315 
5316 		"%main      = OpFunction %void None %voidf\n"
5317 		"%label     = OpLabel\n"
5318 		"%idval     = OpLoad %uvec3 %id\n"
5319 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5320 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5321 		"%inval     = OpLoad %f32 %inloc\n"
5322 		"%neg       = OpFNegate %f32 %inval\n"
5323 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5324 		"             OpStore %outloc %neg\n"
5325 		"             OpReturn\n"
5326 		"             OpFunctionEnd\n");
5327 
5328 	cases.push_back(CaseParameter("unknown_source",							"OpSource Unknown 0"));
5329 	cases.push_back(CaseParameter("wrong_source",							"OpSource OpenCL_C 210"));
5330 	cases.push_back(CaseParameter("normal_filename",						"%fname = OpString \"filename\"\n"
5331 																			"OpSource GLSL 430 %fname"));
5332 	cases.push_back(CaseParameter("empty_filename",							"%fname = OpString \"\"\n"
5333 																			"OpSource GLSL 430 %fname"));
5334 	cases.push_back(CaseParameter("normal_source_code",						"%fname = OpString \"filename\"\n"
5335 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
5336 	cases.push_back(CaseParameter("empty_source_code",						"%fname = OpString \"filename\"\n"
5337 																			"OpSource GLSL 430 %fname \"\""));
5338 	cases.push_back(CaseParameter("long_source_code",						"%fname = OpString \"filename\"\n"
5339 																			"OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
5340 	cases.push_back(CaseParameter("utf8_source_code",						"%fname = OpString \"filename\"\n"
5341 																			"OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
5342 	cases.push_back(CaseParameter("normal_sourcecontinued",					"%fname = OpString \"filename\"\n"
5343 																			"OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
5344 																			"OpSourceContinued \"id main() {}\""));
5345 	cases.push_back(CaseParameter("empty_sourcecontinued",					"%fname = OpString \"filename\"\n"
5346 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5347 																			"OpSourceContinued \"\""));
5348 	cases.push_back(CaseParameter("long_sourcecontinued",					"%fname = OpString \"filename\"\n"
5349 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5350 																			"OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
5351 	cases.push_back(CaseParameter("utf8_sourcecontinued",					"%fname = OpString \"filename\"\n"
5352 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
5353 																			"OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
5354 	cases.push_back(CaseParameter("multi_sourcecontinued",					"%fname = OpString \"filename\"\n"
5355 																			"OpSource GLSL 430 %fname \"#version 430\n\"\n"
5356 																			"OpSourceContinued \"void\"\n"
5357 																			"OpSourceContinued \"main()\"\n"
5358 																			"OpSourceContinued \"{}\""));
5359 	cases.push_back(CaseParameter("empty_source_before_sourcecontinued",	"%fname = OpString \"filename\"\n"
5360 																			"OpSource GLSL 430 %fname \"\"\n"
5361 																			"OpSourceContinued \"#version 430\nvoid main() {}\""));
5362 
5363 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5364 
5365 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5366 		negativeFloats[ndx] = -positiveFloats[ndx];
5367 
5368 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5369 	{
5370 		map<string, string>		specializations;
5371 		ComputeShaderSpec		spec;
5372 
5373 		specializations["SOURCE"] = cases[caseNdx].param;
5374 		spec.assembly = shaderTemplate.specialize(specializations);
5375 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5376 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5377 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5378 
5379 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5380 	}
5381 
5382 	return group.release();
5383 }
5384 
createOpSourceExtensionGroup(tcu::TestContext & testCtx)5385 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
5386 {
5387 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
5388 	vector<CaseParameter>			cases;
5389 	de::Random						rnd				(deStringHash(group->getName()));
5390 	const int						numElements		= 100;
5391 	vector<float>					inputFloats		(numElements, 0);
5392 	vector<float>					outputFloats	(numElements, 0);
5393 	const StringTemplate			shaderTemplate	(
5394 		string(getComputeAsmShaderPreamble()) +
5395 
5396 		"OpSourceExtension \"${EXTENSION}\"\n"
5397 
5398 		"OpName %main           \"main\"\n"
5399 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5400 
5401 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5402 
5403 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5404 
5405 		"%id        = OpVariable %uvec3ptr Input\n"
5406 		"%zero      = OpConstant %i32 0\n"
5407 
5408 		"%main      = OpFunction %void None %voidf\n"
5409 		"%label     = OpLabel\n"
5410 		"%idval     = OpLoad %uvec3 %id\n"
5411 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5412 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5413 		"%inval     = OpLoad %f32 %inloc\n"
5414 		"%neg       = OpFNegate %f32 %inval\n"
5415 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5416 		"             OpStore %outloc %neg\n"
5417 		"             OpReturn\n"
5418 		"             OpFunctionEnd\n");
5419 
5420 	cases.push_back(CaseParameter("empty_extension",	""));
5421 	cases.push_back(CaseParameter("real_extension",		"GL_ARB_texture_rectangle"));
5422 	cases.push_back(CaseParameter("fake_extension",		"GL_ARB_im_the_ultimate_extension"));
5423 	cases.push_back(CaseParameter("utf8_extension",		"GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
5424 	cases.push_back(CaseParameter("long_extension",		makeLongUTF8String(65533) + "ccc")); // word count: 65535
5425 
5426 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
5427 
5428 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5429 		outputFloats[ndx] = -inputFloats[ndx];
5430 
5431 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5432 	{
5433 		map<string, string>		specializations;
5434 		ComputeShaderSpec		spec;
5435 
5436 		specializations["EXTENSION"] = cases[caseNdx].param;
5437 		spec.assembly = shaderTemplate.specialize(specializations);
5438 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
5439 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
5440 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5441 
5442 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5443 	}
5444 
5445 	return group.release();
5446 }
5447 
5448 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)5449 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
5450 {
5451 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
5452 	vector<CaseParameter>			cases;
5453 	de::Random						rnd				(deStringHash(group->getName()));
5454 	const int						numElements		= 100;
5455 	vector<float>					positiveFloats	(numElements, 0);
5456 	vector<float>					negativeFloats	(numElements, 0);
5457 	const StringTemplate			shaderTemplate	(
5458 		string(getComputeAsmShaderPreamble()) +
5459 
5460 		"OpSource GLSL 430\n"
5461 		"OpName %main           \"main\"\n"
5462 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5463 
5464 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5465 
5466 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
5467 		"%uvec2     = OpTypeVector %u32 2\n"
5468 		"%bvec3     = OpTypeVector %bool 3\n"
5469 		"%fvec4     = OpTypeVector %f32 4\n"
5470 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
5471 		"%const100  = OpConstant %u32 100\n"
5472 		"%uarr100   = OpTypeArray %i32 %const100\n"
5473 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
5474 		"%pointer   = OpTypePointer Function %i32\n"
5475 		+ string(getComputeAsmInputOutputBuffer()) +
5476 
5477 		"%null      = OpConstantNull ${TYPE}\n"
5478 
5479 		"%id        = OpVariable %uvec3ptr Input\n"
5480 		"%zero      = OpConstant %i32 0\n"
5481 
5482 		"%main      = OpFunction %void None %voidf\n"
5483 		"%label     = OpLabel\n"
5484 		"%idval     = OpLoad %uvec3 %id\n"
5485 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5486 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5487 		"%inval     = OpLoad %f32 %inloc\n"
5488 		"%neg       = OpFNegate %f32 %inval\n"
5489 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5490 		"             OpStore %outloc %neg\n"
5491 		"             OpReturn\n"
5492 		"             OpFunctionEnd\n");
5493 
5494 	cases.push_back(CaseParameter("bool",			"%bool"));
5495 	cases.push_back(CaseParameter("sint32",			"%i32"));
5496 	cases.push_back(CaseParameter("uint32",			"%u32"));
5497 	cases.push_back(CaseParameter("float32",		"%f32"));
5498 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
5499 	cases.push_back(CaseParameter("vec3bool",		"%bvec3"));
5500 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
5501 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
5502 	cases.push_back(CaseParameter("array",			"%uarr100"));
5503 	cases.push_back(CaseParameter("struct",			"%struct"));
5504 	cases.push_back(CaseParameter("pointer",		"%pointer"));
5505 
5506 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5507 
5508 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5509 		negativeFloats[ndx] = -positiveFloats[ndx];
5510 
5511 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5512 	{
5513 		map<string, string>		specializations;
5514 		ComputeShaderSpec		spec;
5515 
5516 		specializations["TYPE"] = cases[caseNdx].param;
5517 		spec.assembly = shaderTemplate.specialize(specializations);
5518 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5519 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5520 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5521 
5522 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5523 	}
5524 
5525 	return group.release();
5526 }
5527 
5528 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)5529 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
5530 {
5531 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
5532 	vector<CaseParameter>			cases;
5533 	de::Random						rnd				(deStringHash(group->getName()));
5534 	const int						numElements		= 100;
5535 	vector<float>					positiveFloats	(numElements, 0);
5536 	vector<float>					negativeFloats	(numElements, 0);
5537 	const StringTemplate			shaderTemplate	(
5538 		string(getComputeAsmShaderPreamble()) +
5539 
5540 		"OpSource GLSL 430\n"
5541 		"OpName %main           \"main\"\n"
5542 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5543 
5544 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5545 
5546 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5547 
5548 		"%id        = OpVariable %uvec3ptr Input\n"
5549 		"%zero      = OpConstant %i32 0\n"
5550 
5551 		"${CONSTANT}\n"
5552 
5553 		"%main      = OpFunction %void None %voidf\n"
5554 		"%label     = OpLabel\n"
5555 		"%idval     = OpLoad %uvec3 %id\n"
5556 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5557 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5558 		"%inval     = OpLoad %f32 %inloc\n"
5559 		"%neg       = OpFNegate %f32 %inval\n"
5560 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5561 		"             OpStore %outloc %neg\n"
5562 		"             OpReturn\n"
5563 		"             OpFunctionEnd\n");
5564 
5565 	cases.push_back(CaseParameter("vector",			"%five = OpConstant %u32 5\n"
5566 													"%const = OpConstantComposite %uvec3 %five %zero %five"));
5567 	cases.push_back(CaseParameter("matrix",			"%m3fvec3 = OpTypeMatrix %fvec3 3\n"
5568 													"%ten = OpConstant %f32 10.\n"
5569 													"%fzero = OpConstant %f32 0.\n"
5570 													"%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
5571 													"%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
5572 	cases.push_back(CaseParameter("struct",			"%m2vec3 = OpTypeMatrix %fvec3 2\n"
5573 													"%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
5574 													"%fzero = OpConstant %f32 0.\n"
5575 													"%one = OpConstant %f32 1.\n"
5576 													"%point5 = OpConstant %f32 0.5\n"
5577 													"%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
5578 													"%mat = OpConstantComposite %m2vec3 %vec %vec\n"
5579 													"%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
5580 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %u32 %f32\n"
5581 													"%st2 = OpTypeStruct %i32 %i32\n"
5582 													"%struct = OpTypeStruct %st1 %st2\n"
5583 													"%point5 = OpConstant %f32 0.5\n"
5584 													"%one = OpConstant %u32 1\n"
5585 													"%ten = OpConstant %i32 10\n"
5586 													"%st1val = OpConstantComposite %st1 %one %point5\n"
5587 													"%st2val = OpConstantComposite %st2 %ten %ten\n"
5588 													"%const = OpConstantComposite %struct %st1val %st2val"));
5589 
5590 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
5591 
5592 	for (size_t ndx = 0; ndx < numElements; ++ndx)
5593 		negativeFloats[ndx] = -positiveFloats[ndx];
5594 
5595 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
5596 	{
5597 		map<string, string>		specializations;
5598 		ComputeShaderSpec		spec;
5599 
5600 		specializations["CONSTANT"] = cases[caseNdx].param;
5601 		spec.assembly = shaderTemplate.specialize(specializations);
5602 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
5603 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
5604 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5605 
5606 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
5607 	}
5608 
5609 	return group.release();
5610 }
5611 
// Builds the normalized float 1.<fraction> * 2^exponent.
//
// The fraction is taken from the low 24 bits of `significand`, written the way a
// C99 hex-float fraction is: bit 23 is worth 2^-1, bit 22 is worth 2^-2, and so on
// down to bit 1, which is worth 2^-23. Bit 0 and every bit above bit 23 are ignored.
// The leading 1 is implicit, so only normalized values can be produced.
//
// For example 0x1.7f34p-12 is written constructNormalizedFloat(-12, 0x7f3400).
float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
{
	// Keep bits 23..1 and right-align them into a 23-bit fraction field.
	const deUint32	fractionBits	= (significand & 0xFFFFFFu) >> 1;

	// fractionBits < 2^23, so the conversion, the scaling and the sum are all exact.
	const float		mantissa		= 1.0f + std::ldexp(static_cast<float>(fractionBits), -23);

	return std::ldexp(mantissa, exponent);
}
5631 
5632 // Compare instruction for the OpQuantizeF16 compute exact case.
5633 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5634 bool compareOpQuantizeF16ComputeExactCase (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5635 {
5636 	if (outputAllocs.size() != 1)
5637 		return false;
5638 
5639 	// Only size is needed because we cannot compare Nans.
5640 	size_t byteSize = expectedOutputs[0].getByteSize();
5641 
5642 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
5643 
5644 	if (byteSize != 4*sizeof(float)) {
5645 		return false;
5646 	}
5647 
5648 	if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
5649 		*outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
5650 		return false;
5651 	}
5652 	outputAsFloat++;
5653 
5654 	if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
5655 		*outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
5656 		return false;
5657 	}
5658 	outputAsFloat++;
5659 
5660 	if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
5661 		*outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
5662 		return false;
5663 	}
5664 	outputAsFloat++;
5665 
5666 	if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
5667 		*outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
5668 		return false;
5669 	}
5670 
5671 	return true;
5672 }
5673 
5674 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)5675 bool compareNan (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
5676 {
5677 	if (outputAllocs.size() != 1)
5678 		return false;
5679 
5680 	// Only size is needed because we cannot compare Nans.
5681 	size_t byteSize = expectedOutputs[0].getByteSize();
5682 
5683 	const float* const	output_as_float	= static_cast<const float*>(outputAllocs[0]->getHostPtr());
5684 
5685 	for (size_t idx = 0; idx < byteSize / sizeof(float); ++idx)
5686 	{
5687 		if (!deFloatIsNaN(output_as_float[idx]))
5688 		{
5689 			return false;
5690 		}
5691 	}
5692 
5693 	return true;
5694 }
5695 
5696 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)5697 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
5698 {
5699 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
5700 
5701 	const std::string shader (
5702 		string(getComputeAsmShaderPreamble()) +
5703 
5704 		"OpSource GLSL 430\n"
5705 		"OpName %main           \"main\"\n"
5706 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5707 
5708 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5709 
5710 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5711 
5712 		"%id        = OpVariable %uvec3ptr Input\n"
5713 		"%zero      = OpConstant %i32 0\n"
5714 
5715 		"%main      = OpFunction %void None %voidf\n"
5716 		"%label     = OpLabel\n"
5717 		"%idval     = OpLoad %uvec3 %id\n"
5718 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5719 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
5720 		"%inval     = OpLoad %f32 %inloc\n"
5721 		"%quant     = OpQuantizeToF16 %f32 %inval\n"
5722 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5723 		"             OpStore %outloc %quant\n"
5724 		"             OpReturn\n"
5725 		"             OpFunctionEnd\n");
5726 
5727 	{
5728 		ComputeShaderSpec	spec;
5729 		const deUint32		numElements		= 100;
5730 		vector<float>		infinities;
5731 		vector<float>		results;
5732 
5733 		infinities.reserve(numElements);
5734 		results.reserve(numElements);
5735 
5736 		for (size_t idx = 0; idx < numElements; ++idx)
5737 		{
5738 			switch(idx % 4)
5739 			{
5740 				case 0:
5741 					infinities.push_back(std::numeric_limits<float>::infinity());
5742 					results.push_back(std::numeric_limits<float>::infinity());
5743 					break;
5744 				case 1:
5745 					infinities.push_back(-std::numeric_limits<float>::infinity());
5746 					results.push_back(-std::numeric_limits<float>::infinity());
5747 					break;
5748 				case 2:
5749 					infinities.push_back(std::ldexp(1.0f, 16));
5750 					results.push_back(std::numeric_limits<float>::infinity());
5751 					break;
5752 				case 3:
5753 					infinities.push_back(std::ldexp(-1.0f, 32));
5754 					results.push_back(-std::numeric_limits<float>::infinity());
5755 					break;
5756 			}
5757 		}
5758 
5759 		spec.assembly = shader;
5760 		spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
5761 		spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
5762 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5763 
5764 		group->addChild(new SpvAsmComputeShaderCase(
5765 			testCtx, "infinities", "Check that infinities propagated and created", spec));
5766 	}
5767 
5768 	{
5769 		ComputeShaderSpec	spec;
5770 		vector<float>		nans;
5771 		const deUint32		numElements		= 100;
5772 
5773 		nans.reserve(numElements);
5774 
5775 		for (size_t idx = 0; idx < numElements; ++idx)
5776 		{
5777 			if (idx % 2 == 0)
5778 			{
5779 				nans.push_back(std::numeric_limits<float>::quiet_NaN());
5780 			}
5781 			else
5782 			{
5783 				nans.push_back(-std::numeric_limits<float>::quiet_NaN());
5784 			}
5785 		}
5786 
5787 		spec.assembly = shader;
5788 		spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
5789 		spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
5790 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5791 		spec.verifyIO = &compareNan;
5792 
5793 		group->addChild(new SpvAsmComputeShaderCase(
5794 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
5795 	}
5796 
5797 	{
5798 		ComputeShaderSpec	spec;
5799 		vector<float>		small;
5800 		vector<float>		zeros;
5801 		const deUint32		numElements		= 100;
5802 
5803 		small.reserve(numElements);
5804 		zeros.reserve(numElements);
5805 
5806 		for (size_t idx = 0; idx < numElements; ++idx)
5807 		{
5808 			switch(idx % 6)
5809 			{
5810 				case 0:
5811 					small.push_back(0.f);
5812 					zeros.push_back(0.f);
5813 					break;
5814 				case 1:
5815 					small.push_back(-0.f);
5816 					zeros.push_back(-0.f);
5817 					break;
5818 				case 2:
5819 					small.push_back(std::ldexp(1.0f, -16));
5820 					zeros.push_back(0.f);
5821 					break;
5822 				case 3:
5823 					small.push_back(std::ldexp(-1.0f, -32));
5824 					zeros.push_back(-0.f);
5825 					break;
5826 				case 4:
5827 					small.push_back(std::ldexp(1.0f, -127));
5828 					zeros.push_back(0.f);
5829 					break;
5830 				case 5:
5831 					small.push_back(-std::ldexp(1.0f, -128));
5832 					zeros.push_back(-0.f);
5833 					break;
5834 			}
5835 		}
5836 
5837 		spec.assembly = shader;
5838 		spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
5839 		spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
5840 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5841 
5842 		group->addChild(new SpvAsmComputeShaderCase(
5843 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
5844 	}
5845 
5846 	{
5847 		ComputeShaderSpec	spec;
5848 		vector<float>		exact;
5849 		const deUint32		numElements		= 200;
5850 
5851 		exact.reserve(numElements);
5852 
5853 		for (size_t idx = 0; idx < numElements; ++idx)
5854 			exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
5855 
5856 		spec.assembly = shader;
5857 		spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
5858 		spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
5859 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5860 
5861 		group->addChild(new SpvAsmComputeShaderCase(
5862 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
5863 	}
5864 
5865 	{
5866 		ComputeShaderSpec	spec;
5867 		vector<float>		inputs;
5868 		const deUint32		numElements		= 4;
5869 
5870 		inputs.push_back(constructNormalizedFloat(8,	0x300300));
5871 		inputs.push_back(-constructNormalizedFloat(-7,	0x600800));
5872 		inputs.push_back(constructNormalizedFloat(2,	0x01E000));
5873 		inputs.push_back(constructNormalizedFloat(1,	0xFFE000));
5874 
5875 		spec.assembly = shader;
5876 		spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
5877 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5878 		spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
5879 		spec.numWorkGroups = IVec3(numElements, 1, 1);
5880 
5881 		group->addChild(new SpvAsmComputeShaderCase(
5882 			testCtx, "rounded", "Check that are rounded when needed", spec));
5883 	}
5884 
5885 	return group.release();
5886 }
5887 
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)5888 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
5889 {
5890 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
5891 
5892 	const std::string shader (
5893 		string(getComputeAsmShaderPreamble()) +
5894 
5895 		"OpName %main           \"main\"\n"
5896 		"OpName %id             \"gl_GlobalInvocationID\"\n"
5897 
5898 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
5899 
5900 		"OpDecorate %sc_0  SpecId 0\n"
5901 		"OpDecorate %sc_1  SpecId 1\n"
5902 		"OpDecorate %sc_2  SpecId 2\n"
5903 		"OpDecorate %sc_3  SpecId 3\n"
5904 		"OpDecorate %sc_4  SpecId 4\n"
5905 		"OpDecorate %sc_5  SpecId 5\n"
5906 
5907 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
5908 
5909 		"%id        = OpVariable %uvec3ptr Input\n"
5910 		"%zero      = OpConstant %i32 0\n"
5911 		"%c_u32_6   = OpConstant %u32 6\n"
5912 
5913 		"%sc_0      = OpSpecConstant %f32 0.\n"
5914 		"%sc_1      = OpSpecConstant %f32 0.\n"
5915 		"%sc_2      = OpSpecConstant %f32 0.\n"
5916 		"%sc_3      = OpSpecConstant %f32 0.\n"
5917 		"%sc_4      = OpSpecConstant %f32 0.\n"
5918 		"%sc_5      = OpSpecConstant %f32 0.\n"
5919 
5920 		"%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
5921 		"%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
5922 		"%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
5923 		"%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
5924 		"%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
5925 		"%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
5926 
5927 		"%main      = OpFunction %void None %voidf\n"
5928 		"%label     = OpLabel\n"
5929 		"%idval     = OpLoad %uvec3 %id\n"
5930 		"%x         = OpCompositeExtract %u32 %idval 0\n"
5931 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
5932 		"%selector  = OpUMod %u32 %x %c_u32_6\n"
5933 		"            OpSelectionMerge %exit None\n"
5934 		"            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
5935 
5936 		"%case0     = OpLabel\n"
5937 		"             OpStore %outloc %sc_0_quant\n"
5938 		"             OpBranch %exit\n"
5939 
5940 		"%case1     = OpLabel\n"
5941 		"             OpStore %outloc %sc_1_quant\n"
5942 		"             OpBranch %exit\n"
5943 
5944 		"%case2     = OpLabel\n"
5945 		"             OpStore %outloc %sc_2_quant\n"
5946 		"             OpBranch %exit\n"
5947 
5948 		"%case3     = OpLabel\n"
5949 		"             OpStore %outloc %sc_3_quant\n"
5950 		"             OpBranch %exit\n"
5951 
5952 		"%case4     = OpLabel\n"
5953 		"             OpStore %outloc %sc_4_quant\n"
5954 		"             OpBranch %exit\n"
5955 
5956 		"%case5     = OpLabel\n"
5957 		"             OpStore %outloc %sc_5_quant\n"
5958 		"             OpBranch %exit\n"
5959 
5960 		"%exit      = OpLabel\n"
5961 		"             OpReturn\n"
5962 
5963 		"             OpFunctionEnd\n");
5964 
5965 	{
5966 		ComputeShaderSpec	spec;
5967 		const deUint8		numCases	= 4;
5968 		vector<float>		inputs		(numCases, 0.f);
5969 		vector<float>		outputs;
5970 
5971 		spec.assembly		= shader;
5972 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
5973 
5974 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
5975 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
5976 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
5977 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
5978 
5979 		outputs.push_back(std::numeric_limits<float>::infinity());
5980 		outputs.push_back(-std::numeric_limits<float>::infinity());
5981 		outputs.push_back(std::numeric_limits<float>::infinity());
5982 		outputs.push_back(-std::numeric_limits<float>::infinity());
5983 
5984 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
5985 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
5986 
5987 		group->addChild(new SpvAsmComputeShaderCase(
5988 			testCtx, "infinities", "Check that infinities propagated and created", spec));
5989 	}
5990 
5991 	{
5992 		ComputeShaderSpec	spec;
5993 		const deUint8		numCases	= 2;
5994 		vector<float>		inputs		(numCases, 0.f);
5995 		vector<float>		outputs;
5996 
5997 		spec.assembly		= shader;
5998 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
5999 		spec.verifyIO		= &compareNan;
6000 
6001 		outputs.push_back(std::numeric_limits<float>::quiet_NaN());
6002 		outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
6003 
6004 		for (deUint8 idx = 0; idx < numCases; ++idx)
6005 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6006 
6007 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6008 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6009 
6010 		group->addChild(new SpvAsmComputeShaderCase(
6011 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
6012 	}
6013 
6014 	{
6015 		ComputeShaderSpec	spec;
6016 		const deUint8		numCases	= 6;
6017 		vector<float>		inputs		(numCases, 0.f);
6018 		vector<float>		outputs;
6019 
6020 		spec.assembly		= shader;
6021 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6022 
6023 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(0.f));
6024 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-0.f));
6025 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
6026 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
6027 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
6028 		spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
6029 
6030 		outputs.push_back(0.f);
6031 		outputs.push_back(-0.f);
6032 		outputs.push_back(0.f);
6033 		outputs.push_back(-0.f);
6034 		outputs.push_back(0.f);
6035 		outputs.push_back(-0.f);
6036 
6037 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6038 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6039 
6040 		group->addChild(new SpvAsmComputeShaderCase(
6041 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
6042 	}
6043 
6044 	{
6045 		ComputeShaderSpec	spec;
6046 		const deUint8		numCases	= 6;
6047 		vector<float>		inputs		(numCases, 0.f);
6048 		vector<float>		outputs;
6049 
6050 		spec.assembly		= shader;
6051 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6052 
6053 		for (deUint8 idx = 0; idx < 6; ++idx)
6054 		{
6055 			const float f = static_cast<float>(idx * 10 - 30) / 4.f;
6056 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(f));
6057 			outputs.push_back(f);
6058 		}
6059 
6060 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6061 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6062 
6063 		group->addChild(new SpvAsmComputeShaderCase(
6064 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
6065 	}
6066 
6067 	{
6068 		ComputeShaderSpec	spec;
6069 		const deUint8		numCases	= 4;
6070 		vector<float>		inputs		(numCases, 0.f);
6071 		vector<float>		outputs;
6072 
6073 		spec.assembly		= shader;
6074 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
6075 		spec.verifyIO		= &compareOpQuantizeF16ComputeExactCase;
6076 
6077 		outputs.push_back(constructNormalizedFloat(8, 0x300300));
6078 		outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
6079 		outputs.push_back(constructNormalizedFloat(2, 0x01E000));
6080 		outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
6081 
6082 		for (deUint8 idx = 0; idx < numCases; ++idx)
6083 			spec.specConstants.append<deInt32>(bitwiseCast<deUint32>(outputs[idx]));
6084 
6085 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
6086 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
6087 
6088 		group->addChild(new SpvAsmComputeShaderCase(
6089 			testCtx, "rounded", "Check that are rounded when needed", spec));
6090 	}
6091 
6092 	return group.release();
6093 }
6094 
6095 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)6096 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
6097 {
6098 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
6099 	ComputeShaderSpec				spec;
6100 	de::Random						rnd				(deStringHash(group->getName()));
6101 	const int						numElements		= 100;
6102 	vector<float>					positiveFloats	(numElements, 0);
6103 	vector<float>					negativeFloats	(numElements, 0);
6104 
6105 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
6106 
6107 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6108 		negativeFloats[ndx] = -positiveFloats[ndx];
6109 
6110 	spec.assembly =
6111 		"OpCapability Shader\n"
6112 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
6113 		"OpMemoryModel Logical GLSL450\n"
6114 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6115 		"OpExecutionMode %main LocalSize 1 1 1\n"
6116 
6117 		"OpSource GLSL 430\n"
6118 		"OpName %main           \"main\"\n"
6119 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6120 
6121 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6122 
6123 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6124 
6125 		"%fmat      = OpTypeMatrix %fvec3 3\n"
6126 		"%ten       = OpConstant %u32 10\n"
6127 		"%f32arr10  = OpTypeArray %f32 %ten\n"
6128 		"%fst       = OpTypeStruct %f32 %f32\n"
6129 
6130 		+ string(getComputeAsmInputOutputBuffer()) +
6131 
6132 		"%id        = OpVariable %uvec3ptr Input\n"
6133 		"%zero      = OpConstant %i32 0\n"
6134 
6135 		// Create a bunch of null values
6136 		"%unull     = OpConstantNull %u32\n"
6137 		"%fnull     = OpConstantNull %f32\n"
6138 		"%vnull     = OpConstantNull %fvec3\n"
6139 		"%mnull     = OpConstantNull %fmat\n"
6140 		"%anull     = OpConstantNull %f32arr10\n"
6141 		"%snull     = OpConstantComposite %fst %fnull %fnull\n"
6142 
6143 		"%main      = OpFunction %void None %voidf\n"
6144 		"%label     = OpLabel\n"
6145 		"%idval     = OpLoad %uvec3 %id\n"
6146 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6147 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6148 		"%inval     = OpLoad %f32 %inloc\n"
6149 		"%neg       = OpFNegate %f32 %inval\n"
6150 
6151 		// Get the abs() of (a certain element of) those null values
6152 		"%unull_cov = OpConvertUToF %f32 %unull\n"
6153 		"%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
6154 		"%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
6155 		"%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
6156 		"%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
6157 		"%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
6158 		"%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
6159 		"%anull_3   = OpCompositeExtract %f32 %anull 3\n"
6160 		"%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
6161 		"%snull_1   = OpCompositeExtract %f32 %snull 1\n"
6162 		"%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
6163 
6164 		// Add them all
6165 		"%add1      = OpFAdd %f32 %neg  %unull_abs\n"
6166 		"%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
6167 		"%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
6168 		"%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
6169 		"%add5      = OpFAdd %f32 %add4 %anull_abs\n"
6170 		"%final     = OpFAdd %f32 %add5 %snull_abs\n"
6171 
6172 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6173 		"             OpStore %outloc %final\n" // write to output
6174 		"             OpReturn\n"
6175 		"             OpFunctionEnd\n";
6176 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
6177 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
6178 	spec.numWorkGroups = IVec3(numElements, 1, 1);
6179 
6180 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
6181 
6182 	return group.release();
6183 }
6184 
6185 // Assembly code used for testing loop control is based on GLSL source code:
6186 // #version 430
6187 //
6188 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6189 //   float elements[];
6190 // } input_data;
6191 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6192 //   float elements[];
6193 // } output_data;
6194 //
6195 // void main() {
6196 //   uint x = gl_GlobalInvocationID.x;
6197 //   output_data.elements[x] = input_data.elements[x];
6198 //   for (uint i = 0; i < 4; ++i)
6199 //     output_data.elements[x] += 1.f;
6200 // }
createLoopControlGroup(tcu::TestContext & testCtx)6201 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
6202 {
6203 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
6204 	vector<CaseParameter>			cases;
6205 	de::Random						rnd				(deStringHash(group->getName()));
6206 	const int						numElements		= 100;
6207 	vector<float>					inputFloats		(numElements, 0);
6208 	vector<float>					outputFloats	(numElements, 0);
6209 	const StringTemplate			shaderTemplate	(
6210 		string(getComputeAsmShaderPreamble()) +
6211 
6212 		"OpSource GLSL 430\n"
6213 		"OpName %main \"main\"\n"
6214 		"OpName %id \"gl_GlobalInvocationID\"\n"
6215 
6216 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6217 
6218 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6219 
6220 		"%u32ptr      = OpTypePointer Function %u32\n"
6221 
6222 		"%id          = OpVariable %uvec3ptr Input\n"
6223 		"%zero        = OpConstant %i32 0\n"
6224 		"%uzero       = OpConstant %u32 0\n"
6225 		"%one         = OpConstant %i32 1\n"
6226 		"%constf1     = OpConstant %f32 1.0\n"
6227 		"%four        = OpConstant %u32 4\n"
6228 
6229 		"%main        = OpFunction %void None %voidf\n"
6230 		"%entry       = OpLabel\n"
6231 		"%i           = OpVariable %u32ptr Function\n"
6232 		"               OpStore %i %uzero\n"
6233 
6234 		"%idval       = OpLoad %uvec3 %id\n"
6235 		"%x           = OpCompositeExtract %u32 %idval 0\n"
6236 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
6237 		"%inval       = OpLoad %f32 %inloc\n"
6238 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
6239 		"               OpStore %outloc %inval\n"
6240 		"               OpBranch %loop_entry\n"
6241 
6242 		"%loop_entry  = OpLabel\n"
6243 		"%i_val       = OpLoad %u32 %i\n"
6244 		"%cmp_lt      = OpULessThan %bool %i_val %four\n"
6245 		"               OpLoopMerge %loop_merge %loop_body ${CONTROL}\n"
6246 		"               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
6247 		"%loop_body   = OpLabel\n"
6248 		"%outval      = OpLoad %f32 %outloc\n"
6249 		"%addf1       = OpFAdd %f32 %outval %constf1\n"
6250 		"               OpStore %outloc %addf1\n"
6251 		"%new_i       = OpIAdd %u32 %i_val %one\n"
6252 		"               OpStore %i %new_i\n"
6253 		"               OpBranch %loop_entry\n"
6254 		"%loop_merge  = OpLabel\n"
6255 		"               OpReturn\n"
6256 		"               OpFunctionEnd\n");
6257 
6258 	cases.push_back(CaseParameter("none",				"None"));
6259 	cases.push_back(CaseParameter("unroll",				"Unroll"));
6260 	cases.push_back(CaseParameter("dont_unroll",		"DontUnroll"));
6261 
6262 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6263 
6264 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6265 		outputFloats[ndx] = inputFloats[ndx] + 4.f;
6266 
6267 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6268 	{
6269 		map<string, string>		specializations;
6270 		ComputeShaderSpec		spec;
6271 
6272 		specializations["CONTROL"] = cases[caseNdx].param;
6273 		spec.assembly = shaderTemplate.specialize(specializations);
6274 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6275 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6276 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6277 
6278 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6279 	}
6280 
6281 	group->addChild(new SpvAsmLoopControlDependencyLengthCase(testCtx, "dependency_length", "dependency_length"));
6282 	group->addChild(new SpvAsmLoopControlDependencyInfiniteCase(testCtx, "dependency_infinite", "dependency_infinite"));
6283 
6284 	return group.release();
6285 }
6286 
6287 // Assembly code used for testing selection control is based on GLSL source code:
6288 // #version 430
6289 //
6290 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6291 //   float elements[];
6292 // } input_data;
6293 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6294 //   float elements[];
6295 // } output_data;
6296 //
6297 // void main() {
6298 //   uint x = gl_GlobalInvocationID.x;
6299 //   float val = input_data.elements[x];
6300 //   if (val > 10.f)
6301 //     output_data.elements[x] = val + 1.f;
6302 //   else
6303 //     output_data.elements[x] = val - 1.f;
6304 // }
createSelectionControlGroup(tcu::TestContext & testCtx)6305 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
6306 {
6307 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
6308 	vector<CaseParameter>			cases;
6309 	de::Random						rnd				(deStringHash(group->getName()));
6310 	const int						numElements		= 100;
6311 	vector<float>					inputFloats		(numElements, 0);
6312 	vector<float>					outputFloats	(numElements, 0);
6313 	const StringTemplate			shaderTemplate	(
6314 		string(getComputeAsmShaderPreamble()) +
6315 
6316 		"OpSource GLSL 430\n"
6317 		"OpName %main \"main\"\n"
6318 		"OpName %id \"gl_GlobalInvocationID\"\n"
6319 
6320 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6321 
6322 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6323 
6324 		"%id       = OpVariable %uvec3ptr Input\n"
6325 		"%zero     = OpConstant %i32 0\n"
6326 		"%constf1  = OpConstant %f32 1.0\n"
6327 		"%constf10 = OpConstant %f32 10.0\n"
6328 
6329 		"%main     = OpFunction %void None %voidf\n"
6330 		"%entry    = OpLabel\n"
6331 		"%idval    = OpLoad %uvec3 %id\n"
6332 		"%x        = OpCompositeExtract %u32 %idval 0\n"
6333 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
6334 		"%inval    = OpLoad %f32 %inloc\n"
6335 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
6336 		"%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
6337 
6338 		"            OpSelectionMerge %if_end ${CONTROL}\n"
6339 		"            OpBranchConditional %cmp_gt %if_true %if_false\n"
6340 		"%if_true  = OpLabel\n"
6341 		"%addf1    = OpFAdd %f32 %inval %constf1\n"
6342 		"            OpStore %outloc %addf1\n"
6343 		"            OpBranch %if_end\n"
6344 		"%if_false = OpLabel\n"
6345 		"%subf1    = OpFSub %f32 %inval %constf1\n"
6346 		"            OpStore %outloc %subf1\n"
6347 		"            OpBranch %if_end\n"
6348 		"%if_end   = OpLabel\n"
6349 		"            OpReturn\n"
6350 		"            OpFunctionEnd\n");
6351 
6352 	cases.push_back(CaseParameter("none",					"None"));
6353 	cases.push_back(CaseParameter("flatten",				"Flatten"));
6354 	cases.push_back(CaseParameter("dont_flatten",			"DontFlatten"));
6355 	cases.push_back(CaseParameter("flatten_dont_flatten",	"DontFlatten|Flatten"));
6356 
6357 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6358 
6359 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6360 	floorAll(inputFloats);
6361 
6362 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6363 		outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
6364 
6365 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6366 	{
6367 		map<string, string>		specializations;
6368 		ComputeShaderSpec		spec;
6369 
6370 		specializations["CONTROL"] = cases[caseNdx].param;
6371 		spec.assembly = shaderTemplate.specialize(specializations);
6372 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6373 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6374 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6375 
6376 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6377 	}
6378 
6379 	return group.release();
6380 }
6381 
getOpNameAbuseCases(vector<CaseParameter> & abuseCases)6382 void getOpNameAbuseCases (vector<CaseParameter> &abuseCases)
6383 {
6384 	// Generate a long name.
6385 	std::string longname;
6386 	longname.resize(65535, 'k'); // max string literal, spir-v 2.17
6387 
6388 	// Some bad names, abusing utf-8 encoding. This may also cause problems
6389 	// with the logs.
6390 	// 1. Various illegal code points in utf-8
6391 	std::string utf8illegal =
6392 		"Illegal bytes in UTF-8: "
6393 		"\xc0 \xc1 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff"
6394 		"illegal surrogates: \xed\xad\xbf \xed\xbe\x80";
6395 
6396 	// 2. Zero encoded as overlong, not exactly legal but often supported to differentiate from terminating zero
6397 	std::string utf8nul = "UTF-8 encoded nul \xC0\x80 (should not end name)";
6398 
6399 	// 3. Some overlong encodings
6400 	std::string utf8overlong =
6401 		"UTF-8 overlong \xF0\x82\x82\xAC \xfc\x83\xbf\xbf\xbf\xbf \xf8\x87\xbf\xbf\xbf "
6402 		"\xf0\x8f\xbf\xbf";
6403 
6404 	// 4. Internet "zalgo" meme "bleeding text"
6405 	std::string utf8zalgo =
6406 		"\x56\xcc\xb5\xcc\x85\xcc\x94\xcc\x88\xcd\x8a\xcc\x91\xcc\x88\xcd\x91\xcc\x83\xcd\x82"
6407 		"\xcc\x83\xcd\x90\xcc\x8a\xcc\x92\xcc\x92\xcd\x8b\xcc\x94\xcd\x9d\xcc\x98\xcc\xab\xcc"
6408 		"\xae\xcc\xa9\xcc\xad\xcc\x97\xcc\xb0\x75\xcc\xb6\xcc\xbe\xcc\x80\xcc\x82\xcc\x84\xcd"
6409 		"\x84\xcc\x90\xcd\x86\xcc\x9a\xcd\x84\xcc\x9b\xcd\x86\xcd\x92\xcc\x9a\xcd\x99\xcd\x99"
6410 		"\xcc\xbb\xcc\x98\xcd\x8e\xcd\x88\xcd\x9a\xcc\xa6\xcc\x9c\xcc\xab\xcc\x99\xcd\x94\xcd"
6411 		"\x99\xcd\x95\xcc\xa5\xcc\xab\xcd\x89\x6c\xcc\xb8\xcc\x8e\xcc\x8b\xcc\x8b\xcc\x9a\xcc"
6412 		"\x8e\xcd\x9d\xcc\x80\xcc\xa1\xcc\xad\xcd\x9c\xcc\xba\xcc\x96\xcc\xb3\xcc\xa2\xcd\x8e"
6413 		"\xcc\xa2\xcd\x96\x6b\xcc\xb8\xcc\x84\xcd\x81\xcc\xbf\xcc\x8d\xcc\x89\xcc\x85\xcc\x92"
6414 		"\xcc\x84\xcc\x90\xcd\x81\xcc\x93\xcd\x90\xcd\x92\xcd\x9d\xcc\x84\xcd\x98\xcd\x9d\xcd"
6415 		"\xa0\xcd\x91\xcc\x94\xcc\xb9\xcd\x93\xcc\xa5\xcd\x87\xcc\xad\xcc\xa7\xcd\x96\xcd\x99"
6416 		"\xcc\x9d\xcc\xbc\xcd\x96\xcd\x93\xcc\x9d\xcc\x99\xcc\xa8\xcc\xb1\xcd\x85\xcc\xba\xcc"
6417 		"\xa7\x61\xcc\xb8\xcc\x8e\xcc\x81\xcd\x90\xcd\x84\xcd\x8c\xcc\x8c\xcc\x85\xcd\x86\xcc"
6418 		"\x84\xcd\x84\xcc\x90\xcc\x84\xcc\x8d\xcd\x99\xcd\x8d\xcc\xb0\xcc\xa3\xcc\xa6\xcd\x89"
6419 		"\xcd\x8d\xcd\x87\xcc\x98\xcd\x8d\xcc\xa4\xcd\x9a\xcd\x8e\xcc\xab\xcc\xb9\xcc\xac\xcc"
6420 		"\xa2\xcd\x87\xcc\xa0\xcc\xb3\xcd\x89\xcc\xb9\xcc\xa7\xcc\xa6\xcd\x89\xcd\x95\x6e\xcc"
6421 		"\xb8\xcd\x8a\xcc\x8a\xcd\x82\xcc\x9b\xcd\x81\xcd\x90\xcc\x85\xcc\x9b\xcd\x80\xcd\x91"
6422 		"\xcd\x9b\xcc\x81\xcd\x81\xcc\x9a\xcc\xb3\xcd\x9c\xcc\x9e\xcc\x9d\xcd\x99\xcc\xa2\xcd"
6423 		"\x93\xcd\x96\xcc\x97\xff";
6424 
6425 	// General name abuses
6426 	abuseCases.push_back(CaseParameter("_has_very_long_name", longname));
6427 	abuseCases.push_back(CaseParameter("_utf8_illegal", utf8illegal));
6428 	abuseCases.push_back(CaseParameter("_utf8_nul", utf8nul));
6429 	abuseCases.push_back(CaseParameter("_utf8_overlong", utf8overlong));
6430 	abuseCases.push_back(CaseParameter("_utf8_zalgo", utf8zalgo));
6431 
6432 	// GL keywords
6433 	abuseCases.push_back(CaseParameter("_is_gl_Position", "gl_Position"));
6434 	abuseCases.push_back(CaseParameter("_is_gl_InstanceID", "gl_InstanceID"));
6435 	abuseCases.push_back(CaseParameter("_is_gl_PrimitiveID", "gl_PrimitiveID"));
6436 	abuseCases.push_back(CaseParameter("_is_gl_TessCoord", "gl_TessCoord"));
6437 	abuseCases.push_back(CaseParameter("_is_gl_PerVertex", "gl_PerVertex"));
6438 	abuseCases.push_back(CaseParameter("_is_gl_InvocationID", "gl_InvocationID"));
6439 	abuseCases.push_back(CaseParameter("_is_gl_PointSize", "gl_PointSize"));
6440 	abuseCases.push_back(CaseParameter("_is_gl_PointCoord", "gl_PointCoord"));
6441 	abuseCases.push_back(CaseParameter("_is_gl_Layer", "gl_Layer"));
6442 	abuseCases.push_back(CaseParameter("_is_gl_FragDepth", "gl_FragDepth"));
6443 	abuseCases.push_back(CaseParameter("_is_gl_NumWorkGroups", "gl_NumWorkGroups"));
6444 	abuseCases.push_back(CaseParameter("_is_gl_WorkGroupID", "gl_WorkGroupID"));
6445 	abuseCases.push_back(CaseParameter("_is_gl_LocalInvocationID", "gl_LocalInvocationID"));
6446 	abuseCases.push_back(CaseParameter("_is_gl_GlobalInvocationID", "gl_GlobalInvocationID"));
6447 	abuseCases.push_back(CaseParameter("_is_gl_MaxVertexAttribs", "gl_MaxVertexAttribs"));
6448 	abuseCases.push_back(CaseParameter("_is_gl_MaxViewports", "gl_MaxViewports"));
6449 	abuseCases.push_back(CaseParameter("_is_gl_MaxComputeWorkGroupCount", "gl_MaxComputeWorkGroupCount"));
6450 	abuseCases.push_back(CaseParameter("_is_mat3", "mat3"));
6451 	abuseCases.push_back(CaseParameter("_is_volatile", "volatile"));
6452 	abuseCases.push_back(CaseParameter("_is_inout", "inout"));
6453 	abuseCases.push_back(CaseParameter("_is_isampler3d", "isampler3d"));
6454 }
6455 
createOpNameGroup(tcu::TestContext & testCtx)6456 tcu::TestCaseGroup* createOpNameGroup (tcu::TestContext& testCtx)
6457 {
6458 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opname", "Tests OpName cases"));
6459 	de::MovePtr<tcu::TestCaseGroup>	entryMainGroup	(new tcu::TestCaseGroup(testCtx, "entry_main", "OpName tests with entry main"));
6460 	de::MovePtr<tcu::TestCaseGroup>	entryNotGroup	(new tcu::TestCaseGroup(testCtx, "entry_rdc", "OpName tests with entry rdc"));
6461 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup		(new tcu::TestCaseGroup(testCtx, "abuse", "OpName abuse tests"));
6462 	vector<CaseParameter>			cases;
6463 	vector<CaseParameter>			abuseCases;
6464 	vector<string>					testFunc;
6465 	de::Random						rnd				(deStringHash(group->getName()));
6466 	const int						numElements		= 128;
6467 	vector<float>					inputFloats		(numElements, 0);
6468 	vector<float>					outputFloats	(numElements, 0);
6469 
6470 	getOpNameAbuseCases(abuseCases);
6471 
6472 	fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6473 
6474 	for(size_t ndx = 0; ndx < numElements; ++ndx)
6475 		outputFloats[ndx] = -inputFloats[ndx];
6476 
6477 	const string commonShaderHeader =
6478 		"OpCapability Shader\n"
6479 		"OpMemoryModel Logical GLSL450\n"
6480 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6481 		"OpExecutionMode %main LocalSize 1 1 1\n";
6482 
6483 	const string commonShaderFooter =
6484 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6485 
6486 		+ string(getComputeAsmInputOutputBufferTraits())
6487 		+ string(getComputeAsmCommonTypes())
6488 		+ string(getComputeAsmInputOutputBuffer()) +
6489 
6490 		"%id        = OpVariable %uvec3ptr Input\n"
6491 		"%zero      = OpConstant %i32 0\n"
6492 
6493 		"%func      = OpFunction %void None %voidf\n"
6494 		"%5         = OpLabel\n"
6495 		"             OpReturn\n"
6496 		"             OpFunctionEnd\n"
6497 
6498 		"%main      = OpFunction %void None %voidf\n"
6499 		"%entry     = OpLabel\n"
6500 		"%7         = OpFunctionCall %void %func\n"
6501 
6502 		"%idval     = OpLoad %uvec3 %id\n"
6503 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6504 
6505 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6506 		"%inval     = OpLoad %f32 %inloc\n"
6507 		"%neg       = OpFNegate %f32 %inval\n"
6508 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6509 		"             OpStore %outloc %neg\n"
6510 
6511 		"             OpReturn\n"
6512 		"             OpFunctionEnd\n";
6513 
6514 	const StringTemplate shaderTemplate (
6515 		"OpCapability Shader\n"
6516 		"OpMemoryModel Logical GLSL450\n"
6517 		"OpEntryPoint GLCompute %main \"${ENTRY}\" %id\n"
6518 		"OpExecutionMode %main LocalSize 1 1 1\n"
6519 		"OpName %${ID} \"${NAME}\"\n" +
6520 		commonShaderFooter);
6521 
6522 	const std::string multipleNames =
6523 		commonShaderHeader +
6524 		"OpName %main \"to_be\"\n"
6525 		"OpName %id   \"or_not\"\n"
6526 		"OpName %main \"to_be\"\n"
6527 		"OpName %main \"makes_no\"\n"
6528 		"OpName %func \"difference\"\n"
6529 		"OpName %5    \"to_me\"\n" +
6530 		commonShaderFooter;
6531 
6532 	{
6533 		ComputeShaderSpec	spec;
6534 
6535 		spec.assembly		= multipleNames;
6536 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6537 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6538 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6539 
6540 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "main_has_multiple_names", "multiple_names", spec));
6541 	}
6542 
6543 	const std::string everythingNamed =
6544 		commonShaderHeader +
6545 		"OpName %main   \"name1\"\n"
6546 		"OpName %id     \"name2\"\n"
6547 		"OpName %zero   \"name3\"\n"
6548 		"OpName %entry  \"name4\"\n"
6549 		"OpName %func   \"name5\"\n"
6550 		"OpName %5      \"name6\"\n"
6551 		"OpName %7      \"name7\"\n"
6552 		"OpName %idval  \"name8\"\n"
6553 		"OpName %inloc  \"name9\"\n"
6554 		"OpName %inval  \"name10\"\n"
6555 		"OpName %neg    \"name11\"\n"
6556 		"OpName %outloc \"name12\"\n"+
6557 		commonShaderFooter;
6558 	{
6559 		ComputeShaderSpec	spec;
6560 
6561 		spec.assembly		= everythingNamed;
6562 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6563 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6564 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6565 
6566 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named", "everything_named", spec));
6567 	}
6568 
6569 	const std::string everythingNamedTheSame =
6570 		commonShaderHeader +
6571 		"OpName %main   \"the_same\"\n"
6572 		"OpName %id     \"the_same\"\n"
6573 		"OpName %zero   \"the_same\"\n"
6574 		"OpName %entry  \"the_same\"\n"
6575 		"OpName %func   \"the_same\"\n"
6576 		"OpName %5      \"the_same\"\n"
6577 		"OpName %7      \"the_same\"\n"
6578 		"OpName %idval  \"the_same\"\n"
6579 		"OpName %inloc  \"the_same\"\n"
6580 		"OpName %inval  \"the_same\"\n"
6581 		"OpName %neg    \"the_same\"\n"
6582 		"OpName %outloc \"the_same\"\n"+
6583 		commonShaderFooter;
6584 	{
6585 		ComputeShaderSpec	spec;
6586 
6587 		spec.assembly		= everythingNamedTheSame;
6588 		spec.numWorkGroups	= IVec3(numElements, 1, 1);
6589 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6590 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6591 
6592 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6593 	}
6594 
6595 	// main_is_...
6596 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6597 	{
6598 		map<string, string>	specializations;
6599 		ComputeShaderSpec	spec;
6600 
6601 		specializations["ENTRY"]	= "main";
6602 		specializations["ID"]		= "main";
6603 		specializations["NAME"]		= abuseCases[ndx].param;
6604 		spec.assembly				= shaderTemplate.specialize(specializations);
6605 		spec.numWorkGroups			= IVec3(numElements, 1, 1);
6606 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6607 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6608 
6609 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("main") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6610 	}
6611 
6612 	// x_is_....
6613 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6614 	{
6615 		map<string, string>	specializations;
6616 		ComputeShaderSpec	spec;
6617 
6618 		specializations["ENTRY"]	= "main";
6619 		specializations["ID"]		= "x";
6620 		specializations["NAME"]		= abuseCases[ndx].param;
6621 		spec.assembly				= shaderTemplate.specialize(specializations);
6622 		spec.numWorkGroups			= IVec3(numElements, 1, 1);
6623 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6624 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6625 
6626 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6627 	}
6628 
6629 	cases.push_back(CaseParameter("_is_main", "main"));
6630 	cases.push_back(CaseParameter("_is_not_main", "not_main"));
6631 	testFunc.push_back("main");
6632 	testFunc.push_back("func");
6633 
6634 	for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6635 	{
6636 		for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6637 		{
6638 			map<string, string>	specializations;
6639 			ComputeShaderSpec	spec;
6640 
6641 			specializations["ENTRY"]	= "main";
6642 			specializations["ID"]		= testFunc[fNdx];
6643 			specializations["NAME"]		= cases[ndx].param;
6644 			spec.assembly				= shaderTemplate.specialize(specializations);
6645 			spec.numWorkGroups			= IVec3(numElements, 1, 1);
6646 			spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6647 			spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6648 
6649 			entryMainGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6650 		}
6651 	}
6652 
6653 	cases.push_back(CaseParameter("_is_entry", "rdc"));
6654 
6655 	for(size_t fNdx = 0; fNdx < testFunc.size(); ++fNdx)
6656 	{
6657 		for(size_t ndx = 0; ndx < cases.size(); ++ndx)
6658 		{
6659 			map<string, string>     specializations;
6660 			ComputeShaderSpec       spec;
6661 
6662 			specializations["ENTRY"]	= "rdc";
6663 			specializations["ID"]		= testFunc[fNdx];
6664 			specializations["NAME"]		= cases[ndx].param;
6665 			spec.assembly				= shaderTemplate.specialize(specializations);
6666 			spec.numWorkGroups			= IVec3(numElements, 1, 1);
6667 			spec.entryPoint				= "rdc";
6668 			spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6669 			spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6670 
6671 			entryNotGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (testFunc[fNdx] + cases[ndx].name).c_str(), cases[ndx].name, spec));
6672 		}
6673 	}
6674 
6675 	group->addChild(entryMainGroup.release());
6676 	group->addChild(entryNotGroup.release());
6677 	group->addChild(abuseGroup.release());
6678 
6679 	return group.release();
6680 }
6681 
createOpMemberNameGroup(tcu::TestContext & testCtx)6682 tcu::TestCaseGroup* createOpMemberNameGroup (tcu::TestContext& testCtx)
6683 {
6684 	de::MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(testCtx, "opmembername", "Tests OpMemberName cases"));
6685 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "abuse", "OpMemberName abuse tests"));
6686 	vector<CaseParameter>			abuseCases;
6687 	vector<string>					testFunc;
6688 	de::Random						rnd(deStringHash(group->getName()));
6689 	const int						numElements = 128;
6690 	vector<float>					inputFloats(numElements, 0);
6691 	vector<float>					outputFloats(numElements, 0);
6692 
6693 	getOpNameAbuseCases(abuseCases);
6694 
6695 	fillRandomScalars(rnd, -100.0f, 100.0f, &inputFloats[0], numElements);
6696 
6697 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6698 		outputFloats[ndx] = -inputFloats[ndx];
6699 
6700 	const string commonShaderHeader =
6701 		"OpCapability Shader\n"
6702 		"OpMemoryModel Logical GLSL450\n"
6703 		"OpEntryPoint GLCompute %main \"main\" %id\n"
6704 		"OpExecutionMode %main LocalSize 1 1 1\n";
6705 
6706 	const string commonShaderFooter =
6707 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6708 
6709 		+ string(getComputeAsmInputOutputBufferTraits())
6710 		+ string(getComputeAsmCommonTypes())
6711 		+ string(getComputeAsmInputOutputBuffer()) +
6712 
6713 		"%u3str     = OpTypeStruct %u32 %u32 %u32\n"
6714 
6715 		"%id        = OpVariable %uvec3ptr Input\n"
6716 		"%zero      = OpConstant %i32 0\n"
6717 
6718 		"%main      = OpFunction %void None %voidf\n"
6719 		"%entry     = OpLabel\n"
6720 
6721 		"%idval     = OpLoad %uvec3 %id\n"
6722 		"%x0        = OpCompositeExtract %u32 %idval 0\n"
6723 
6724 		"%idstr     = OpCompositeConstruct %u3str %x0 %x0 %x0\n"
6725 		"%x         = OpCompositeExtract %u32 %idstr 0\n"
6726 
6727 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
6728 		"%inval     = OpLoad %f32 %inloc\n"
6729 		"%neg       = OpFNegate %f32 %inval\n"
6730 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6731 		"             OpStore %outloc %neg\n"
6732 
6733 		"             OpReturn\n"
6734 		"             OpFunctionEnd\n";
6735 
6736 	const StringTemplate shaderTemplate(
6737 		commonShaderHeader +
6738 		"OpMemberName %u3str 0 \"${NAME}\"\n" +
6739 		commonShaderFooter);
6740 
6741 	const std::string multipleNames =
6742 		commonShaderHeader +
6743 		"OpMemberName %u3str 0 \"to_be\"\n"
6744 		"OpMemberName %u3str 1 \"or_not\"\n"
6745 		"OpMemberName %u3str 0 \"to_be\"\n"
6746 		"OpMemberName %u3str 2 \"makes_no\"\n"
6747 		"OpMemberName %u3str 0 \"difference\"\n"
6748 		"OpMemberName %u3str 0 \"to_me\"\n" +
6749 		commonShaderFooter;
6750 	{
6751 		ComputeShaderSpec	spec;
6752 
6753 		spec.assembly = multipleNames;
6754 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6755 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6756 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6757 
6758 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "u3str_x_has_multiple_names", "multiple_names", spec));
6759 	}
6760 
6761 	const std::string everythingNamedTheSame =
6762 		commonShaderHeader +
6763 		"OpMemberName %u3str 0 \"the_same\"\n"
6764 		"OpMemberName %u3str 1 \"the_same\"\n"
6765 		"OpMemberName %u3str 2 \"the_same\"\n" +
6766 		commonShaderFooter;
6767 
6768 	{
6769 		ComputeShaderSpec	spec;
6770 
6771 		spec.assembly = everythingNamedTheSame;
6772 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6773 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6774 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6775 
6776 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "everything_named_the_same", "everything_named_the_same", spec));
6777 	}
6778 
6779 	// u3str_x_is_....
6780 	for (size_t ndx = 0; ndx < abuseCases.size(); ++ndx)
6781 	{
6782 		map<string, string>	specializations;
6783 		ComputeShaderSpec	spec;
6784 
6785 		specializations["NAME"] = abuseCases[ndx].param;
6786 		spec.assembly = shaderTemplate.specialize(specializations);
6787 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6788 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6789 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6790 
6791 		abuseGroup->addChild(new SpvAsmComputeShaderCase(testCtx, (std::string("u3str_x") + abuseCases[ndx].name).c_str(), abuseCases[ndx].name, spec));
6792 	}
6793 
6794 	group->addChild(abuseGroup.release());
6795 
6796 	return group.release();
6797 }
6798 
6799 // Assembly code used for testing function control is based on GLSL source code:
6800 //
6801 // #version 430
6802 //
6803 // layout(std140, set = 0, binding = 0) readonly buffer Input {
6804 //   float elements[];
6805 // } input_data;
6806 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
6807 //   float elements[];
6808 // } output_data;
6809 //
6810 // float const10() { return 10.f; }
6811 //
6812 // void main() {
6813 //   uint x = gl_GlobalInvocationID.x;
6814 //   output_data.elements[x] = input_data.elements[x] + const10();
6815 // }
createFunctionControlGroup(tcu::TestContext & testCtx)6816 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
6817 {
6818 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
6819 	vector<CaseParameter>			cases;
6820 	de::Random						rnd				(deStringHash(group->getName()));
6821 	const int						numElements		= 100;
6822 	vector<float>					inputFloats		(numElements, 0);
6823 	vector<float>					outputFloats	(numElements, 0);
6824 	const StringTemplate			shaderTemplate	(
6825 		string(getComputeAsmShaderPreamble()) +
6826 
6827 		"OpSource GLSL 430\n"
6828 		"OpName %main \"main\"\n"
6829 		"OpName %func_const10 \"const10(\"\n"
6830 		"OpName %id \"gl_GlobalInvocationID\"\n"
6831 
6832 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6833 
6834 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6835 
6836 		"%f32f = OpTypeFunction %f32\n"
6837 		"%id = OpVariable %uvec3ptr Input\n"
6838 		"%zero = OpConstant %i32 0\n"
6839 		"%constf10 = OpConstant %f32 10.0\n"
6840 
6841 		"%main         = OpFunction %void None %voidf\n"
6842 		"%entry        = OpLabel\n"
6843 		"%idval        = OpLoad %uvec3 %id\n"
6844 		"%x            = OpCompositeExtract %u32 %idval 0\n"
6845 		"%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
6846 		"%inval        = OpLoad %f32 %inloc\n"
6847 		"%ret_10       = OpFunctionCall %f32 %func_const10\n"
6848 		"%fadd         = OpFAdd %f32 %inval %ret_10\n"
6849 		"%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
6850 		"                OpStore %outloc %fadd\n"
6851 		"                OpReturn\n"
6852 		"                OpFunctionEnd\n"
6853 
6854 		"%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
6855 		"%label        = OpLabel\n"
6856 		"                OpReturnValue %constf10\n"
6857 		"                OpFunctionEnd\n");
6858 
6859 	cases.push_back(CaseParameter("none",						"None"));
6860 	cases.push_back(CaseParameter("inline",						"Inline"));
6861 	cases.push_back(CaseParameter("dont_inline",				"DontInline"));
6862 	cases.push_back(CaseParameter("pure",						"Pure"));
6863 	cases.push_back(CaseParameter("const",						"Const"));
6864 	cases.push_back(CaseParameter("inline_pure",				"Inline|Pure"));
6865 	cases.push_back(CaseParameter("const_dont_inline",			"Const|DontInline"));
6866 	cases.push_back(CaseParameter("inline_dont_inline",			"Inline|DontInline"));
6867 	cases.push_back(CaseParameter("pure_inline_dont_inline",	"Pure|Inline|DontInline"));
6868 
6869 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6870 
6871 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
6872 	floorAll(inputFloats);
6873 
6874 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6875 		outputFloats[ndx] = inputFloats[ndx] + 10.f;
6876 
6877 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6878 	{
6879 		map<string, string>		specializations;
6880 		ComputeShaderSpec		spec;
6881 
6882 		specializations["CONTROL"] = cases[caseNdx].param;
6883 		spec.assembly = shaderTemplate.specialize(specializations);
6884 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6885 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6886 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6887 
6888 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6889 	}
6890 
6891 	return group.release();
6892 }
6893 
createMemoryAccessGroup(tcu::TestContext & testCtx)6894 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
6895 {
6896 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
6897 	vector<CaseParameter>			cases;
6898 	de::Random						rnd				(deStringHash(group->getName()));
6899 	const int						numElements		= 100;
6900 	vector<float>					inputFloats		(numElements, 0);
6901 	vector<float>					outputFloats	(numElements, 0);
6902 	const StringTemplate			shaderTemplate	(
6903 		string(getComputeAsmShaderPreamble()) +
6904 
6905 		"OpSource GLSL 430\n"
6906 		"OpName %main           \"main\"\n"
6907 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6908 
6909 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6910 
6911 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
6912 
6913 		"%f32ptr_f  = OpTypePointer Function %f32\n"
6914 
6915 		"%id        = OpVariable %uvec3ptr Input\n"
6916 		"%zero      = OpConstant %i32 0\n"
6917 		"%four      = OpConstant %i32 4\n"
6918 
6919 		"%main      = OpFunction %void None %voidf\n"
6920 		"%label     = OpLabel\n"
6921 		"%copy      = OpVariable %f32ptr_f Function\n"
6922 		"%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
6923 		"%x         = OpCompositeExtract %u32 %idval 0\n"
6924 		"%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
6925 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
6926 		"             OpCopyMemory %copy %inloc ${ACCESS}\n"
6927 		"%val1      = OpLoad %f32 %copy\n"
6928 		"%val2      = OpLoad %f32 %inloc\n"
6929 		"%add       = OpFAdd %f32 %val1 %val2\n"
6930 		"             OpStore %outloc %add ${ACCESS}\n"
6931 		"             OpReturn\n"
6932 		"             OpFunctionEnd\n");
6933 
6934 	cases.push_back(CaseParameter("null",					""));
6935 	cases.push_back(CaseParameter("none",					"None"));
6936 	cases.push_back(CaseParameter("volatile",				"Volatile"));
6937 	cases.push_back(CaseParameter("aligned",				"Aligned 4"));
6938 	cases.push_back(CaseParameter("nontemporal",			"Nontemporal"));
6939 	cases.push_back(CaseParameter("aligned_nontemporal",	"Aligned|Nontemporal 4"));
6940 	cases.push_back(CaseParameter("aligned_volatile",		"Volatile|Aligned 4"));
6941 
6942 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
6943 
6944 	for (size_t ndx = 0; ndx < numElements; ++ndx)
6945 		outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
6946 
6947 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6948 	{
6949 		map<string, string>		specializations;
6950 		ComputeShaderSpec		spec;
6951 
6952 		specializations["ACCESS"] = cases[caseNdx].param;
6953 		spec.assembly = shaderTemplate.specialize(specializations);
6954 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
6955 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
6956 		spec.numWorkGroups = IVec3(numElements, 1, 1);
6957 
6958 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
6959 	}
6960 
6961 	return group.release();
6962 }
6963 
6964 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)6965 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
6966 {
6967 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
6968 	vector<CaseParameter>			cases;
6969 	de::Random						rnd				(deStringHash(group->getName()));
6970 	const int						numElements		= 100;
6971 	vector<float>					positiveFloats	(numElements, 0);
6972 	vector<float>					negativeFloats	(numElements, 0);
6973 	const StringTemplate			shaderTemplate	(
6974 		string(getComputeAsmShaderPreamble()) +
6975 
6976 		"OpSource GLSL 430\n"
6977 		"OpName %main           \"main\"\n"
6978 		"OpName %id             \"gl_GlobalInvocationID\"\n"
6979 
6980 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
6981 
6982 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) +
6983 		"%uvec2     = OpTypeVector %u32 2\n"
6984 		"%fvec4     = OpTypeVector %f32 4\n"
6985 		"%fmat33    = OpTypeMatrix %fvec3 3\n"
6986 		"%image     = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
6987 		"%sampler   = OpTypeSampler\n"
6988 		"%simage    = OpTypeSampledImage %image\n"
6989 		"%const100  = OpConstant %u32 100\n"
6990 		"%uarr100   = OpTypeArray %i32 %const100\n"
6991 		"%struct    = OpTypeStruct %f32 %i32 %u32\n"
6992 		"%pointer   = OpTypePointer Function %i32\n"
6993 		+ string(getComputeAsmInputOutputBuffer()) +
6994 
6995 		"%id        = OpVariable %uvec3ptr Input\n"
6996 		"%zero      = OpConstant %i32 0\n"
6997 
6998 		"%main      = OpFunction %void None %voidf\n"
6999 		"%label     = OpLabel\n"
7000 
7001 		"%undef     = OpUndef ${TYPE}\n"
7002 
7003 		"%idval     = OpLoad %uvec3 %id\n"
7004 		"%x         = OpCompositeExtract %u32 %idval 0\n"
7005 
7006 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7007 		"%inval     = OpLoad %f32 %inloc\n"
7008 		"%neg       = OpFNegate %f32 %inval\n"
7009 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7010 		"             OpStore %outloc %neg\n"
7011 		"             OpReturn\n"
7012 		"             OpFunctionEnd\n");
7013 
7014 	cases.push_back(CaseParameter("bool",			"%bool"));
7015 	cases.push_back(CaseParameter("sint32",			"%i32"));
7016 	cases.push_back(CaseParameter("uint32",			"%u32"));
7017 	cases.push_back(CaseParameter("float32",		"%f32"));
7018 	cases.push_back(CaseParameter("vec4float32",	"%fvec4"));
7019 	cases.push_back(CaseParameter("vec2uint32",		"%uvec2"));
7020 	cases.push_back(CaseParameter("matrix",			"%fmat33"));
7021 	cases.push_back(CaseParameter("image",			"%image"));
7022 	cases.push_back(CaseParameter("sampler",		"%sampler"));
7023 	cases.push_back(CaseParameter("sampledimage",	"%simage"));
7024 	cases.push_back(CaseParameter("array",			"%uarr100"));
7025 	cases.push_back(CaseParameter("runtimearray",	"%f32arr"));
7026 	cases.push_back(CaseParameter("struct",			"%struct"));
7027 	cases.push_back(CaseParameter("pointer",		"%pointer"));
7028 
7029 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7030 
7031 	for (size_t ndx = 0; ndx < numElements; ++ndx)
7032 		negativeFloats[ndx] = -positiveFloats[ndx];
7033 
7034 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7035 	{
7036 		map<string, string>		specializations;
7037 		ComputeShaderSpec		spec;
7038 
7039 		specializations["TYPE"] = cases[caseNdx].param;
7040 		spec.assembly = shaderTemplate.specialize(specializations);
7041 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7042 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7043 		spec.numWorkGroups = IVec3(numElements, 1, 1);
7044 
7045 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7046 	}
7047 
7048 	// OpUndef with constants.
7049 	{
7050 		static const char data_dir[] = "spirv_assembly/instruction/compute/undef";
7051 
7052 		static const struct
7053 		{
7054 			const std::string name;
7055 			const std::string desc;
7056 		} amberCases[] =
7057 		{
7058 			{ "undefined_constant_composite",		"OpUndef value in OpConstantComposite"		},
7059 			{ "undefined_spec_constant_composite",	"OpUndef value in OpSpecConstantComposite"	},
7060 		};
7061 
7062 		for (int i = 0; i < DE_LENGTH_OF_ARRAY(amberCases); ++i)
7063 		{
7064 			cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
7065 																				amberCases[i].name.c_str(),
7066 																				amberCases[i].desc.c_str(),
7067 																				data_dir,
7068 																				amberCases[i].name + ".amber");
7069 			group->addChild(testCase);
7070 		}
7071 	}
7072 
7073 	return group.release();
7074 }
7075 
7076 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createFloat16OpConstantCompositeGroup(tcu::TestContext & testCtx)7077 tcu::TestCaseGroup* createFloat16OpConstantCompositeGroup (tcu::TestContext& testCtx)
7078 {
7079 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
7080 	vector<CaseParameter>			cases;
7081 	de::Random						rnd				(deStringHash(group->getName()));
7082 	const int						numElements		= 100;
7083 	vector<float>					positiveFloats	(numElements, 0);
7084 	vector<float>					negativeFloats	(numElements, 0);
7085 	const StringTemplate			shaderTemplate	(
7086 		"OpCapability Shader\n"
7087 		"OpCapability Float16\n"
7088 		"OpMemoryModel Logical GLSL450\n"
7089 		"OpEntryPoint GLCompute %main \"main\" %id\n"
7090 		"OpExecutionMode %main LocalSize 1 1 1\n"
7091 		"OpSource GLSL 430\n"
7092 		"OpName %main           \"main\"\n"
7093 		"OpName %id             \"gl_GlobalInvocationID\"\n"
7094 
7095 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
7096 
7097 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
7098 
7099 		"%id        = OpVariable %uvec3ptr Input\n"
7100 		"%zero      = OpConstant %i32 0\n"
7101 		"%f16       = OpTypeFloat 16\n"
7102 		"%c_f16_0   = OpConstant %f16 0.0\n"
7103 		"%c_f16_0_5 = OpConstant %f16 0.5\n"
7104 		"%c_f16_1   = OpConstant %f16 1.0\n"
7105 		"%v2f16     = OpTypeVector %f16 2\n"
7106 		"%v3f16     = OpTypeVector %f16 3\n"
7107 		"%v4f16     = OpTypeVector %f16 4\n"
7108 
7109 		"${CONSTANT}\n"
7110 
7111 		"%main      = OpFunction %void None %voidf\n"
7112 		"%label     = OpLabel\n"
7113 		"%idval     = OpLoad %uvec3 %id\n"
7114 		"%x         = OpCompositeExtract %u32 %idval 0\n"
7115 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
7116 		"%inval     = OpLoad %f32 %inloc\n"
7117 		"%neg       = OpFNegate %f32 %inval\n"
7118 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
7119 		"             OpStore %outloc %neg\n"
7120 		"             OpReturn\n"
7121 		"             OpFunctionEnd\n");
7122 
7123 
7124 	cases.push_back(CaseParameter("vector",			"%const = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"));
7125 	cases.push_back(CaseParameter("matrix",			"%m3v3f16 = OpTypeMatrix %v3f16 3\n"
7126 													"%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7127 													"%mat = OpConstantComposite %m3v3f16 %vec %vec %vec"));
7128 	cases.push_back(CaseParameter("struct",			"%m2v3f16 = OpTypeMatrix %v3f16 2\n"
7129 													"%struct = OpTypeStruct %i32 %f16 %v3f16 %m2v3f16\n"
7130 													"%vec = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0_5 %c_f16_1\n"
7131 													"%mat = OpConstantComposite %m2v3f16 %vec %vec\n"
7132 													"%const = OpConstantComposite %struct %zero %c_f16_0_5 %vec %mat\n"));
7133 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %i32 %f16\n"
7134 													"%st2 = OpTypeStruct %i32 %i32\n"
7135 													"%struct = OpTypeStruct %st1 %st2\n"
7136 													"%st1val = OpConstantComposite %st1 %zero %c_f16_0_5\n"
7137 													"%st2val = OpConstantComposite %st2 %zero %zero\n"
7138 													"%const = OpConstantComposite %struct %st1val %st2val"));
7139 
7140 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
7141 
7142 	for (size_t ndx = 0; ndx < numElements; ++ndx)
7143 		negativeFloats[ndx] = -positiveFloats[ndx];
7144 
7145 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
7146 	{
7147 		map<string, string>		specializations;
7148 		ComputeShaderSpec		spec;
7149 
7150 		specializations["CONSTANT"] = cases[caseNdx].param;
7151 		spec.assembly = shaderTemplate.specialize(specializations);
7152 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
7153 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
7154 		spec.numWorkGroups = IVec3(numElements, 1, 1);
7155 
7156 		spec.extensions.push_back("VK_KHR_shader_float16_int8");
7157 
7158 		spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
7159 
7160 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
7161 	}
7162 
7163 	return group.release();
7164 }
7165 
// Expands inData (N values) into N*N values so that pairing element k of the argNo == 0
// expansion with element k of the argNo == 1 expansion walks through every ordered pair of
// inputs:
//   argNo == 0: the whole sequence repeated N times        (a b c a b c a b c)
//   argNo == 1: each value repeated N times consecutively  (a a a b b b c c c)
// Any other argNo returns an empty vector.
const vector<deFloat16> squarize(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		count	= inData.size();
	vector<deFloat16>	expanded;

	expanded.reserve(count * count);

	switch (argNo)
	{
		case 0:
			for (size_t repeatNdx = 0; repeatNdx < count; ++repeatNdx)
				expanded.insert(expanded.end(), inData.begin(), inData.end());
			break;

		case 1:
			for (size_t valueNdx = 0; valueNdx < count; ++valueNdx)
				expanded.insert(expanded.end(), count, inData[valueNdx]);
			break;

		default:
			break;
	}

	return expanded;
}
7191 
// Two-component counterpart of squarize(). First every ordered pair (x, y) of input scalars is
// formed as a 2-component vector (N*N vectors); then those vectors are expanded so that pairing
// vector k of the argNo == 0 result with vector k of the argNo == 1 result walks through every
// ordered pair of vectors:
//   argNo == 0: the whole vector list repeated once per vector
//   argNo == 1: each vector repeated consecutively, once per vector
// Any other argNo returns an empty vector. The input length is asserted to be at most 64, as the
// result holds 2 * N^4 scalars.
const vector<deFloat16> squarizeVector(const vector<deFloat16>& inData, const deUint32 argNo)
{
	const size_t		componentCount	= 2;
	const size_t		scalarCount		= inData.size();
	vector<deFloat16>	pairs;
	vector<deFloat16>	result;

	DE_ASSERT(scalarCount <= 64);

	// All ordered scalar pairs, stored as consecutive (x, y) components.
	pairs.reserve(componentCount * scalarCount * scalarCount);

	for (size_t xNdx = 0; xNdx < scalarCount; ++xNdx)
	for (size_t yNdx = 0; yNdx < scalarCount; ++yNdx)
	{
		pairs.push_back(inData[xNdx]);
		pairs.push_back(inData[yNdx]);
	}

	const size_t		pairCount		= pairs.size() / componentCount;

	result.reserve(componentCount * pairCount * pairCount);

	if (argNo == 0 || argNo == 1)
	{
		for (size_t outerNdx = 0; outerNdx < pairCount; ++outerNdx)
		for (size_t innerNdx = 0; innerNdx < pairCount; ++innerNdx)
		{
			// Operand 0 follows the fast-moving index, operand 1 the slow-moving one.
			const size_t	firstComponent	= componentCount * ((argNo == 0) ? innerNdx : outerNdx);

			for (size_t compNdx = 0; compNdx < componentCount; ++compNdx)
				result.push_back(pairs[firstComponent + compNdx]);
		}
	}

	return result;
}
7243 
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isNan7244 struct fp16isNan			{ bool operator()(const tcu::Float16 in1, const tcu::Float16)		{ return in1.isNaN(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isInf7245 struct fp16isInf			{ bool operator()(const tcu::Float16 in1, const tcu::Float16)		{ return in1.isInf(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isEqual7246 struct fp16isEqual			{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() == in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isUnequal7247 struct fp16isUnequal		{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() != in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isLess7248 struct fp16isLess			{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() <  in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isGreater7249 struct fp16isGreater		{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() >  in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isLessOrEqual7250 struct fp16isLessOrEqual	{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() <= in2.asFloat(); } };
operator ()vkt::SpirVAssembly::__anon043fb9e60111::fp16isGreaterOrEqual7251 struct fp16isGreaterOrEqual	{ bool operator()(const tcu::Float16 in1, const tcu::Float16 in2)	{ return in1.asFloat() >= in2.asFloat(); } };
7252 
7253 template <class TestedLogicalFunction, bool onlyTestFunc, bool unationModeAnd, bool nanSupported>
compareFP16Logical(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)7254 bool compareFP16Logical (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
7255 {
7256 	if (inputs.size() != 2 || outputAllocs.size() != 1)
7257 		return false;
7258 
7259 	vector<deUint8>	input1Bytes;
7260 	vector<deUint8>	input2Bytes;
7261 
7262 	inputs[0].getBytes(input1Bytes);
7263 	inputs[1].getBytes(input2Bytes);
7264 
7265 	const deUint32			denormModesCount			= 2;
7266 	const deFloat16			float16one					= tcu::Float16(1.0f).bits();
7267 	const deFloat16			float16zero					= tcu::Float16(0.0f).bits();
7268 	const tcu::Float16		zero						= tcu::Float16::zero(1);
7269 	const deFloat16* const	outputAsFP16				= static_cast<deFloat16*>(outputAllocs[0]->getHostPtr());
7270 	const deFloat16* const	input1AsFP16				= reinterpret_cast<deFloat16* const>(&input1Bytes.front());
7271 	const deFloat16* const	input2AsFP16				= reinterpret_cast<deFloat16* const>(&input2Bytes.front());
7272 	deUint32				successfulRuns				= denormModesCount;
7273 	std::string				results[denormModesCount];
7274 	TestedLogicalFunction	testedLogicalFunction;
7275 
7276 	for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7277 	{
7278 		const bool flushToZero = (denormMode == 1);
7279 
7280 		for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deFloat16); ++idx)
7281 		{
7282 			const tcu::Float16	f1pre			= tcu::Float16(input1AsFP16[idx]);
7283 			const tcu::Float16	f2pre			= tcu::Float16(input2AsFP16[idx]);
7284 			const tcu::Float16	f1				= (flushToZero && f1pre.isDenorm()) ? zero : f1pre;
7285 			const tcu::Float16	f2				= (flushToZero && f2pre.isDenorm()) ? zero : f2pre;
7286 			deFloat16			expectedOutput	= float16zero;
7287 
7288 			if (onlyTestFunc)
7289 			{
7290 				if (testedLogicalFunction(f1, f2))
7291 					expectedOutput = float16one;
7292 			}
7293 			else
7294 			{
7295 				const bool	f1nan	= f1.isNaN();
7296 				const bool	f2nan	= f2.isNaN();
7297 
7298 				// Skip NaN floats if not supported by implementation
7299 				if (!nanSupported && (f1nan || f2nan))
7300 					continue;
7301 
7302 				if (unationModeAnd)
7303 				{
7304 					const bool	ordered		= !f1nan && !f2nan;
7305 
7306 					if (ordered && testedLogicalFunction(f1, f2))
7307 						expectedOutput = float16one;
7308 				}
7309 				else
7310 				{
7311 					const bool	unordered	= f1nan || f2nan;
7312 
7313 					if (unordered || testedLogicalFunction(f1, f2))
7314 						expectedOutput = float16one;
7315 				}
7316 			}
7317 
7318 			if (outputAsFP16[idx] != expectedOutput)
7319 			{
7320 				std::ostringstream str;
7321 
7322 				str << "ERROR: Sub-case #" << idx
7323 					<< " flushToZero:" << flushToZero
7324 					<< std::hex
7325 					<< " failed, inputs: 0x" << f1.bits()
7326 					<< ";0x" << f2.bits()
7327 					<< " output: 0x" << outputAsFP16[idx]
7328 					<< " expected output: 0x" << expectedOutput;
7329 
7330 				results[denormMode] = str.str();
7331 
7332 				successfulRuns--;
7333 
7334 				break;
7335 			}
7336 		}
7337 	}
7338 
7339 	if (successfulRuns == 0)
7340 		for (deUint32 denormMode = 0; denormMode < denormModesCount; denormMode++)
7341 			log << TestLog::Message << results[denormMode] << TestLog::EndMessage;
7342 
7343 	return successfulRuns > 0;
7344 }
7345 
7346 } // anonymous
7347 
createOpSourceTests(tcu::TestContext & testCtx)7348 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
7349 {
7350 	struct NameCodePair { string name, code; };
7351 	RGBA							defaultColors[4];
7352 	de::MovePtr<tcu::TestCaseGroup> opSourceTests			(new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
7353 	const std::string				opsourceGLSLWithFile	= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
7354 	map<string, string>				fragments				= passthruFragments();
7355 	const NameCodePair				tests[]					=
7356 	{
7357 		{"unknown", "OpSource Unknown 321"},
7358 		{"essl", "OpSource ESSL 310"},
7359 		{"glsl", "OpSource GLSL 450"},
7360 		{"opencl_cpp", "OpSource OpenCL_CPP 120"},
7361 		{"opencl_c", "OpSource OpenCL_C 120"},
7362 		{"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
7363 		{"file", opsourceGLSLWithFile},
7364 		{"source", opsourceGLSLWithFile + "\"void main(){}\""},
7365 		// Longest possible source string: SPIR-V limits instructions to 65535
7366 		// words, of which the first 4 are opsourceGLSLWithFile; the rest will
7367 		// contain 65530 UTF8 characters (one word each) plus one last word
7368 		// containing 3 ASCII characters and \0.
7369 		{"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
7370 	};
7371 
7372 	getDefaultColors(defaultColors);
7373 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7374 	{
7375 		fragments["debug"] = tests[testNdx].code;
7376 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7377 	}
7378 
7379 	return opSourceTests.release();
7380 }
7381 
createOpSourceContinuedTests(tcu::TestContext & testCtx)7382 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
7383 {
7384 	struct NameCodePair { string name, code; };
7385 	RGBA								defaultColors[4];
7386 	de::MovePtr<tcu::TestCaseGroup>		opSourceTests		(new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
7387 	map<string, string>					fragments			= passthruFragments();
7388 	const std::string					opsource			= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
7389 	const NameCodePair					tests[]				=
7390 	{
7391 		{"empty", opsource + "OpSourceContinued \"\""},
7392 		{"short", opsource + "OpSourceContinued \"abcde\""},
7393 		{"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
7394 		// Longest possible source string: SPIR-V limits instructions to 65535
7395 		// words, of which the first one is OpSourceContinued/length; the rest
7396 		// will contain 65533 UTF8 characters (one word each) plus one last word
7397 		// containing 3 ASCII characters and \0.
7398 		{"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
7399 	};
7400 
7401 	getDefaultColors(defaultColors);
7402 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
7403 	{
7404 		fragments["debug"] = tests[testNdx].code;
7405 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
7406 	}
7407 
7408 	return opSourceTests.release();
7409 }
createOpNoLineTests(tcu::TestContext & testCtx)7410 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
7411 {
7412 	RGBA								 defaultColors[4];
7413 	de::MovePtr<tcu::TestCaseGroup>		 opLineTests		 (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
7414 	map<string, string>					 fragments;
7415 	getDefaultColors(defaultColors);
7416 	fragments["debug"]			=
7417 		"%name = OpString \"name\"\n";
7418 
7419 	fragments["pre_main"]	=
7420 		"OpNoLine\n"
7421 		"OpNoLine\n"
7422 		"OpLine %name 1 1\n"
7423 		"OpNoLine\n"
7424 		"OpLine %name 1 1\n"
7425 		"OpLine %name 1 1\n"
7426 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7427 		"OpNoLine\n"
7428 		"OpLine %name 1 1\n"
7429 		"OpNoLine\n"
7430 		"OpLine %name 1 1\n"
7431 		"OpLine %name 1 1\n"
7432 		"%second_param1 = OpFunctionParameter %v4f32\n"
7433 		"OpNoLine\n"
7434 		"OpNoLine\n"
7435 		"%label_secondfunction = OpLabel\n"
7436 		"OpNoLine\n"
7437 		"OpReturnValue %second_param1\n"
7438 		"OpFunctionEnd\n"
7439 		"OpNoLine\n"
7440 		"OpNoLine\n";
7441 
7442 	fragments["testfun"]		=
7443 		// A %test_code function that returns its argument unchanged.
7444 		"OpNoLine\n"
7445 		"OpNoLine\n"
7446 		"OpLine %name 1 1\n"
7447 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7448 		"OpNoLine\n"
7449 		"%param1 = OpFunctionParameter %v4f32\n"
7450 		"OpNoLine\n"
7451 		"OpNoLine\n"
7452 		"%label_testfun = OpLabel\n"
7453 		"OpNoLine\n"
7454 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7455 		"OpReturnValue %val1\n"
7456 		"OpFunctionEnd\n"
7457 		"OpLine %name 1 1\n"
7458 		"OpNoLine\n";
7459 
7460 	createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
7461 
7462 	return opLineTests.release();
7463 }
7464 
createOpModuleProcessedTests(tcu::TestContext & testCtx)7465 tcu::TestCaseGroup* createOpModuleProcessedTests(tcu::TestContext& testCtx)
7466 {
7467 	RGBA								defaultColors[4];
7468 	de::MovePtr<tcu::TestCaseGroup>		opModuleProcessedTests			(new tcu::TestCaseGroup(testCtx, "opmoduleprocessed", "OpModuleProcessed instruction"));
7469 	map<string, string>					fragments;
7470 	std::vector<std::string>			noExtensions;
7471 	GraphicsResources					resources;
7472 
7473 	getDefaultColors(defaultColors);
7474 	resources.verifyBinary = veryfiBinaryShader;
7475 	resources.spirvVersion = SPIRV_VERSION_1_3;
7476 
7477 	fragments["moduleprocessed"]							=
7478 		"OpModuleProcessed \"VULKAN CTS\"\n"
7479 		"OpModuleProcessed \"Negative values\"\n"
7480 		"OpModuleProcessed \"Date: 2017/09/21\"\n";
7481 
7482 	fragments["pre_main"]	=
7483 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7484 		"%second_param1 = OpFunctionParameter %v4f32\n"
7485 		"%label_secondfunction = OpLabel\n"
7486 		"OpReturnValue %second_param1\n"
7487 		"OpFunctionEnd\n";
7488 
7489 	fragments["testfun"]		=
7490 		// A %test_code function that returns its argument unchanged.
7491 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7492 		"%param1 = OpFunctionParameter %v4f32\n"
7493 		"%label_testfun = OpLabel\n"
7494 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7495 		"OpReturnValue %val1\n"
7496 		"OpFunctionEnd\n";
7497 
7498 	createTestsForAllStages ("opmoduleprocessed", defaultColors, defaultColors, fragments, resources, noExtensions, opModuleProcessedTests.get());
7499 
7500 	return opModuleProcessedTests.release();
7501 }
7502 
7503 
createOpLineTests(tcu::TestContext & testCtx)7504 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
7505 {
7506 	RGBA													defaultColors[4];
7507 	de::MovePtr<tcu::TestCaseGroup>							opLineTests			(new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
7508 	map<string, string>										fragments;
7509 	std::vector<std::pair<std::string, std::string> >		problemStrings;
7510 
7511 	problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
7512 	problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
7513 	problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
7514 	getDefaultColors(defaultColors);
7515 
7516 	fragments["debug"]			=
7517 		"%other_name = OpString \"other_name\"\n";
7518 
7519 	fragments["pre_main"]	=
7520 		"OpLine %file_name 32 0\n"
7521 		"OpLine %file_name 32 32\n"
7522 		"OpLine %file_name 32 40\n"
7523 		"OpLine %other_name 32 40\n"
7524 		"OpLine %other_name 0 100\n"
7525 		"OpLine %other_name 0 4294967295\n"
7526 		"OpLine %other_name 4294967295 0\n"
7527 		"OpLine %other_name 32 40\n"
7528 		"OpLine %file_name 0 0\n"
7529 		"%second_function = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7530 		"OpLine %file_name 1 0\n"
7531 		"%second_param1 = OpFunctionParameter %v4f32\n"
7532 		"OpLine %file_name 1 3\n"
7533 		"OpLine %file_name 1 2\n"
7534 		"%label_secondfunction = OpLabel\n"
7535 		"OpLine %file_name 0 2\n"
7536 		"OpReturnValue %second_param1\n"
7537 		"OpFunctionEnd\n"
7538 		"OpLine %file_name 0 2\n"
7539 		"OpLine %file_name 0 2\n";
7540 
7541 	fragments["testfun"]		=
7542 		// A %test_code function that returns its argument unchanged.
7543 		"OpLine %file_name 1 0\n"
7544 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7545 		"OpLine %file_name 16 330\n"
7546 		"%param1 = OpFunctionParameter %v4f32\n"
7547 		"OpLine %file_name 14 442\n"
7548 		"%label_testfun = OpLabel\n"
7549 		"OpLine %file_name 11 1024\n"
7550 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
7551 		"OpLine %file_name 2 97\n"
7552 		"OpReturnValue %val1\n"
7553 		"OpFunctionEnd\n"
7554 		"OpLine %file_name 5 32\n";
7555 
7556 	for (size_t i = 0; i < problemStrings.size(); ++i)
7557 	{
7558 		map<string, string> testFragments = fragments;
7559 		testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
7560 		createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
7561 	}
7562 
7563 	return opLineTests.release();
7564 }
7565 
createOpConstantNullTests(tcu::TestContext & testCtx)7566 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
7567 {
7568 	de::MovePtr<tcu::TestCaseGroup> opConstantNullTests		(new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
7569 	RGBA							colors[4];
7570 
7571 
7572 	const char						functionStart[] =
7573 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7574 		"%param1 = OpFunctionParameter %v4f32\n"
7575 		"%lbl    = OpLabel\n";
7576 
7577 	const char						functionEnd[]	=
7578 		"OpReturnValue %transformed_param\n"
7579 		"OpFunctionEnd\n";
7580 
7581 	struct NameConstantsCode
7582 	{
7583 		string name;
7584 		string constants;
7585 		string code;
7586 	};
7587 
7588 	NameConstantsCode tests[] =
7589 	{
7590 		{
7591 			"vec4",
7592 			"%cnull = OpConstantNull %v4f32\n",
7593 			"%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
7594 		},
7595 		{
7596 			"float",
7597 			"%cnull = OpConstantNull %f32\n",
7598 			"%vp = OpVariable %fp_v4f32 Function\n"
7599 			"%v  = OpLoad %v4f32 %vp\n"
7600 			"%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
7601 			"%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
7602 			"%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
7603 			"%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
7604 			"%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
7605 		},
7606 		{
7607 			"bool",
7608 			"%cnull             = OpConstantNull %bool\n",
7609 			"%v                 = OpVariable %fp_v4f32 Function\n"
7610 			"                     OpStore %v %param1\n"
7611 			"                     OpSelectionMerge %false_label None\n"
7612 			"                     OpBranchConditional %cnull %true_label %false_label\n"
7613 			"%true_label        = OpLabel\n"
7614 			"                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
7615 			"                     OpBranch %false_label\n"
7616 			"%false_label       = OpLabel\n"
7617 			"%transformed_param = OpLoad %v4f32 %v\n"
7618 		},
7619 		{
7620 			"i32",
7621 			"%cnull             = OpConstantNull %i32\n",
7622 			"%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
7623 			"%b                 = OpIEqual %bool %cnull %c_i32_0\n"
7624 			"                     OpSelectionMerge %false_label None\n"
7625 			"                     OpBranchConditional %b %true_label %false_label\n"
7626 			"%true_label        = OpLabel\n"
7627 			"                     OpStore %v %param1\n"
7628 			"                     OpBranch %false_label\n"
7629 			"%false_label       = OpLabel\n"
7630 			"%transformed_param = OpLoad %v4f32 %v\n"
7631 		},
7632 		{
7633 			"struct",
7634 			"%stype             = OpTypeStruct %f32 %v4f32\n"
7635 			"%fp_stype          = OpTypePointer Function %stype\n"
7636 			"%cnull             = OpConstantNull %stype\n",
7637 			"%v                 = OpVariable %fp_stype Function %cnull\n"
7638 			"%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
7639 			"%f_val             = OpLoad %v4f32 %f\n"
7640 			"%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
7641 		},
7642 		{
7643 			"array",
7644 			"%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
7645 			"%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
7646 			"%cnull             = OpConstantNull %a4_v4f32\n",
7647 			"%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
7648 			"%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7649 			"%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
7650 			"%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
7651 			"%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
7652 			"%f_val             = OpLoad %v4f32 %f\n"
7653 			"%f1_val            = OpLoad %v4f32 %f1\n"
7654 			"%f2_val            = OpLoad %v4f32 %f2\n"
7655 			"%f3_val            = OpLoad %v4f32 %f3\n"
7656 			"%t0                = OpFAdd %v4f32 %param1 %f_val\n"
7657 			"%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
7658 			"%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
7659 			"%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
7660 		},
7661 		{
7662 			"matrix",
7663 			"%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
7664 			"%cnull             = OpConstantNull %mat4x4_f32\n",
7665 			// Our null matrix * any vector should result in a zero vector.
7666 			"%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
7667 			"%transformed_param = OpFAdd %v4f32 %param1 %v\n"
7668 		}
7669 	};
7670 
7671 	getHalfColorsFullAlpha(colors);
7672 
7673 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7674 	{
7675 		map<string, string> fragments;
7676 		fragments["pre_main"] = tests[testNdx].constants;
7677 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7678 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
7679 	}
7680 	return opConstantNullTests.release();
7681 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)7682 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
7683 {
7684 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
7685 	RGBA							inputColors[4];
7686 	RGBA							outputColors[4];
7687 
7688 
7689 	const char						functionStart[]	 =
7690 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7691 		"%param1 = OpFunctionParameter %v4f32\n"
7692 		"%lbl    = OpLabel\n";
7693 
7694 	const char						functionEnd[]		=
7695 		"OpReturnValue %transformed_param\n"
7696 		"OpFunctionEnd\n";
7697 
7698 	struct NameConstantsCode
7699 	{
7700 		string name;
7701 		string constants;
7702 		string code;
7703 	};
7704 
7705 	NameConstantsCode tests[] =
7706 	{
7707 		{
7708 			"vec4",
7709 
7710 			"%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
7711 			"%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
7712 		},
7713 		{
7714 			"struct",
7715 
7716 			"%stype             = OpTypeStruct %v4f32 %f32\n"
7717 			"%fp_stype          = OpTypePointer Function %stype\n"
7718 			"%f32_n_1           = OpConstant %f32 -1.0\n"
7719 			"%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
7720 			"%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
7721 			"%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
7722 
7723 			"%v                 = OpVariable %fp_stype Function %cval\n"
7724 			"%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
7725 			"%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
7726 			"%vec_val           = OpLoad %v4f32 %vec_ptr\n"
7727 			"%f32_val           = OpLoad %f32 %f32_ptr\n"
7728 			"%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
7729 			"%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
7730 			"%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
7731 		},
7732 		{
7733 			// [1|0|0|0.5] [x] = x + 0.5
7734 			// [0|1|0|0.5] [y] = y + 0.5
7735 			// [0|0|1|0.5] [z] = z + 0.5
7736 			// [0|0|0|1  ] [1] = 1
7737 			"matrix",
7738 
7739 			"%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
7740 			"%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
7741 			"%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
7742 			"%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
7743 			"%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
7744 			"%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
7745 
7746 			"%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
7747 		},
7748 		{
7749 			"array",
7750 
7751 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7752 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
7753 			"%f32_n_1             = OpConstant %f32 -1.0\n"
7754 			"%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
7755 			"%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
7756 
7757 			"%v                   = OpVariable %fp_a4f32 Function %carr\n"
7758 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
7759 			"%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
7760 			"%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
7761 			"%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
7762 			"%f_val               = OpLoad %f32 %f\n"
7763 			"%f1_val              = OpLoad %f32 %f1\n"
7764 			"%f2_val              = OpLoad %f32 %f2\n"
7765 			"%f3_val              = OpLoad %f32 %f3\n"
7766 			"%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
7767 			"%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
7768 			"%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
7769 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
7770 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7771 		},
7772 		{
7773 			//
7774 			// [
7775 			//   {
7776 			//      0.0,
7777 			//      [ 1.0, 1.0, 1.0, 1.0]
7778 			//   },
7779 			//   {
7780 			//      1.0,
7781 			//      [ 0.0, 0.5, 0.0, 0.0]
7782 			//   }, //     ^^^
7783 			//   {
7784 			//      0.0,
7785 			//      [ 1.0, 1.0, 1.0, 1.0]
7786 			//   }
7787 			// ]
7788 			"array_of_struct_of_array",
7789 
7790 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
7791 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
7792 			"%stype               = OpTypeStruct %f32 %a4f32\n"
7793 			"%a3stype             = OpTypeArray %stype %c_u32_3\n"
7794 			"%fp_a3stype          = OpTypePointer Function %a3stype\n"
7795 			"%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
7796 			"%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
7797 			"%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
7798 			"%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
7799 			"%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
7800 
7801 			"%v                   = OpVariable %fp_a3stype Function %carr\n"
7802 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
7803 			"%f_l                 = OpLoad %f32 %f\n"
7804 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
7805 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
7806 		}
7807 	};
7808 
7809 	getHalfColorsFullAlpha(inputColors);
7810 	outputColors[0] = RGBA(255, 255, 255, 255);
7811 	outputColors[1] = RGBA(255, 127, 127, 255);
7812 	outputColors[2] = RGBA(127, 255, 127, 255);
7813 	outputColors[3] = RGBA(127, 127, 255, 255);
7814 
7815 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
7816 	{
7817 		map<string, string> fragments;
7818 		fragments["pre_main"] = tests[testNdx].constants;
7819 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
7820 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
7821 	}
7822 	return opConstantCompositeTests.release();
7823 }
7824 
createSelectionBlockOrderTests(tcu::TestContext & testCtx)7825 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
7826 {
7827 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
7828 	RGBA							inputColors[4];
7829 	RGBA							outputColors[4];
7830 	map<string, string>				fragments;
7831 
7832 	// vec4 test_code(vec4 param) {
7833 	//   vec4 result = param;
7834 	//   for (int i = 0; i < 4; ++i) {
7835 	//     if (i == 0) result[i] = 0.;
7836 	//     else        result[i] = 1. - result[i];
7837 	//   }
7838 	//   return result;
7839 	// }
7840 	const char						function[]			=
7841 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7842 		"%param1    = OpFunctionParameter %v4f32\n"
7843 		"%lbl       = OpLabel\n"
7844 		"%iptr      = OpVariable %fp_i32 Function\n"
7845 		"%result    = OpVariable %fp_v4f32 Function\n"
7846 		"             OpStore %iptr %c_i32_0\n"
7847 		"             OpStore %result %param1\n"
7848 		"             OpBranch %loop\n"
7849 
7850 		// Loop entry block.
7851 		"%loop      = OpLabel\n"
7852 		"%ival      = OpLoad %i32 %iptr\n"
7853 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7854 		"             OpLoopMerge %exit %if_entry None\n"
7855 		"             OpBranchConditional %lt_4 %if_entry %exit\n"
7856 
7857 		// Merge block for loop.
7858 		"%exit      = OpLabel\n"
7859 		"%ret       = OpLoad %v4f32 %result\n"
7860 		"             OpReturnValue %ret\n"
7861 
7862 		// If-statement entry block.
7863 		"%if_entry  = OpLabel\n"
7864 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
7865 		"%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
7866 		"             OpSelectionMerge %if_exit None\n"
7867 		"             OpBranchConditional %eq_0 %if_true %if_false\n"
7868 
7869 		// False branch for if-statement.
7870 		"%if_false  = OpLabel\n"
7871 		"%val       = OpLoad %f32 %loc\n"
7872 		"%sub       = OpFSub %f32 %c_f32_1 %val\n"
7873 		"             OpStore %loc %sub\n"
7874 		"             OpBranch %if_exit\n"
7875 
7876 		// Merge block for if-statement.
7877 		"%if_exit   = OpLabel\n"
7878 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
7879 		"             OpStore %iptr %ival_next\n"
7880 		"             OpBranch %loop\n"
7881 
7882 		// True branch for if-statement.
7883 		"%if_true   = OpLabel\n"
7884 		"             OpStore %loc %c_f32_0\n"
7885 		"             OpBranch %if_exit\n"
7886 
7887 		"             OpFunctionEnd\n";
7888 
7889 	fragments["testfun"]	= function;
7890 
7891 	inputColors[0]			= RGBA(127, 127, 127, 0);
7892 	inputColors[1]			= RGBA(127, 0,   0,   0);
7893 	inputColors[2]			= RGBA(0,   127, 0,   0);
7894 	inputColors[3]			= RGBA(0,   0,   127, 0);
7895 
7896 	outputColors[0]			= RGBA(0, 128, 128, 255);
7897 	outputColors[1]			= RGBA(0, 255, 255, 255);
7898 	outputColors[2]			= RGBA(0, 128, 255, 255);
7899 	outputColors[3]			= RGBA(0, 255, 128, 255);
7900 
7901 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
7902 
7903 	return group.release();
7904 }
7905 
createSwitchBlockOrderTests(tcu::TestContext & testCtx)7906 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
7907 {
7908 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
7909 	RGBA							inputColors[4];
7910 	RGBA							outputColors[4];
7911 	map<string, string>				fragments;
7912 
7913 	const char						typesAndConstants[]	=
7914 		"%c_f32_p2  = OpConstant %f32 0.2\n"
7915 		"%c_f32_p4  = OpConstant %f32 0.4\n"
7916 		"%c_f32_p6  = OpConstant %f32 0.6\n"
7917 		"%c_f32_p8  = OpConstant %f32 0.8\n";
7918 
7919 	// vec4 test_code(vec4 param) {
7920 	//   vec4 result = param;
7921 	//   for (int i = 0; i < 4; ++i) {
7922 	//     switch (i) {
7923 	//       case 0: result[i] += .2; break;
7924 	//       case 1: result[i] += .6; break;
7925 	//       case 2: result[i] += .4; break;
7926 	//       case 3: result[i] += .8; break;
7927 	//       default: break; // unreachable
7928 	//     }
7929 	//   }
7930 	//   return result;
7931 	// }
7932 	const char						function[]			=
7933 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
7934 		"%param1    = OpFunctionParameter %v4f32\n"
7935 		"%lbl       = OpLabel\n"
7936 		"%iptr      = OpVariable %fp_i32 Function\n"
7937 		"%result    = OpVariable %fp_v4f32 Function\n"
7938 		"             OpStore %iptr %c_i32_0\n"
7939 		"             OpStore %result %param1\n"
7940 		"             OpBranch %loop\n"
7941 
7942 		// Loop entry block.
7943 		"%loop      = OpLabel\n"
7944 		"%ival      = OpLoad %i32 %iptr\n"
7945 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
7946 		"             OpLoopMerge %exit %cont None\n"
7947 		"             OpBranchConditional %lt_4 %switch_entry %exit\n"
7948 
7949 		// Merge block for loop.
7950 		"%exit      = OpLabel\n"
7951 		"%ret       = OpLoad %v4f32 %result\n"
7952 		"             OpReturnValue %ret\n"
7953 
7954 		// Switch-statement entry block.
7955 		"%switch_entry   = OpLabel\n"
7956 		"%loc            = OpAccessChain %fp_f32 %result %ival\n"
7957 		"%val            = OpLoad %f32 %loc\n"
7958 		"                  OpSelectionMerge %switch_exit None\n"
7959 		"                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
7960 
7961 		"%case2          = OpLabel\n"
7962 		"%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
7963 		"                  OpStore %loc %addp4\n"
7964 		"                  OpBranch %switch_exit\n"
7965 
7966 		"%switch_default = OpLabel\n"
7967 		"                  OpUnreachable\n"
7968 
7969 		"%case3          = OpLabel\n"
7970 		"%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
7971 		"                  OpStore %loc %addp8\n"
7972 		"                  OpBranch %switch_exit\n"
7973 
7974 		"%case0          = OpLabel\n"
7975 		"%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
7976 		"                  OpStore %loc %addp2\n"
7977 		"                  OpBranch %switch_exit\n"
7978 
7979 		// Merge block for switch-statement.
7980 		"%switch_exit    = OpLabel\n"
7981 		"%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
7982 		"                  OpStore %iptr %ival_next\n"
7983 		"                  OpBranch %cont\n"
7984 		"%cont           = OpLabel\n"
7985 		"                  OpBranch %loop\n"
7986 
7987 		"%case1          = OpLabel\n"
7988 		"%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
7989 		"                  OpStore %loc %addp6\n"
7990 		"                  OpBranch %switch_exit\n"
7991 
7992 		"                  OpFunctionEnd\n";
7993 
7994 	fragments["pre_main"]	= typesAndConstants;
7995 	fragments["testfun"]	= function;
7996 
7997 	inputColors[0]			= RGBA(127, 27,  127, 51);
7998 	inputColors[1]			= RGBA(127, 0,   0,   51);
7999 	inputColors[2]			= RGBA(0,   27,  0,   51);
8000 	inputColors[3]			= RGBA(0,   0,   127, 51);
8001 
8002 	outputColors[0]			= RGBA(178, 180, 229, 255);
8003 	outputColors[1]			= RGBA(178, 153, 102, 255);
8004 	outputColors[2]			= RGBA(51,  180, 102, 255);
8005 	outputColors[3]			= RGBA(51,  153, 229, 255);
8006 
8007 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
8008 
8009 	addOpSwitchAmberTests(*group, testCtx);
8010 
8011 	return group.release();
8012 }
8013 
createDecorationGroupTests(tcu::TestContext & testCtx)8014 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
8015 {
8016 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
8017 	RGBA							inputColors[4];
8018 	RGBA							outputColors[4];
8019 	map<string, string>				fragments;
8020 
8021 	const char						decorations[]		=
8022 		"OpDecorate %array_group         ArrayStride 4\n"
8023 		"OpDecorate %struct_member_group Offset 0\n"
8024 		"%array_group         = OpDecorationGroup\n"
8025 		"%struct_member_group = OpDecorationGroup\n"
8026 
8027 		"OpDecorate %group1 RelaxedPrecision\n"
8028 		"OpDecorate %group3 RelaxedPrecision\n"
8029 		"OpDecorate %group3 Flat\n"
8030 		"OpDecorate %group3 Restrict\n"
8031 		"%group0 = OpDecorationGroup\n"
8032 		"%group1 = OpDecorationGroup\n"
8033 		"%group3 = OpDecorationGroup\n";
8034 
8035 	const char						typesAndConstants[]	=
8036 		"%a3f32     = OpTypeArray %f32 %c_u32_3\n"
8037 		"%struct1   = OpTypeStruct %a3f32\n"
8038 		"%struct2   = OpTypeStruct %a3f32\n"
8039 		"%fp_struct1 = OpTypePointer Function %struct1\n"
8040 		"%fp_struct2 = OpTypePointer Function %struct2\n"
8041 		"%c_f32_2    = OpConstant %f32 2.\n"
8042 		"%c_f32_n2   = OpConstant %f32 -2.\n"
8043 
8044 		"%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
8045 		"%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
8046 		"%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
8047 		"%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
8048 
8049 	const char						function[]			=
8050 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8051 		"%param     = OpFunctionParameter %v4f32\n"
8052 		"%entry     = OpLabel\n"
8053 		"%result    = OpVariable %fp_v4f32 Function\n"
8054 		"%v_struct1 = OpVariable %fp_struct1 Function\n"
8055 		"%v_struct2 = OpVariable %fp_struct2 Function\n"
8056 		"             OpStore %result %param\n"
8057 		"             OpStore %v_struct1 %c_struct1\n"
8058 		"             OpStore %v_struct2 %c_struct2\n"
8059 		"%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
8060 		"%val1      = OpLoad %f32 %ptr1\n"
8061 		"%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
8062 		"%val2      = OpLoad %f32 %ptr2\n"
8063 		"%addvalues = OpFAdd %f32 %val1 %val2\n"
8064 		"%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8065 		"%val       = OpLoad %f32 %ptr\n"
8066 		"%addresult = OpFAdd %f32 %addvalues %val\n"
8067 		"             OpStore %ptr %addresult\n"
8068 		"%ret       = OpLoad %v4f32 %result\n"
8069 		"             OpReturnValue %ret\n"
8070 		"             OpFunctionEnd\n";
8071 
8072 	struct CaseNameDecoration
8073 	{
8074 		string name;
8075 		string decoration;
8076 	};
8077 
8078 	CaseNameDecoration tests[] =
8079 	{
8080 		{
8081 			"same_decoration_group_on_multiple_types",
8082 			"OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
8083 		},
8084 		{
8085 			"empty_decoration_group",
8086 			"OpGroupDecorate %group0      %a3f32\n"
8087 			"OpGroupDecorate %group0      %result\n"
8088 		},
8089 		{
8090 			"one_element_decoration_group",
8091 			"OpGroupDecorate %array_group %a3f32\n"
8092 		},
8093 		{
8094 			"multiple_elements_decoration_group",
8095 			"OpGroupDecorate %group3      %v_struct1\n"
8096 		},
8097 		{
8098 			"multiple_decoration_groups_on_same_variable",
8099 			"OpGroupDecorate %group0      %v_struct2\n"
8100 			"OpGroupDecorate %group1      %v_struct2\n"
8101 			"OpGroupDecorate %group3      %v_struct2\n"
8102 		},
8103 		{
8104 			"same_decoration_group_multiple_times",
8105 			"OpGroupDecorate %group1      %addvalues\n"
8106 			"OpGroupDecorate %group1      %addvalues\n"
8107 			"OpGroupDecorate %group1      %addvalues\n"
8108 		},
8109 
8110 	};
8111 
8112 	getHalfColorsFullAlpha(inputColors);
8113 	getHalfColorsFullAlpha(outputColors);
8114 
8115 	for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
8116 	{
8117 		fragments["decoration"]	= decorations + tests[idx].decoration;
8118 		fragments["pre_main"]	= typesAndConstants;
8119 		fragments["testfun"]	= function;
8120 
8121 		createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
8122 	}
8123 
8124 	return group.release();
8125 }
8126 
8127 struct SpecConstantTwoValGraphicsCase
8128 {
8129 	const std::string	caseName;
8130 	const std::string	scDefinition0;
8131 	const std::string	scDefinition1;
8132 	const std::string	scResultType;
8133 	const std::string	scOperation;
8134 	SpecConstantValue	scActualValue0;
8135 	SpecConstantValue	scActualValue1;
8136 	const std::string	resultOperation;
8137 	RGBA				expectedColors[4];
8138 	CaseFlags			caseFlags;
8139 
SpecConstantTwoValGraphicsCasevkt::SpirVAssembly::SpecConstantTwoValGraphicsCase8140 						SpecConstantTwoValGraphicsCase (const std::string&			name,
8141 														const std::string&			definition0,
8142 														const std::string&			definition1,
8143 														const std::string&			resultType,
8144 														const std::string&			operation,
8145 														const SpecConstantValue&	value0,
8146 														const SpecConstantValue&	value1,
8147 														const std::string&			resultOp,
8148 														const RGBA					(&output)[4],
8149 														CaseFlags					flags = FLAG_NONE)
8150 							: caseName				(name)
8151 							, scDefinition0			(definition0)
8152 							, scDefinition1			(definition1)
8153 							, scResultType			(resultType)
8154 							, scOperation			(operation)
8155 							, scActualValue0		(value0)
8156 							, scActualValue1		(value1)
8157 							, resultOperation		(resultOp)
8158 							, caseFlags				(flags)
8159 	{
8160 		expectedColors[0] = output[0];
8161 		expectedColors[1] = output[1];
8162 		expectedColors[2] = output[2];
8163 		expectedColors[3] = output[3];
8164 	}
8165 };
8166 
createSpecConstantTests(tcu::TestContext & testCtx)8167 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
8168 {
8169 	de::MovePtr<tcu::TestCaseGroup>			group (new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
8170 	vector<SpecConstantTwoValGraphicsCase>	cases;
8171 	RGBA									inputColors[4];
8172 	RGBA									outputColors0[4];
8173 	RGBA									outputColors1[4];
8174 	RGBA									outputColors2[4];
8175 
8176 	const char	decorations1[]			=
8177 		"OpDecorate %sc_0  SpecId 0\n"
8178 		"OpDecorate %sc_1  SpecId 1\n";
8179 
8180 	const char	typesAndConstants1[]	=
8181 		"${OPTYPE_DEFINITIONS:opt}"
8182 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
8183 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
8184 		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
8185 
8186 	const char	function1[]				=
8187 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8188 		"%param     = OpFunctionParameter %v4f32\n"
8189 		"%label     = OpLabel\n"
8190 		"%result    = OpVariable %fp_v4f32 Function\n"
8191 		"${TYPE_CONVERT:opt}"
8192 		"             OpStore %result %param\n"
8193 		"%gen       = ${GEN_RESULT}\n"
8194 		"%index     = OpIAdd %i32 %gen %c_i32_1\n"
8195 		"%loc       = OpAccessChain %fp_f32 %result %index\n"
8196 		"%val       = OpLoad %f32 %loc\n"
8197 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8198 		"             OpStore %loc %add\n"
8199 		"%ret       = OpLoad %v4f32 %result\n"
8200 		"             OpReturnValue %ret\n"
8201 		"             OpFunctionEnd\n";
8202 
8203 	inputColors[0] = RGBA(127, 127, 127, 255);
8204 	inputColors[1] = RGBA(127, 0,   0,   255);
8205 	inputColors[2] = RGBA(0,   127, 0,   255);
8206 	inputColors[3] = RGBA(0,   0,   127, 255);
8207 
8208 	// Derived from inputColors[x] by adding 128 to inputColors[x][0].
8209 	outputColors0[0] = RGBA(255, 127, 127, 255);
8210 	outputColors0[1] = RGBA(255, 0,   0,   255);
8211 	outputColors0[2] = RGBA(128, 127, 0,   255);
8212 	outputColors0[3] = RGBA(128, 0,   127, 255);
8213 
8214 	// Derived from inputColors[x] by adding 128 to inputColors[x][1].
8215 	outputColors1[0] = RGBA(127, 255, 127, 255);
8216 	outputColors1[1] = RGBA(127, 128, 0,   255);
8217 	outputColors1[2] = RGBA(0,   255, 0,   255);
8218 	outputColors1[3] = RGBA(0,   128, 127, 255);
8219 
8220 	// Derived from inputColors[x] by adding 128 to inputColors[x][2].
8221 	outputColors2[0] = RGBA(127, 127, 255, 255);
8222 	outputColors2[1] = RGBA(127, 0,   128, 255);
8223 	outputColors2[2] = RGBA(0,   127, 128, 255);
8224 	outputColors2[3] = RGBA(0,   0,   255, 255);
8225 
8226 	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
8227 	const char addZeroToSc32[]		= "OpIAdd %i32 %c_i32_0 %sc_op32";
8228 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
8229 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
8230 
8231 	cases.push_back(SpecConstantTwoValGraphicsCase("iadd",							" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",				19,					-20,				addZeroToSc,		outputColors0));
8232 	cases.push_back(SpecConstantTwoValGraphicsCase("isub",							" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",				19,					20,					addZeroToSc,		outputColors0));
8233 	cases.push_back(SpecConstantTwoValGraphicsCase("imul",							" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",				-1,					-1,					addZeroToSc,		outputColors2));
8234 	cases.push_back(SpecConstantTwoValGraphicsCase("sdiv",							" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",				-126,				126,				addZeroToSc,		outputColors0));
8235 	cases.push_back(SpecConstantTwoValGraphicsCase("udiv",							" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",				126,				126,				addZeroToSc,		outputColors2));
8236 	cases.push_back(SpecConstantTwoValGraphicsCase("srem",							" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",				3,					2,					addZeroToSc,		outputColors2));
8237 	cases.push_back(SpecConstantTwoValGraphicsCase("smod",							" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",				3,					2,					addZeroToSc,		outputColors2));
8238 	cases.push_back(SpecConstantTwoValGraphicsCase("umod",							" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",				1001,				500,				addZeroToSc,		outputColors2));
8239 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseand",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",				0x33,				0x0d,				addZeroToSc,		outputColors2));
8240 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwiseor",						" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",				0,					1,					addZeroToSc,		outputColors2));
8241 	cases.push_back(SpecConstantTwoValGraphicsCase("bitwisexor",					" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",				0x2e,				0x2f,				addZeroToSc,		outputColors2));
8242 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical",				" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					1,					addZeroToSc,		outputColors2));
8243 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic",			" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					2,					addZeroToSc,		outputColors0));
8244 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical",				" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					0,					addZeroToSc,		outputColors2));
8245 
8246 	// Shifts for other integer sizes.
8247 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i64",			" %i64 0",		" %i64 0",		"%i64",		"ShiftRightLogical    %sc_0 %sc_1",				deInt64{2},			deInt64{1},			addZeroToSc32,		outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8248 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i64",		" %i64 0",		" %i64 0",		"%i64",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt64{-4},		deInt64{2},			addZeroToSc32,		outputColors0, (FLAG_I64 | FLAG_CONVERT)));
8249 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i64",			" %i64 0",		" %i64 0",		"%i64",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt64{1},			deInt64{0},			addZeroToSc32,		outputColors2, (FLAG_I64 | FLAG_CONVERT)));
8250 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i16",			" %i16 0",		" %i16 0",		"%i16",		"ShiftRightLogical    %sc_0 %sc_1",				deInt16{2},			deInt16{1},			addZeroToSc32,		outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8251 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i16",		" %i16 0",		" %i16 0",		"%i16",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt16{-4},		deInt16{2},			addZeroToSc32,		outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8252 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i16",			" %i16 0",		" %i16 0",		"%i16",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt16{1},			deInt16{0},			addZeroToSc32,		outputColors2, (FLAG_I16 | FLAG_CONVERT)));
8253 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_i8",			" %i8 0",		" %i8 0",		"%i8",		"ShiftRightLogical    %sc_0 %sc_1",				deInt8{2},			deInt8{1},			addZeroToSc32,		outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8254 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_i8",		" %i8 0",		" %i8 0",		"%i8",		"ShiftRightArithmetic %sc_0 %sc_1",				deInt8{-4},			deInt8{2},			addZeroToSc32,		outputColors0, (FLAG_I8 | FLAG_CONVERT)));
8255 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_i8",			" %i8 0",		" %i8 0",		"%i8",		"ShiftLeftLogical     %sc_0 %sc_1",				deInt8{1},			deInt8{0},			addZeroToSc32,		outputColors2, (FLAG_I8 | FLAG_CONVERT)));
8256 
8257 	// Shifts for other integer sizes but only in the shift amount.
8258 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i64",		" %i32 0",		" %i64 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt64{1},			addZeroToSc,		outputColors2, (FLAG_I64)));
8259 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i64",	" %i32 0",		" %i64 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt64{2},			addZeroToSc,		outputColors0, (FLAG_I64)));
8260 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i64",		" %i32 0",		" %i64 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt64{0},			addZeroToSc,		outputColors2, (FLAG_I64)));
8261 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i16",		" %i32 0",		" %i16 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt16{1},			addZeroToSc,		outputColors2, (FLAG_I16)));
8262 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i16",	" %i32 0",		" %i16 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt16{2},			addZeroToSc,		outputColors0, (FLAG_I16)));
8263 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i16",		" %i32 0",		" %i16 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt16{0},			addZeroToSc,		outputColors2, (FLAG_I16)));
8264 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightlogical_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,					deInt8{1},			addZeroToSc,		outputColors2, (FLAG_I8)));
8265 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftrightarithmetic_s_i8",		" %i32 0",		" %i8 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,					deInt8{2},			addZeroToSc,		outputColors0, (FLAG_I8)));
8266 	cases.push_back(SpecConstantTwoValGraphicsCase("shiftleftlogical_s_i8",			" %i32 0",		" %i8 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,					deInt8{0},			addZeroToSc,		outputColors2, (FLAG_I8)));
8267 
8268 	cases.push_back(SpecConstantTwoValGraphicsCase("slessthan",						" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",				-20,				-10,				selectTrueUsingSc,	outputColors2));
8269 	cases.push_back(SpecConstantTwoValGraphicsCase("ulessthan",						" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",				10,					20,					selectTrueUsingSc,	outputColors2));
8270 	cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthan",					" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",				-1000,				50,					selectFalseUsingSc,	outputColors2));
8271 	cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthan",					" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",				10,					5,					selectTrueUsingSc,	outputColors2));
8272 	cases.push_back(SpecConstantTwoValGraphicsCase("slessthanequal",				" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",				-10,				-10,				selectTrueUsingSc,	outputColors2));
8273 	cases.push_back(SpecConstantTwoValGraphicsCase("ulessthanequal",				" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",				50,					100,				selectTrueUsingSc,	outputColors2));
8274 	cases.push_back(SpecConstantTwoValGraphicsCase("sgreaterthanequal",				" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",				-1000,				50,					selectFalseUsingSc,	outputColors2));
8275 	cases.push_back(SpecConstantTwoValGraphicsCase("ugreaterthanequal",				" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",				10,					10,					selectTrueUsingSc,	outputColors2));
8276 	cases.push_back(SpecConstantTwoValGraphicsCase("iequal",						" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",				42,					24,					selectFalseUsingSc,	outputColors2));
8277 	cases.push_back(SpecConstantTwoValGraphicsCase("inotequal",						" %i32 0",		" %i32 0",		"%bool",	"INotEqual            %sc_0 %sc_1",				42,					24,					selectTrueUsingSc,	outputColors2));
8278 	cases.push_back(SpecConstantTwoValGraphicsCase("logicaland",					"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",				0,					1,					selectFalseUsingSc,	outputColors2));
8279 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalor",						"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",				1,					0,					selectTrueUsingSc,	outputColors2));
8280 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalequal",					"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",				0,					1,					selectFalseUsingSc,	outputColors2));
8281 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalnotequal",				"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",				1,					0,					selectTrueUsingSc,	outputColors2));
8282 	cases.push_back(SpecConstantTwoValGraphicsCase("snegate",						" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",					-1,					0,					addZeroToSc,		outputColors2));
8283 	cases.push_back(SpecConstantTwoValGraphicsCase("not",							" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,					0,					addZeroToSc,		outputColors2));
8284 	cases.push_back(SpecConstantTwoValGraphicsCase("logicalnot",					"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,					0,					selectFalseUsingSc,	outputColors2));
8285 	cases.push_back(SpecConstantTwoValGraphicsCase("select",						"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,					1,					addZeroToSc,		outputColors2));
8286 	cases.push_back(SpecConstantTwoValGraphicsCase("sconvert",						" %i32 0",		" %i32 0",		"%i16",		"SConvert             %sc_0",					-1,					0,					addZeroToSc32,		outputColors0, (FLAG_I16 | FLAG_CONVERT)));
8287 	cases.push_back(SpecConstantTwoValGraphicsCase("fconvert",						" %f32 0",		" %f32 0",		"%f64",		"FConvert             %sc_0",					tcu::Float32(-1.0),	tcu::Float32(0.0),	addZeroToSc32,		outputColors0, (FLAG_F64 | FLAG_CONVERT)));
8288 	cases.push_back(SpecConstantTwoValGraphicsCase("fconvert16",					" %f16 0",		" %f16 0",		"%f32",		"FConvert             %sc_0",					tcu::Float16(-1.0),	tcu::Float16(0.0),	addZeroToSc32,		outputColors0, (FLAG_F16 | FLAG_CONVERT)));
8289 	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
8290 
8291 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
8292 	{
8293 		map<string, string>			specializations;
8294 		map<string, string>			fragments;
8295 		SpecConstants				specConstants;
8296 		PushConstants				noPushConstants;
8297 		GraphicsResources			noResources;
8298 		GraphicsInterfaces			noInterfaces;
8299 		vector<string>				extensions;
8300 		VulkanFeatures				requiredFeatures;
8301 
8302 		// Special SPIR-V code when using 16-bit integers.
8303 		if (cases[caseNdx].caseFlags & FLAG_I16)
8304 		{
8305 			requiredFeatures.coreFeatures.shaderInt16		= VK_TRUE;
8306 			fragments["capability"]							+= "OpCapability Int16\n";							// Adds 16-bit integer capability
8307 			specializations["OPTYPE_DEFINITIONS"]			+= "%i16 = OpTypeInt 16 1\n";						// Adds 16-bit integer type
8308 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8309 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpSConvert %i32 %sc_op\n";			// Converts 16-bit integer to 32-bit integer
8310 		}
8311 
8312 		// Special SPIR-V code when using 64-bit integers.
8313 		if (cases[caseNdx].caseFlags & FLAG_I64)
8314 		{
8315 			requiredFeatures.coreFeatures.shaderInt64		= VK_TRUE;
8316 			fragments["capability"]							+= "OpCapability Int64\n";							// Adds 64-bit integer capability
8317 			specializations["OPTYPE_DEFINITIONS"]			+= "%i64 = OpTypeInt 64 1\n";						// Adds 64-bit integer type
8318 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8319 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpSConvert %i32 %sc_op\n";			// Converts 64-bit integer to 32-bit integer
8320 		}
8321 
8322 		// Special SPIR-V code when using 64-bit floats.
8323 		if (cases[caseNdx].caseFlags & FLAG_F64)
8324 		{
8325 			requiredFeatures.coreFeatures.shaderFloat64		= VK_TRUE;
8326 			fragments["capability"]							+= "OpCapability Float64\n";						// Adds 64-bit float capability
8327 			specializations["OPTYPE_DEFINITIONS"]			+= "%f64 = OpTypeFloat 64\n";						// Adds 64-bit float type
8328 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8329 				specializations["TYPE_CONVERT"]				+= "%sc_op32 = OpConvertFToS %i32 %sc_op\n";		// Converts 64-bit float to 32-bit integer
8330 		}
8331 
8332 		// Extension needed for float16 and int8.
8333 		if (cases[caseNdx].caseFlags & (FLAG_F16 | FLAG_I8))
8334 			extensions.push_back("VK_KHR_shader_float16_int8");
8335 
8336 		// Special SPIR-V code when using 16-bit floats.
8337 		if (cases[caseNdx].caseFlags & FLAG_F16)
8338 		{
8339 			requiredFeatures.extFloat16Int8.shaderFloat16 = true;
8340 			fragments["capability"]						+= "OpCapability Float16\n";						// Adds 16-bit float capability
8341 			specializations["OPTYPE_DEFINITIONS"]		+= "%f16 = OpTypeFloat 16\n";						// Adds 16-bit float type
8342 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8343 				specializations["TYPE_CONVERT"]			+= "%sc_op32 = OpConvertFToS %i32 %sc_op\n";		// Converts 16-bit float to 32-bit integer
8344 		}
8345 
8346 		// Special SPIR-V code when using 8-bit integers.
8347 		if (cases[caseNdx].caseFlags & FLAG_I8)
8348 		{
8349 			requiredFeatures.extFloat16Int8.shaderInt8 = true;
8350 			fragments["capability"]						+= "OpCapability Int8\n";						// Adds 8-bit integer capability
8351 			specializations["OPTYPE_DEFINITIONS"]		+= "%i8 = OpTypeInt 8 1\n";						// Adds 8-bit integer type
8352 			if (cases[caseNdx].caseFlags & FLAG_CONVERT)
8353 				specializations["TYPE_CONVERT"]			+= "%sc_op32 = OpSConvert %i32 %sc_op\n";		// Converts 8-bit integer to 32-bit integer
8354 		}
8355 
8356 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
8357 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
8358 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
8359 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
8360 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
8361 
8362 		fragments["decoration"]				= tcu::StringTemplate(decorations1).specialize(specializations);
8363 		fragments["pre_main"]				= tcu::StringTemplate(typesAndConstants1).specialize(specializations);
8364 		fragments["testfun"]				= tcu::StringTemplate(function1).specialize(specializations);
8365 
8366 		cases[caseNdx].scActualValue0.appendTo(specConstants);
8367 		cases[caseNdx].scActualValue1.appendTo(specConstants);
8368 
8369 		createTestsForAllStages(
8370 			cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants,
8371 			noPushConstants, noResources, noInterfaces, extensions, requiredFeatures, group.get());
8372 	}
8373 
8374 	const char			decorations2[]		=
8375 		"OpDecorate %sc_0  SpecId 0\n"
8376 		"OpDecorate %sc_1  SpecId 1\n"
8377 		"OpDecorate %sc_2  SpecId 2\n";
8378 
8379 	const std::string	typesAndConstants2	=
8380 		"%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
8381 		"%vec3_undef  = OpUndef %v3i32\n"
8382 
8383 		+ getSpecConstantOpStructConstantsAndTypes() + getSpecConstantOpStructComposites() +
8384 
8385 		"%sc_0        = OpSpecConstant %i32 0\n"
8386 		"%sc_1        = OpSpecConstant %i32 0\n"
8387 		"%sc_2        = OpSpecConstant %i32 0\n"
8388 
8389 		+ getSpecConstantOpStructConstBlock() +
8390 
8391 		"%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0      0\n"							// (sc_0, 0,    0)
8392 		"%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0      1\n"							// (0,    sc_1, 0)
8393 		"%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0      2\n"							// (0,    0,    sc_2)
8394 		"%sc_vec3_0_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %vec3_undef  0          0xFFFFFFFF 2\n"	// (sc_0, ???,  0)
8395 		"%sc_vec3_1_s = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_1   %vec3_undef  0xFFFFFFFF 1          0\n"	// (???,  sc_1, 0)
8396 		"%sc_vec3_2_s = OpSpecConstantOp %v3i32 VectorShuffle    %vec3_undef  %sc_vec3_2   5          0xFFFFFFFF 5\n"	// (sc_2, ???,  sc_2)
8397 		"%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0_s %sc_vec3_1_s 1 0 4\n"						// (0,    sc_0, sc_1)
8398 		"%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2_s 5 1 2\n"						// (sc_2, sc_0, sc_1)
8399 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              0\n"							// sc_2
8400 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              1\n"							// sc_0
8401 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012              2\n"							// sc_1
8402 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"								// (sc_2 - sc_0)
8403 		"%sc_factor   = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";								// (sc_2 - sc_0) * sc_1
8404 
8405 	const std::string	function2			=
8406 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8407 		"%param     = OpFunctionParameter %v4f32\n"
8408 		"%label     = OpLabel\n"
8409 		"%result    = OpVariable %fp_v4f32 Function\n"
8410 
8411 		+ getSpecConstantOpStructInstructions() +
8412 
8413 		"             OpStore %result %param\n"
8414 		"%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
8415 		"%val       = OpLoad %f32 %loc\n"
8416 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
8417 		"             OpStore %loc %add\n"
8418 		"%ret       = OpLoad %v4f32 %result\n"
8419 		"             OpReturnValue %ret\n"
8420 		"             OpFunctionEnd\n";
8421 
8422 	map<string, string>	fragments;
8423 	SpecConstants		specConstants;
8424 
8425 	fragments["decoration"]	= decorations2;
8426 	fragments["pre_main"]	= typesAndConstants2;
8427 	fragments["testfun"]	= function2;
8428 
8429 	specConstants.append<deInt32>(56789);
8430 	specConstants.append<deInt32>(-2);
8431 	specConstants.append<deInt32>(56788);
8432 
8433 	createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
8434 
8435 	return group.release();
8436 }
8437 
createOpPhiTests(tcu::TestContext & testCtx)8438 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
8439 {
8440 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
8441 	RGBA							inputColors[4];
8442 	RGBA							outputColors1[4];
8443 	RGBA							outputColors2[4];
8444 	RGBA							outputColors3[4];
8445 	RGBA							outputColors4[4];
8446 	map<string, string>				fragments1;
8447 	map<string, string>				fragments2;
8448 	map<string, string>				fragments3;
8449 	map<string, string>				fragments4;
8450 	std::vector<std::string>		extensions4;
8451 	GraphicsResources				resources4;
8452 	VulkanFeatures					vulkanFeatures4;
8453 
8454 	const char	typesAndConstants1[]	=
8455 		"%c_f32_p2  = OpConstant %f32 0.2\n"
8456 		"%c_f32_p4  = OpConstant %f32 0.4\n"
8457 		"%c_f32_p5  = OpConstant %f32 0.5\n"
8458 		"%c_f32_p8  = OpConstant %f32 0.8\n";
8459 
8460 	// vec4 test_code(vec4 param) {
8461 	//   vec4 result = param;
8462 	//   for (int i = 0; i < 4; ++i) {
8463 	//     float operand;
8464 	//     switch (i) {
8465 	//       case 0: operand = .2; break;
8466 	//       case 1: operand = .5; break;
8467 	//       case 2: operand = .4; break;
8468 	//       case 3: operand = .0; break;
8469 	//       default: break; // unreachable
8470 	//     }
8471 	//     result[i] += operand;
8472 	//   }
8473 	//   return result;
8474 	// }
8475 	const char	function1[]				=
8476 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8477 		"%param1    = OpFunctionParameter %v4f32\n"
8478 		"%lbl       = OpLabel\n"
8479 		"%iptr      = OpVariable %fp_i32 Function\n"
8480 		"%result    = OpVariable %fp_v4f32 Function\n"
8481 		"             OpStore %iptr %c_i32_0\n"
8482 		"             OpStore %result %param1\n"
8483 		"             OpBranch %loop\n"
8484 
8485 		"%loop      = OpLabel\n"
8486 		"%ival      = OpLoad %i32 %iptr\n"
8487 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
8488 		"             OpLoopMerge %exit %cont None\n"
8489 		"             OpBranchConditional %lt_4 %entry %exit\n"
8490 
8491 		"%entry     = OpLabel\n"
8492 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
8493 		"%val       = OpLoad %f32 %loc\n"
8494 		"             OpSelectionMerge %phi None\n"
8495 		"             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
8496 
8497 		"%case0     = OpLabel\n"
8498 		"             OpBranch %phi\n"
8499 		"%case1     = OpLabel\n"
8500 		"             OpBranch %phi\n"
8501 		"%case2     = OpLabel\n"
8502 		"             OpBranch %phi\n"
8503 		"%case3     = OpLabel\n"
8504 		"             OpBranch %phi\n"
8505 
8506 		"%default   = OpLabel\n"
8507 		"             OpUnreachable\n"
8508 
8509 		"%phi       = OpLabel\n"
8510 		"%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
8511 		"             OpBranch %cont\n"
8512 		"%cont      = OpLabel\n"
8513 		"%add       = OpFAdd %f32 %val %operand\n"
8514 		"             OpStore %loc %add\n"
8515 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
8516 		"             OpStore %iptr %ival_next\n"
8517 		"             OpBranch %loop\n"
8518 
8519 		"%exit      = OpLabel\n"
8520 		"%ret       = OpLoad %v4f32 %result\n"
8521 		"             OpReturnValue %ret\n"
8522 
8523 		"             OpFunctionEnd\n";
8524 
8525 	fragments1["pre_main"]	= typesAndConstants1;
8526 	fragments1["testfun"]	= function1;
8527 
8528 	getHalfColorsFullAlpha(inputColors);
8529 
8530 	outputColors1[0]		= RGBA(178, 255, 229, 255);
8531 	outputColors1[1]		= RGBA(178, 127, 102, 255);
8532 	outputColors1[2]		= RGBA(51,  255, 102, 255);
8533 	outputColors1[3]		= RGBA(51,  127, 229, 255);
8534 
8535 	createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
8536 
8537 	const char	typesAndConstants2[]	=
8538 		"%c_f32_p2  = OpConstant %f32 0.2\n";
8539 
8540 	// Add .4 to the second element of the given parameter.
8541 	const char	function2[]				=
8542 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8543 		"%param     = OpFunctionParameter %v4f32\n"
8544 		"%entry     = OpLabel\n"
8545 		"%result    = OpVariable %fp_v4f32 Function\n"
8546 		"             OpStore %result %param\n"
8547 		"%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
8548 		"%val       = OpLoad %f32 %loc\n"
8549 		"             OpBranch %phi\n"
8550 
8551 		"%phi        = OpLabel\n"
8552 		"%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
8553 		"%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
8554 		"%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
8555 		"%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
8556 		"%still_loop = OpSLessThan %bool %step %c_i32_2\n"
8557 		"              OpLoopMerge %exit %phi None\n"
8558 		"              OpBranchConditional %still_loop %phi %exit\n"
8559 
8560 		"%exit       = OpLabel\n"
8561 		"              OpStore %loc %accum\n"
8562 		"%ret        = OpLoad %v4f32 %result\n"
8563 		"              OpReturnValue %ret\n"
8564 
8565 		"              OpFunctionEnd\n";
8566 
8567 	fragments2["pre_main"]	= typesAndConstants2;
8568 	fragments2["testfun"]	= function2;
8569 
8570 	outputColors2[0]			= RGBA(127, 229, 127, 255);
8571 	outputColors2[1]			= RGBA(127, 102, 0,   255);
8572 	outputColors2[2]			= RGBA(0,   229, 0,   255);
8573 	outputColors2[3]			= RGBA(0,   102, 127, 255);
8574 
8575 	createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
8576 
8577 	const char	typesAndConstants3[]	=
8578 		"%true      = OpConstantTrue %bool\n"
8579 		"%false     = OpConstantFalse %bool\n"
8580 		"%c_f32_p2  = OpConstant %f32 0.2\n";
8581 
8582 	// Swap the second and the third element of the given parameter.
8583 	const char	function3[]				=
8584 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8585 		"%param     = OpFunctionParameter %v4f32\n"
8586 		"%entry     = OpLabel\n"
8587 		"%result    = OpVariable %fp_v4f32 Function\n"
8588 		"             OpStore %result %param\n"
8589 		"%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
8590 		"%a_init    = OpLoad %f32 %a_loc\n"
8591 		"%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
8592 		"%b_init    = OpLoad %f32 %b_loc\n"
8593 		"             OpBranch %phi\n"
8594 
8595 		"%phi        = OpLabel\n"
8596 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8597 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
8598 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
8599 		"              OpLoopMerge %exit %phi None\n"
8600 		"              OpBranchConditional %still_loop %phi %exit\n"
8601 
8602 		"%exit       = OpLabel\n"
8603 		"              OpStore %a_loc %a_next\n"
8604 		"              OpStore %b_loc %b_next\n"
8605 		"%ret        = OpLoad %v4f32 %result\n"
8606 		"              OpReturnValue %ret\n"
8607 
8608 		"              OpFunctionEnd\n";
8609 
8610 	fragments3["pre_main"]	= typesAndConstants3;
8611 	fragments3["testfun"]	= function3;
8612 
8613 	outputColors3[0]			= RGBA(127, 127, 127, 255);
8614 	outputColors3[1]			= RGBA(127, 0,   0,   255);
8615 	outputColors3[2]			= RGBA(0,   0,   127, 255);
8616 	outputColors3[3]			= RGBA(0,   127, 0,   255);
8617 
8618 	createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
8619 
8620 	const char	typesAndConstants4[]	=
8621 		"%f16        = OpTypeFloat 16\n"
8622 		"%v4f16      = OpTypeVector %f16 4\n"
8623 		"%fp_f16     = OpTypePointer Function %f16\n"
8624 		"%fp_v4f16   = OpTypePointer Function %v4f16\n"
8625 		"%true       = OpConstantTrue %bool\n"
8626 		"%false      = OpConstantFalse %bool\n"
8627 		"%c_f32_p2   = OpConstant %f32 0.2\n";
8628 
8629 	// Swap the second and the third element of the given parameter.
8630 	const char	function4[]				=
8631 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8632 		"%param      = OpFunctionParameter %v4f32\n"
8633 		"%entry      = OpLabel\n"
8634 		"%result     = OpVariable %fp_v4f16 Function\n"
8635 		"%param16    = OpFConvert %v4f16 %param\n"
8636 		"              OpStore %result %param16\n"
8637 		"%a_loc      = OpAccessChain %fp_f16 %result %c_i32_1\n"
8638 		"%a_init     = OpLoad %f16 %a_loc\n"
8639 		"%b_loc      = OpAccessChain %fp_f16 %result %c_i32_2\n"
8640 		"%b_init     = OpLoad %f16 %b_loc\n"
8641 		"              OpBranch %phi\n"
8642 
8643 		"%phi        = OpLabel\n"
8644 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
8645 		"%a_next     = OpPhi %f16  %a_init %entry %b_next %phi\n"
8646 		"%b_next     = OpPhi %f16  %b_init %entry %a_next %phi\n"
8647 		"              OpLoopMerge %exit %phi None\n"
8648 		"              OpBranchConditional %still_loop %phi %exit\n"
8649 
8650 		"%exit       = OpLabel\n"
8651 		"              OpStore %a_loc %a_next\n"
8652 		"              OpStore %b_loc %b_next\n"
8653 		"%ret16      = OpLoad %v4f16 %result\n"
8654 		"%ret        = OpFConvert %v4f32 %ret16\n"
8655 		"              OpReturnValue %ret\n"
8656 
8657 		"              OpFunctionEnd\n";
8658 
8659 	fragments4["pre_main"]		= typesAndConstants4;
8660 	fragments4["testfun"]		= function4;
8661 	fragments4["capability"]	= "OpCapability Float16\n";
8662 
8663 	extensions4.push_back("VK_KHR_shader_float16_int8");
8664 
8665 	vulkanFeatures4.extFloat16Int8.shaderFloat16 = true;
8666 
8667 	outputColors4[0]			= RGBA(127, 127, 127, 255);
8668 	outputColors4[1]			= RGBA(127, 0,   0,   255);
8669 	outputColors4[2]			= RGBA(0,   0,   127, 255);
8670 	outputColors4[3]			= RGBA(0,   127, 0,   255);
8671 
8672 	createTestsForAllStages("swap16", inputColors, outputColors4, fragments4, resources4, extensions4, group.get(), vulkanFeatures4);
8673 
8674 	return group.release();
8675 }
8676 
createNoContractionTests(tcu::TestContext & testCtx)8677 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
8678 {
8679 	de::MovePtr<tcu::TestCaseGroup> group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
8680 	RGBA							inputColors[4];
8681 	RGBA							outputColors[4];
8682 
8683 	// With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
8684 	// For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
8685 	// only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
8686 	// On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
8687 	const char						constantsAndTypes[]	 =
8688 		"%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
8689 		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
8690 		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
8691 		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
8692 		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n";
8693 
8694 	const char						function[]	 =
8695 		"%test_code      = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8696 		"%param          = OpFunctionParameter %v4f32\n"
8697 		"%label          = OpLabel\n"
8698 		"%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
8699 		"%var2           = OpVariable %fp_f32 Function\n"
8700 		"%red            = OpCompositeExtract %f32 %param 0\n"
8701 		"%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
8702 		"                  OpStore %var2 %plus_red\n"
8703 		"%val1           = OpLoad %f32 %var1\n"
8704 		"%val2           = OpLoad %f32 %var2\n"
8705 		"%mul            = OpFMul %f32 %val1 %val2\n"
8706 		"%add            = OpFAdd %f32 %mul %c_f32_n1\n"
8707 		"%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
8708 		"%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
8709 		"%success        = OpLogicalOr %bool %is0 %isn1n24\n"
8710 		"%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
8711 		"%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
8712 		"                  OpReturnValue %ret\n"
8713 		"                  OpFunctionEnd\n";
8714 
8715 	struct CaseNameDecoration
8716 	{
8717 		string name;
8718 		string decoration;
8719 	};
8720 
8721 
8722 	CaseNameDecoration tests[] = {
8723 		{"multiplication",	"OpDecorate %mul NoContraction"},
8724 		{"addition",		"OpDecorate %add NoContraction"},
8725 		{"both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
8726 	};
8727 
8728 	getHalfColorsFullAlpha(inputColors);
8729 
8730 	for (deUint8 idx = 0; idx < 4; ++idx)
8731 	{
8732 		inputColors[idx].setRed(0);
8733 		outputColors[idx] = RGBA(0, 0, 0, 255);
8734 	}
8735 
8736 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
8737 	{
8738 		map<string, string> fragments;
8739 
8740 		fragments["decoration"] = tests[testNdx].decoration;
8741 		fragments["pre_main"] = constantsAndTypes;
8742 		fragments["testfun"] = function;
8743 
8744 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
8745 	}
8746 
8747 	return group.release();
8748 }
8749 
createMemoryAccessTests(tcu::TestContext & testCtx)8750 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
8751 {
8752 	de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
8753 	RGBA							colors[4];
8754 
8755 	const char						constantsAndTypes[]	 =
8756 		"%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
8757 		"%fp_a2f32          = OpTypePointer Function %a2f32\n"
8758 		"%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
8759 		"%fp_stype          = OpTypePointer Function %stype\n";
8760 
8761 	const char						function[]	 =
8762 		"%test_code         = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8763 		"%param1            = OpFunctionParameter %v4f32\n"
8764 		"%lbl               = OpLabel\n"
8765 		"%v1                = OpVariable %fp_v4f32 Function\n"
8766 		"%v2                = OpVariable %fp_a2f32 Function\n"
8767 		"%v3                = OpVariable %fp_f32 Function\n"
8768 		"%v                 = OpVariable %fp_stype Function\n"
8769 		"%vv                = OpVariable %fp_stype Function\n"
8770 		"%vvv               = OpVariable %fp_f32 Function\n"
8771 
8772 		"                     OpStore %v1 %c_v4f32_1_1_1_1\n"
8773 		"                     OpStore %v2 %c_a2f32_1\n"
8774 		"                     OpStore %v3 %c_f32_1\n"
8775 
8776 		"%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
8777 		"%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
8778 		"%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
8779 		"%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
8780 		"%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
8781 		"%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
8782 
8783 		"                    OpStore %p_v4f32 %v1_v ${access_type}\n"
8784 		"                    OpStore %p_a2f32 %v2_v ${access_type}\n"
8785 		"                    OpStore %p_f32 %v3_v ${access_type}\n"
8786 
8787 		"                    OpCopyMemory %vv %v ${access_type}\n"
8788 		"                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
8789 
8790 		"%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
8791 		"%v_f32_2          = OpLoad %f32 %p_f32_2\n"
8792 		"%v_f32_3          = OpLoad %f32 %vvv\n"
8793 
8794 		"%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
8795 		"%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
8796 		"                    OpReturnValue %ret2\n"
8797 		"                    OpFunctionEnd\n";
8798 
8799 	struct NameMemoryAccess
8800 	{
8801 		string name;
8802 		string accessType;
8803 	};
8804 
8805 
8806 	NameMemoryAccess tests[] =
8807 	{
8808 		{ "none", "" },
8809 		{ "volatile", "Volatile" },
8810 		{ "aligned",  "Aligned 1" },
8811 		{ "volatile_aligned",  "Volatile|Aligned 1" },
8812 		{ "nontemporal_aligned",  "Nontemporal|Aligned 1" },
8813 		{ "volatile_nontemporal",  "Volatile|Nontemporal" },
8814 		{ "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
8815 	};
8816 
8817 	getHalfColorsFullAlpha(colors);
8818 
8819 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
8820 	{
8821 		map<string, string> fragments;
8822 		map<string, string> memoryAccess;
8823 		memoryAccess["access_type"] = tests[testNdx].accessType;
8824 
8825 		fragments["pre_main"] = constantsAndTypes;
8826 		fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
8827 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
8828 	}
8829 	return memoryAccessTests.release();
8830 }
createOpUndefTests(tcu::TestContext & testCtx)8831 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
8832 {
8833 	de::MovePtr<tcu::TestCaseGroup>		opUndefTests		 (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
8834 	RGBA								defaultColors[4];
8835 	map<string, string>					fragments;
8836 	getDefaultColors(defaultColors);
8837 
8838 	// First, simple cases that don't do anything with the OpUndef result.
8839 	struct NameCodePair { string name, decl, type; };
8840 	const NameCodePair tests[] =
8841 	{
8842 		{"bool", "", "%bool"},
8843 		{"vec2uint32", "", "%v2u32"},
8844 		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown", "%type"},
8845 		{"sampler", "%type = OpTypeSampler", "%type"},
8846 		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img", "%type"},
8847 		{"pointer", "", "%fp_i32"},
8848 		{"runtimearray", "%type = OpTypeRuntimeArray %f32", "%type"},
8849 		{"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100", "%type"},
8850 		{"struct", "%type = OpTypeStruct %f32 %i32 %u32", "%type"}};
8851 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
8852 	{
8853 		fragments["undef_type"] = tests[testNdx].type;
8854 		fragments["testfun"] = StringTemplate(
8855 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8856 			"%param1 = OpFunctionParameter %v4f32\n"
8857 			"%label_testfun = OpLabel\n"
8858 			"%undef = OpUndef ${undef_type}\n"
8859 			"OpReturnValue %param1\n"
8860 			"OpFunctionEnd\n").specialize(fragments);
8861 		fragments["pre_main"] = tests[testNdx].decl;
8862 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
8863 	}
8864 	fragments.clear();
8865 
8866 	fragments["testfun"] =
8867 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8868 		"%param1 = OpFunctionParameter %v4f32\n"
8869 		"%label_testfun = OpLabel\n"
8870 		"%undef = OpUndef %f32\n"
8871 		"%zero = OpFMul %f32 %undef %c_f32_0\n"
8872 		"%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
8873 		"%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
8874 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8875 		"%b = OpFAdd %f32 %a %actually_zero\n"
8876 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
8877 		"OpReturnValue %ret\n"
8878 		"OpFunctionEnd\n";
8879 
8880 	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8881 
8882 	fragments["testfun"] =
8883 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8884 		"%param1 = OpFunctionParameter %v4f32\n"
8885 		"%label_testfun = OpLabel\n"
8886 		"%undef = OpUndef %i32\n"
8887 		"%zero = OpIMul %i32 %undef %c_i32_0\n"
8888 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8889 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8890 		"OpReturnValue %ret\n"
8891 		"OpFunctionEnd\n";
8892 
8893 	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8894 
8895 	fragments["testfun"] =
8896 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8897 		"%param1 = OpFunctionParameter %v4f32\n"
8898 		"%label_testfun = OpLabel\n"
8899 		"%undef = OpUndef %u32\n"
8900 		"%zero = OpIMul %u32 %undef %c_i32_0\n"
8901 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
8902 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
8903 		"OpReturnValue %ret\n"
8904 		"OpFunctionEnd\n";
8905 
8906 	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
8907 
8908 	fragments["testfun"] =
8909 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8910 		"%param1 = OpFunctionParameter %v4f32\n"
8911 		"%label_testfun = OpLabel\n"
8912 		"%undef = OpUndef %v4f32\n"
8913 		"%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
8914 		"%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
8915 		"%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
8916 		"%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
8917 		"%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
8918 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
8919 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
8920 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
8921 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
8922 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8923 		"%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8924 		"%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8925 		"%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8926 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8927 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8928 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8929 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8930 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8931 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8932 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8933 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8934 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8935 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8936 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8937 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8938 		"OpReturnValue %ret\n"
8939 		"OpFunctionEnd\n";
8940 
8941 	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
8942 
8943 	fragments["pre_main"] =
8944 		"%m2x2f32 = OpTypeMatrix %v2f32 2\n";
8945 	fragments["testfun"] =
8946 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
8947 		"%param1 = OpFunctionParameter %v4f32\n"
8948 		"%label_testfun = OpLabel\n"
8949 		"%undef = OpUndef %m2x2f32\n"
8950 		"%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
8951 		"%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
8952 		"%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
8953 		"%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
8954 		"%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
8955 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
8956 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
8957 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
8958 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
8959 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
8960 		"%actually_zero_1 = OpSelect %f32 %is_nan_1 %c_f32_0 %zero_1\n"
8961 		"%actually_zero_2 = OpSelect %f32 %is_nan_2 %c_f32_0 %zero_2\n"
8962 		"%actually_zero_3 = OpSelect %f32 %is_nan_3 %c_f32_0 %zero_3\n"
8963 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
8964 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
8965 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
8966 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
8967 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
8968 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
8969 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
8970 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
8971 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
8972 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
8973 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
8974 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
8975 		"OpReturnValue %ret\n"
8976 		"OpFunctionEnd\n";
8977 
8978 	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
8979 
8980 	return opUndefTests.release();
8981 }
8982 
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)8983 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
8984 {
8985 	const RGBA		inputColors[4]		=
8986 	{
8987 		RGBA(0,		0,		0,		255),
8988 		RGBA(0,		0,		255,	255),
8989 		RGBA(0,		255,	0,		255),
8990 		RGBA(0,		255,	255,	255)
8991 	};
8992 
8993 	const RGBA		expectedColors[4]	=
8994 	{
8995 		RGBA(255,	 0,		 0,		 255),
8996 		RGBA(255,	 0,		 0,		 255),
8997 		RGBA(255,	 0,		 0,		 255),
8998 		RGBA(255,	 0,		 0,		 255)
8999 	};
9000 
9001 	const struct SingleFP16Possibility
9002 	{
9003 		const char* name;
9004 		const char* constant;  // Value to assign to %test_constant.
9005 		float		valueAsFloat;
9006 		const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
9007 	}				tests[]				=
9008 	{
9009 		{
9010 			"negative",
9011 			"-0x1.3p1\n",
9012 			-constructNormalizedFloat(1, 0x300000),
9013 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
9014 		}, // -19
9015 		{
9016 			"positive",
9017 			"0x1.0p7\n",
9018 			constructNormalizedFloat(7, 0x000000),
9019 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
9020 		},  // +128
9021 		// SPIR-V requires that OpQuantizeToF16 flushes
9022 		// any numbers that would end up denormalized in F16 to zero.
9023 		{
9024 			"denorm",
9025 			"0x0.0006p-126\n",
9026 			std::ldexp(1.5f, -140),
9027 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9028 		},  // denorm
9029 		{
9030 			"negative_denorm",
9031 			"-0x0.0006p-126\n",
9032 			-std::ldexp(1.5f, -140),
9033 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9034 		}, // -denorm
9035 		{
9036 			"too_small",
9037 			"0x1.0p-16\n",
9038 			std::ldexp(1.0f, -16),
9039 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9040 		},     // too small positive
9041 		{
9042 			"negative_too_small",
9043 			"-0x1.0p-32\n",
9044 			-std::ldexp(1.0f, -32),
9045 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
9046 		},      // too small negative
9047 		{
9048 			"negative_inf",
9049 			"-0x1.0p128\n",
9050 			-std::ldexp(1.0f, 128),
9051 
9052 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9053 			"%inf = OpIsInf %bool %c\n"
9054 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9055 		},     // -inf to -inf
9056 		{
9057 			"inf",
9058 			"0x1.0p128\n",
9059 			std::ldexp(1.0f, 128),
9060 
9061 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9062 			"%inf = OpIsInf %bool %c\n"
9063 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9064 		},     // +inf to +inf
9065 		{
9066 			"round_to_negative_inf",
9067 			"-0x1.0p32\n",
9068 			-std::ldexp(1.0f, 32),
9069 
9070 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
9071 			"%inf = OpIsInf %bool %c\n"
9072 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9073 		},     // round to -inf
9074 		{
9075 			"round_to_inf",
9076 			"0x1.0p16\n",
9077 			std::ldexp(1.0f, 16),
9078 
9079 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
9080 			"%inf = OpIsInf %bool %c\n"
9081 			"%cond = OpLogicalAnd %bool %gz %inf\n"
9082 		},     // round to +inf
9083 		{
9084 			"nan",
9085 			"0x1.1p128\n",
9086 			std::numeric_limits<float>::quiet_NaN(),
9087 
9088 			// Test for any NaN value, as NaNs are not preserved
9089 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9090 			"%cond = OpIsNan %bool %direct_quant\n"
9091 		}, // nan
9092 		{
9093 			"negative_nan",
9094 			"-0x1.0001p128\n",
9095 			std::numeric_limits<float>::quiet_NaN(),
9096 
9097 			// Test for any NaN value, as NaNs are not preserved
9098 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
9099 			"%cond = OpIsNan %bool %direct_quant\n"
9100 		} // -nan
9101 	};
9102 	const char*		constants			=
9103 		"%test_constant = OpConstant %f32 ";  // The value will be test.constant.
9104 
9105 	StringTemplate	function			(
9106 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9107 		"%param1        = OpFunctionParameter %v4f32\n"
9108 		"%label_testfun = OpLabel\n"
9109 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9110 		"%b             = OpFAdd %f32 %test_constant %a\n"
9111 		"%c             = OpQuantizeToF16 %f32 %b\n"
9112 		"${condition}\n"
9113 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9114 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9115 		"                 OpReturnValue %retval\n"
9116 		"OpFunctionEnd\n"
9117 	);
9118 
9119 	const char*		specDecorations		= "OpDecorate %test_constant SpecId 0\n";
9120 	const char*		specConstants		=
9121 			"%test_constant = OpSpecConstant %f32 0.\n"
9122 			"%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
9123 
9124 	StringTemplate	specConstantFunction(
9125 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9126 		"%param1        = OpFunctionParameter %v4f32\n"
9127 		"%label_testfun = OpLabel\n"
9128 		"${condition}\n"
9129 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9130 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
9131 		"                 OpReturnValue %retval\n"
9132 		"OpFunctionEnd\n"
9133 	);
9134 
9135 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9136 	{
9137 		map<string, string>								codeSpecialization;
9138 		map<string, string>								fragments;
9139 		codeSpecialization["condition"]					= tests[idx].condition;
9140 		fragments["testfun"]							= function.specialize(codeSpecialization);
9141 		fragments["pre_main"]							= string(constants) + tests[idx].constant + "\n";
9142 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9143 	}
9144 
9145 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
9146 	{
9147 		map<string, string>								codeSpecialization;
9148 		map<string, string>								fragments;
9149 		SpecConstants									passConstants;
9150 
9151 		codeSpecialization["condition"]					= tests[idx].condition;
9152 		fragments["testfun"]							= specConstantFunction.specialize(codeSpecialization);
9153 		fragments["decoration"]							= specDecorations;
9154 		fragments["pre_main"]							= specConstants;
9155 
9156 		passConstants.append<float>(tests[idx].valueAsFloat);
9157 
9158 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9159 	}
9160 }
9161 
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)9162 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
9163 {
9164 	RGBA inputColors[4] =  {
9165 		RGBA(0,		0,		0,		255),
9166 		RGBA(0,		0,		255,	255),
9167 		RGBA(0,		255,	0,		255),
9168 		RGBA(0,		255,	255,	255)
9169 	};
9170 
9171 	RGBA expectedColors[4] =
9172 	{
9173 		RGBA(255,	 0,		 0,		 255),
9174 		RGBA(255,	 0,		 0,		 255),
9175 		RGBA(255,	 0,		 0,		 255),
9176 		RGBA(255,	 0,		 0,		 255)
9177 	};
9178 
9179 	struct DualFP16Possibility
9180 	{
9181 		const char* name;
9182 		const char* input;
9183 		float		inputAsFloat;
9184 		const char* possibleOutput1;
9185 		const char* possibleOutput2;
9186 	} tests[] = {
9187 		{
9188 			"positive_round_up_or_round_down",
9189 			"0x1.3003p8",
9190 			constructNormalizedFloat(8, 0x300300),
9191 			"0x1.304p8",
9192 			"0x1.3p8"
9193 		},
9194 		{
9195 			"negative_round_up_or_round_down",
9196 			"-0x1.6008p-7",
9197 			-constructNormalizedFloat(-7, 0x600800),
9198 			"-0x1.6p-7",
9199 			"-0x1.604p-7"
9200 		},
9201 		{
9202 			"carry_bit",
9203 			"0x1.01ep2",
9204 			constructNormalizedFloat(2, 0x01e000),
9205 			"0x1.01cp2",
9206 			"0x1.02p2"
9207 		},
9208 		{
9209 			"carry_to_exponent",
9210 			"0x1.ffep1",
9211 			constructNormalizedFloat(1, 0xffe000),
9212 			"0x1.ffcp1",
9213 			"0x1.0p2"
9214 		},
9215 	};
9216 	StringTemplate constants (
9217 		"%input_const = OpConstant %f32 ${input}\n"
9218 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
9219 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
9220 		);
9221 
9222 	StringTemplate specConstants (
9223 		"%input_const = OpSpecConstant %f32 0.\n"
9224 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
9225 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
9226 	);
9227 
9228 	const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
9229 
9230 	const char* function  =
9231 		"%test_code     = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9232 		"%param1        = OpFunctionParameter %v4f32\n"
9233 		"%label_testfun = OpLabel\n"
9234 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9235 		// For the purposes of this test we assume that 0.f will always get
9236 		// faithfully passed through the pipeline stages.
9237 		"%b             = OpFAdd %f32 %input_const %a\n"
9238 		"%c             = OpQuantizeToF16 %f32 %b\n"
9239 		"%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
9240 		"%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
9241 		"%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
9242 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
9243 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
9244 		"                 OpReturnValue %retval\n"
9245 		"OpFunctionEnd\n";
9246 
9247 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9248 		map<string, string>									fragments;
9249 		map<string, string>									constantSpecialization;
9250 
9251 		constantSpecialization["input"]						= tests[idx].input;
9252 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
9253 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
9254 		fragments["testfun"]								= function;
9255 		fragments["pre_main"]								= constants.specialize(constantSpecialization);
9256 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
9257 	}
9258 
9259 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
9260 		map<string, string>									fragments;
9261 		map<string, string>									constantSpecialization;
9262 		SpecConstants										passConstants;
9263 
9264 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
9265 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
9266 		fragments["testfun"]								= function;
9267 		fragments["decoration"]								= specDecorations;
9268 		fragments["pre_main"]								= specConstants.specialize(constantSpecialization);
9269 
9270 		passConstants.append<float>(tests[idx].inputAsFloat);
9271 
9272 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
9273 	}
9274 }
9275 
createOpQuantizeTests(tcu::TestContext & testCtx)9276 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
9277 {
9278 	de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
9279 	createOpQuantizeSingleOptionTests(opQuantizeTests.get());
9280 	createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
9281 	return opQuantizeTests.release();
9282 }
9283 
9284 struct ShaderPermutation
9285 {
9286 	deUint8 vertexPermutation;
9287 	deUint8 geometryPermutation;
9288 	deUint8 tesscPermutation;
9289 	deUint8 tessePermutation;
9290 	deUint8 fragmentPermutation;
9291 };
9292 
getShaderPermutation(deUint8 inputValue)9293 ShaderPermutation getShaderPermutation(deUint8 inputValue)
9294 {
9295 	ShaderPermutation	permutation =
9296 	{
9297 		static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
9298 		static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
9299 		static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
9300 		static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
9301 		static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
9302 	};
9303 	return permutation;
9304 }
9305 
createModuleTests(tcu::TestContext & testCtx)9306 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
9307 {
9308 	RGBA								defaultColors[4];
9309 	RGBA								invertedColors[4];
9310 	de::MovePtr<tcu::TestCaseGroup>		moduleTests			(new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
9311 
9312 	getDefaultColors(defaultColors);
9313 	getInvertedDefaultColors(invertedColors);
9314 
9315 	// Combined module tests
9316 	{
9317 		// Shader stages: vertex and fragment
9318 		{
9319 			const ShaderElement combinedPipeline[]	=
9320 			{
9321 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9322 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9323 			};
9324 
9325 			addFunctionCaseWithPrograms<InstanceContext>(
9326 				moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline,
9327 				createInstanceContext(combinedPipeline, map<string, string>()));
9328 		}
9329 
9330 		// Shader stages: vertex, geometry and fragment
9331 		{
9332 			const ShaderElement combinedPipeline[]	=
9333 			{
9334 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9335 				ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9336 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9337 			};
9338 
9339 			addFunctionCaseWithPrograms<InstanceContext>(
9340 				moduleTests.get(), "same_module_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9341 				createInstanceContext(combinedPipeline, map<string, string>()));
9342 		}
9343 
9344 		// Shader stages: vertex, tessellation control, tessellation evaluation and fragment
9345 		{
9346 			const ShaderElement combinedPipeline[]	=
9347 			{
9348 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9349 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9350 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9351 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9352 			};
9353 
9354 			addFunctionCaseWithPrograms<InstanceContext>(
9355 				moduleTests.get(), "same_module_tessc_tesse", "", createCombinedModule, runAndVerifyDefaultPipeline,
9356 				createInstanceContext(combinedPipeline, map<string, string>()));
9357 		}
9358 
9359 		// Shader stages: vertex, tessellation control, tessellation evaluation, geometry and fragment
9360 		{
9361 			const ShaderElement combinedPipeline[]	=
9362 			{
9363 				ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
9364 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9365 				ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9366 				ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
9367 				ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
9368 			};
9369 
9370 			addFunctionCaseWithPrograms<InstanceContext>(
9371 				moduleTests.get(), "same_module_tessc_tesse_geom", "", createCombinedModule, runAndVerifyDefaultPipeline,
9372 				createInstanceContext(combinedPipeline, map<string, string>()));
9373 		}
9374 	}
9375 
9376 	const char* numbers[] =
9377 	{
9378 		"1", "2"
9379 	};
9380 
9381 	for (deInt8 idx = 0; idx < 32; ++idx)
9382 	{
9383 		ShaderPermutation			permutation		= getShaderPermutation(idx);
9384 		string						name			= string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
9385 		const ShaderElement			pipeline[]		=
9386 		{
9387 			ShaderElement("vert",	string("vert") +	numbers[permutation.vertexPermutation],		VK_SHADER_STAGE_VERTEX_BIT),
9388 			ShaderElement("geom",	string("geom") +	numbers[permutation.geometryPermutation],	VK_SHADER_STAGE_GEOMETRY_BIT),
9389 			ShaderElement("tessc",	string("tessc") +	numbers[permutation.tesscPermutation],		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
9390 			ShaderElement("tesse",	string("tesse") +	numbers[permutation.tessePermutation],		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
9391 			ShaderElement("frag",	string("frag") +	numbers[permutation.fragmentPermutation],	VK_SHADER_STAGE_FRAGMENT_BIT)
9392 		};
9393 
9394 		// If there are an even number of swaps, then it should be no-op.
9395 		// If there are an odd number, the color should be flipped.
9396 		if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
9397 		{
9398 			addFunctionCaseWithPrograms<InstanceContext>(
9399 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9400 					createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
9401 		}
9402 		else
9403 		{
9404 			addFunctionCaseWithPrograms<InstanceContext>(
9405 					moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline,
9406 					createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
9407 		}
9408 	}
9409 	return moduleTests.release();
9410 }
9411 
getUnusedVarTestNamePiece(const std::string & prefix,ShaderTask task)9412 std::string getUnusedVarTestNamePiece(const std::string& prefix, ShaderTask task)
9413 {
9414 	switch (task)
9415 	{
9416 		case SHADER_TASK_NONE:			return "";
9417 		case SHADER_TASK_NORMAL:		return prefix + "_normal";
9418 		case SHADER_TASK_UNUSED_VAR:	return prefix + "_unused_var";
9419 		case SHADER_TASK_UNUSED_FUNC:	return prefix + "_unused_func";
9420 		default:						DE_ASSERT(DE_FALSE);
9421 	}
9422 	// unreachable
9423 	return "";
9424 }
9425 
getShaderTaskIndexName(ShaderTaskIndex index)9426 std::string getShaderTaskIndexName(ShaderTaskIndex index)
9427 {
9428 	switch (index)
9429 	{
9430 	case SHADER_TASK_INDEX_VERTEX:			return "vertex";
9431 	case SHADER_TASK_INDEX_GEOMETRY:		return "geom";
9432 	case SHADER_TASK_INDEX_TESS_CONTROL:	return "tessc";
9433 	case SHADER_TASK_INDEX_TESS_EVAL:		return "tesse";
9434 	case SHADER_TASK_INDEX_FRAGMENT:		return "frag";
9435 	default:								DE_ASSERT(DE_FALSE);
9436 	}
9437 	// unreachable
9438 	return "";
9439 }
9440 
getUnusedVarTestName(const ShaderTaskArray & shaderTasks,const VariableLocation & location)9441 std::string getUnusedVarTestName(const ShaderTaskArray& shaderTasks, const VariableLocation& location)
9442 {
9443 	std::string testName = location.toString();
9444 
9445 	for (size_t i = 0; i < DE_LENGTH_OF_ARRAY(shaderTasks); ++i)
9446 	{
9447 		if (shaderTasks[i] != SHADER_TASK_NONE)
9448 		{
9449 			testName += "_" + getUnusedVarTestNamePiece(getShaderTaskIndexName((ShaderTaskIndex)i), shaderTasks[i]);
9450 		}
9451 	}
9452 
9453 	return testName;
9454 }
9455 
createUnusedVariableTests(tcu::TestContext & testCtx)9456 tcu::TestCaseGroup* createUnusedVariableTests(tcu::TestContext& testCtx)
9457 {
9458 	de::MovePtr<tcu::TestCaseGroup>		moduleTests				(new tcu::TestCaseGroup(testCtx, "unused_variables", "Graphics shaders with unused variables"));
9459 
9460 	ShaderTaskArray						shaderCombinations[]	=
9461 	{
9462 		// Vertex					Geometry					Tess. Control				Tess. Evaluation			Fragment
9463 		{ SHADER_TASK_UNUSED_VAR,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9464 		{ SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9465 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_UNUSED_VAR	},
9466 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_UNUSED_FUNC	},
9467 		{ SHADER_TASK_NORMAL,		SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9468 		{ SHADER_TASK_NORMAL,		SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NONE,			SHADER_TASK_NONE,			SHADER_TASK_NORMAL	},
9469 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NORMAL,			SHADER_TASK_NORMAL	},
9470 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NORMAL,			SHADER_TASK_NORMAL	},
9471 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NORMAL,			SHADER_TASK_UNUSED_VAR,		SHADER_TASK_NORMAL	},
9472 		{ SHADER_TASK_NORMAL,		SHADER_TASK_NONE,			SHADER_TASK_NORMAL,			SHADER_TASK_UNUSED_FUNC,	SHADER_TASK_NORMAL	}
9473 	};
9474 
9475 	const VariableLocation				testLocations[] =
9476 	{
9477 		// Set		Binding
9478 		{ 0,		5			},
9479 		{ 5,		5			},
9480 	};
9481 
9482 	for (size_t combNdx = 0; combNdx < DE_LENGTH_OF_ARRAY(shaderCombinations); ++combNdx)
9483 	{
9484 		for (size_t locationNdx = 0; locationNdx < DE_LENGTH_OF_ARRAY(testLocations); ++locationNdx)
9485 		{
9486 			const ShaderTaskArray&	shaderTasks		= shaderCombinations[combNdx];
9487 			const VariableLocation&	location		= testLocations[locationNdx];
9488 			std::string				testName		= getUnusedVarTestName(shaderTasks, location);
9489 
9490 			addFunctionCaseWithPrograms<UnusedVariableContext>(
9491 				moduleTests.get(), testName, "", createUnusedVariableModules, runAndVerifyUnusedVariablePipeline,
9492 				createUnusedVariableContext(shaderTasks, location));
9493 		}
9494 	}
9495 
9496 	return moduleTests.release();
9497 }
9498 
createLoopTests(tcu::TestContext & testCtx)9499 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
9500 {
9501 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
9502 	RGBA defaultColors[4];
9503 	getDefaultColors(defaultColors);
9504 	map<string, string> fragments;
9505 	fragments["pre_main"] =
9506 		"%c_f32_5 = OpConstant %f32 5.\n";
9507 
9508 	// A loop with a single block. The Continue Target is the loop block
9509 	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
9510 	// -- the "continue construct" forms the entire loop.
9511 	fragments["testfun"] =
9512 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9513 		"%param1 = OpFunctionParameter %v4f32\n"
9514 
9515 		"%entry = OpLabel\n"
9516 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9517 		"OpBranch %loop\n"
9518 
9519 		";adds and subtracts 1.0 to %val in alternate iterations\n"
9520 		"%loop = OpLabel\n"
9521 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9522 		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
9523 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9524 		"%val = OpFAdd %f32 %val1 %delta\n"
9525 		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
9526 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9527 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9528 		"OpLoopMerge %exit %loop None\n"
9529 		"OpBranchConditional %again %loop %exit\n"
9530 
9531 		"%exit = OpLabel\n"
9532 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9533 		"OpReturnValue %result\n"
9534 
9535 		"OpFunctionEnd\n";
9536 
9537 	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
9538 
9539 	// Body comprised of multiple basic blocks.
9540 	const StringTemplate multiBlock(
9541 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9542 		"%param1 = OpFunctionParameter %v4f32\n"
9543 
9544 		"%entry = OpLabel\n"
9545 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9546 		"OpBranch %loop\n"
9547 
9548 		";adds and subtracts 1.0 to %val in alternate iterations\n"
9549 		"%loop = OpLabel\n"
9550 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %cont\n"
9551 		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %cont\n"
9552 		"%val1 = OpPhi %f32 %val0 %entry %val %cont\n"
9553 		// There are several possibilities for the Continue Target below.  Each
9554 		// will be specialized into a separate test case.
9555 		"OpLoopMerge %exit ${continue_target} None\n"
9556 		"OpBranch %if\n"
9557 
9558 		"%if = OpLabel\n"
9559 		";delta_next = (delta > 0) ? -1 : 1;\n"
9560 		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
9561 		"OpSelectionMerge %gather DontFlatten\n"
9562 		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
9563 
9564 		"%odd = OpLabel\n"
9565 		"OpBranch %gather\n"
9566 
9567 		"%even = OpLabel\n"
9568 		"OpBranch %gather\n"
9569 
9570 		"%gather = OpLabel\n"
9571 		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
9572 		"%val = OpFAdd %f32 %val1 %delta\n"
9573 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9574 		"OpBranch %cont\n"
9575 
9576 		"%cont = OpLabel\n"
9577 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9578 		"OpBranchConditional %again %loop %exit\n"
9579 
9580 		"%exit = OpLabel\n"
9581 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9582 		"OpReturnValue %result\n"
9583 
9584 		"OpFunctionEnd\n");
9585 
9586 	map<string, string> continue_target;
9587 
9588 	// The Continue Target is the loop block itself.
9589 	continue_target["continue_target"] = "%loop";
9590 	fragments["testfun"] = multiBlock.specialize(continue_target);
9591 	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
9592 
9593 	// The Continue Target is at the end of the loop.
9594 	continue_target["continue_target"] = "%cont";
9595 	fragments["testfun"] = multiBlock.specialize(continue_target);
9596 	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
9597 
9598 	// A loop with continue statement.
9599 	fragments["testfun"] =
9600 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9601 		"%param1 = OpFunctionParameter %v4f32\n"
9602 
9603 		"%entry = OpLabel\n"
9604 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9605 		"OpBranch %loop\n"
9606 
9607 		";adds 4, 3, and 1 to %val0 (skips 2)\n"
9608 		"%loop = OpLabel\n"
9609 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9610 		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
9611 		"OpLoopMerge %exit %continue None\n"
9612 		"OpBranch %if\n"
9613 
9614 		"%if = OpLabel\n"
9615 		";skip if %count==2\n"
9616 		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
9617 		"OpBranchConditional %eq2 %continue %body\n"
9618 
9619 		"%body = OpLabel\n"
9620 		"%fcount = OpConvertSToF %f32 %count\n"
9621 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9622 		"OpBranch %continue\n"
9623 
9624 		"%continue = OpLabel\n"
9625 		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
9626 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9627 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9628 		"OpBranchConditional %again %loop %exit\n"
9629 
9630 		"%exit = OpLabel\n"
9631 		"%same = OpFSub %f32 %val %c_f32_8\n"
9632 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9633 		"OpReturnValue %result\n"
9634 		"OpFunctionEnd\n";
9635 	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
9636 
9637 	// A loop with break.
9638 	fragments["testfun"] =
9639 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9640 		"%param1 = OpFunctionParameter %v4f32\n"
9641 
9642 		"%entry = OpLabel\n"
9643 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9644 		"%dot = OpDot %f32 %param1 %param1\n"
9645 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9646 		"%zero = OpConvertFToU %u32 %div\n"
9647 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
9648 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9649 		"OpBranch %loop\n"
9650 
9651 		";adds 4 and 3 to %val0 (exits early)\n"
9652 		"%loop = OpLabel\n"
9653 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9654 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9655 		"OpLoopMerge %exit %continue None\n"
9656 		"OpBranch %if\n"
9657 
9658 		"%if = OpLabel\n"
9659 		";end loop if %count==%two\n"
9660 		"%above2 = OpSGreaterThan %bool %count %two\n"
9661 		"OpBranchConditional %above2 %body %exit\n"
9662 
9663 		"%body = OpLabel\n"
9664 		"%fcount = OpConvertSToF %f32 %count\n"
9665 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9666 		"OpBranch %continue\n"
9667 
9668 		"%continue = OpLabel\n"
9669 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9670 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9671 		"OpBranchConditional %again %loop %exit\n"
9672 
9673 		"%exit = OpLabel\n"
9674 		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
9675 		"%same = OpFSub %f32 %val_post %c_f32_7\n"
9676 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9677 		"OpReturnValue %result\n"
9678 		"OpFunctionEnd\n";
9679 	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
9680 
9681 	// A loop with return.
9682 	fragments["testfun"] =
9683 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9684 		"%param1 = OpFunctionParameter %v4f32\n"
9685 
9686 		"%entry = OpLabel\n"
9687 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9688 		"%dot = OpDot %f32 %param1 %param1\n"
9689 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9690 		"%zero = OpConvertFToU %u32 %div\n"
9691 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
9692 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9693 		"OpBranch %loop\n"
9694 
9695 		";returns early without modifying %param1\n"
9696 		"%loop = OpLabel\n"
9697 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
9698 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
9699 		"OpLoopMerge %exit %continue None\n"
9700 		"OpBranch %if\n"
9701 
9702 		"%if = OpLabel\n"
9703 		";return if %count==%two\n"
9704 		"%above2 = OpSGreaterThan %bool %count %two\n"
9705 		"OpSelectionMerge %body DontFlatten\n"
9706 		"OpBranchConditional %above2 %body %early_exit\n"
9707 
9708 		"%early_exit = OpLabel\n"
9709 		"OpReturnValue %param1\n"
9710 
9711 		"%body = OpLabel\n"
9712 		"%fcount = OpConvertSToF %f32 %count\n"
9713 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
9714 		"OpBranch %continue\n"
9715 
9716 		"%continue = OpLabel\n"
9717 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9718 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9719 		"OpBranchConditional %again %loop %exit\n"
9720 
9721 		"%exit = OpLabel\n"
9722 		";should never get here, so return an incorrect result\n"
9723 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
9724 		"OpReturnValue %result\n"
9725 		"OpFunctionEnd\n";
9726 	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
9727 
9728 	// Continue inside a switch block to break to enclosing loop's merge block.
9729 	// Matches roughly the following GLSL code:
9730 	// for (; keep_going; keep_going = false)
9731 	// {
9732 	//     switch (int(param1.x))
9733 	//     {
9734 	//         case 0: continue;
9735 	//         case 1: continue;
9736 	//         default: continue;
9737 	//     }
9738 	//     dead code: modify return value to invalid result.
9739 	// }
9740 	fragments["pre_main"] =
9741 		"%fp_bool = OpTypePointer Function %bool\n"
9742 		"%true = OpConstantTrue %bool\n"
9743 		"%false = OpConstantFalse %bool\n";
9744 
9745 	fragments["testfun"] =
9746 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9747 		"%param1 = OpFunctionParameter %v4f32\n"
9748 
9749 		"%entry = OpLabel\n"
9750 		"%keep_going = OpVariable %fp_bool Function\n"
9751 		"%val_ptr = OpVariable %fp_f32 Function\n"
9752 		"%param1_x = OpCompositeExtract %f32 %param1 0\n"
9753 		"OpStore %keep_going %true\n"
9754 		"OpBranch %forloop_begin\n"
9755 
9756 		"%forloop_begin = OpLabel\n"
9757 		"OpLoopMerge %forloop_merge %forloop_continue None\n"
9758 		"OpBranch %forloop\n"
9759 
9760 		"%forloop = OpLabel\n"
9761 		"%for_condition = OpLoad %bool %keep_going\n"
9762 		"OpBranchConditional %for_condition %forloop_body %forloop_merge\n"
9763 
9764 		"%forloop_body = OpLabel\n"
9765 		"OpStore %val_ptr %param1_x\n"
9766 		"%param1_x_int = OpConvertFToS %i32 %param1_x\n"
9767 
9768 		"OpSelectionMerge %switch_merge None\n"
9769 		"OpSwitch %param1_x_int %default 0 %case_0 1 %case_1\n"
9770 		"%case_0 = OpLabel\n"
9771 		"OpBranch %forloop_continue\n"
9772 		"%case_1 = OpLabel\n"
9773 		"OpBranch %forloop_continue\n"
9774 		"%default = OpLabel\n"
9775 		"OpBranch %forloop_continue\n"
9776 		"%switch_merge = OpLabel\n"
9777 		";should never get here, so change the return value to invalid result\n"
9778 		"OpStore %val_ptr %c_f32_1\n"
9779 		"OpBranch %forloop_continue\n"
9780 
9781 		"%forloop_continue = OpLabel\n"
9782 		"OpStore %keep_going %false\n"
9783 		"OpBranch %forloop_begin\n"
9784 		"%forloop_merge = OpLabel\n"
9785 
9786 		"%val = OpLoad %f32 %val_ptr\n"
9787 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
9788 		"OpReturnValue %result\n"
9789 		"OpFunctionEnd\n";
9790 	createTestsForAllStages("switch_continue", defaultColors, defaultColors, fragments, testGroup.get());
9791 
9792 	return testGroup.release();
9793 }
9794 
9795 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)9796 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
9797 {
9798 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
9799 	map<string, string> fragments;
9800 
9801 	// A barrier inside a function body.
9802 	fragments["pre_main"] =
9803 		"%Workgroup = OpConstant %i32 2\n"
9804 		"%Invocation = OpConstant %i32 4\n"
9805 		"%MemorySemanticsNone = OpConstant %i32 0\n";
9806 	fragments["testfun"] =
9807 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9808 		"%param1 = OpFunctionParameter %v4f32\n"
9809 		"%label_testfun = OpLabel\n"
9810 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9811 		"OpReturnValue %param1\n"
9812 		"OpFunctionEnd\n";
9813 	addTessCtrlTest(testGroup.get(), "in_function", fragments);
9814 
9815 	// Common setup code for the following tests.
9816 	fragments["pre_main"] =
9817 		"%Workgroup = OpConstant %i32 2\n"
9818 		"%Invocation = OpConstant %i32 4\n"
9819 		"%MemorySemanticsNone = OpConstant %i32 0\n"
9820 		"%c_f32_5 = OpConstant %f32 5.\n";
9821 	const string setupPercentZero =	 // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
9822 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9823 		"%param1 = OpFunctionParameter %v4f32\n"
9824 		"%entry = OpLabel\n"
9825 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
9826 		"%dot = OpDot %f32 %param1 %param1\n"
9827 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
9828 		"%zero = OpConvertFToU %u32 %div\n";
9829 
9830 	// Barriers inside OpSwitch branches.
9831 	fragments["testfun"] =
9832 		setupPercentZero +
9833 		"OpSelectionMerge %switch_exit None\n"
9834 		"OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
9835 
9836 		"%case1 = OpLabel\n"
9837 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9838 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9839 		"%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9840 		"OpBranch %switch_exit\n"
9841 
9842 		"%switch_default = OpLabel\n"
9843 		"%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9844 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9845 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9846 		"OpBranch %switch_exit\n"
9847 
9848 		"%case0 = OpLabel\n"
9849 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9850 		"OpBranch %switch_exit\n"
9851 
9852 		"%switch_exit = OpLabel\n"
9853 		"%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
9854 		"OpReturnValue %ret\n"
9855 		"OpFunctionEnd\n";
9856 	addTessCtrlTest(testGroup.get(), "in_switch", fragments);
9857 
9858 	// Barriers inside if-then-else.
9859 	fragments["testfun"] =
9860 		setupPercentZero +
9861 		"%eq0 = OpIEqual %bool %zero %c_u32_0\n"
9862 		"OpSelectionMerge %exit DontFlatten\n"
9863 		"OpBranchConditional %eq0 %then %else\n"
9864 
9865 		"%else = OpLabel\n"
9866 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
9867 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9868 		"%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
9869 		"OpBranch %exit\n"
9870 
9871 		"%then = OpLabel\n"
9872 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9873 		"OpBranch %exit\n"
9874 		"%exit = OpLabel\n"
9875 		"%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
9876 		"OpReturnValue %ret\n"
9877 		"OpFunctionEnd\n";
9878 	addTessCtrlTest(testGroup.get(), "in_if", fragments);
9879 
9880 	// A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
9881 	// http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
9882 	fragments["testfun"] =
9883 		setupPercentZero +
9884 		"%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
9885 		"%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
9886 		"OpSelectionMerge %exit DontFlatten\n"
9887 		"OpBranchConditional %thread0 %then %else\n"
9888 
9889 		"%else = OpLabel\n"
9890 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9891 		"OpBranch %exit\n"
9892 
9893 		"%then = OpLabel\n"
9894 		"%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
9895 		"OpBranch %exit\n"
9896 
9897 		"%exit = OpLabel\n"
9898 		"%val = OpPhi %f32 %val0 %else %val1 %then\n"
9899 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9900 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
9901 		"OpReturnValue %ret\n"
9902 		"OpFunctionEnd\n";
9903 	addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
9904 
9905 	// A barrier inside a loop.
9906 	fragments["pre_main"] =
9907 		"%Workgroup = OpConstant %i32 2\n"
9908 		"%Invocation = OpConstant %i32 4\n"
9909 		"%MemorySemanticsNone = OpConstant %i32 0\n"
9910 		"%c_f32_10 = OpConstant %f32 10.\n";
9911 	fragments["testfun"] =
9912 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9913 		"%param1 = OpFunctionParameter %v4f32\n"
9914 		"%entry = OpLabel\n"
9915 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
9916 		"OpBranch %loop\n"
9917 
9918 		";adds 4, 3, 2, and 1 to %val0\n"
9919 		"%loop = OpLabel\n"
9920 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
9921 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
9922 		"OpControlBarrier %Workgroup %Invocation %MemorySemanticsNone\n"
9923 		"%fcount = OpConvertSToF %f32 %count\n"
9924 		"%val = OpFAdd %f32 %val1 %fcount\n"
9925 		"%count__ = OpISub %i32 %count %c_i32_1\n"
9926 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
9927 		"OpLoopMerge %exit %loop None\n"
9928 		"OpBranchConditional %again %loop %exit\n"
9929 
9930 		"%exit = OpLabel\n"
9931 		"%same = OpFSub %f32 %val %c_f32_10\n"
9932 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
9933 		"OpReturnValue %ret\n"
9934 		"OpFunctionEnd\n";
9935 	addTessCtrlTest(testGroup.get(), "in_loop", fragments);
9936 
9937 	return testGroup.release();
9938 }
9939 
9940 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)9941 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
9942 {
9943 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
9944 	map<string, string>					fragments;
9945 	RGBA								inputColors[4];
9946 	RGBA								outputColors[4];
9947 
9948 	fragments["pre_main"]				 =
9949 		"%c_f32_3 = OpConstant %f32 3.0\n"
9950 		"%c_f32_n3 = OpConstant %f32 -3.0\n"
9951 		"%c_f32_4 = OpConstant %f32 4.0\n"
9952 		"%c_f32_p75 = OpConstant %f32 0.75\n"
9953 		"%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
9954 		"%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
9955 		"%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
9956 
9957 	// The test does the following.
9958 	// vec4 result = (param1 * 8.0) - 4.0;
9959 	// return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
9960 	fragments["testfun"]				 =
9961 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
9962 		"%param1 = OpFunctionParameter %v4f32\n"
9963 		"%label_testfun = OpLabel\n"
9964 		"%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
9965 		"%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
9966 		"%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
9967 		"%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
9968 		"%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
9969 		"%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
9970 		"OpReturnValue %xy_0_1\n"
9971 		"OpFunctionEnd\n";
9972 
9973 
9974 	inputColors[0]		= RGBA(16,	16,		0, 255);
9975 	inputColors[1]		= RGBA(232, 232,	0, 255);
9976 	inputColors[2]		= RGBA(232, 16,		0, 255);
9977 	inputColors[3]		= RGBA(16,	232,	0, 255);
9978 
9979 	outputColors[0]		= RGBA(64,	64,		0, 255);
9980 	outputColors[1]		= RGBA(255, 255,	0, 255);
9981 	outputColors[2]		= RGBA(255, 64,		0, 255);
9982 	outputColors[3]		= RGBA(64,	255,	0, 255);
9983 
9984 	createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
9985 	return testGroup.release();
9986 }
9987 
9988 // Test for the OpSRem instruction.
createOpSRemGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)9989 tcu::TestCaseGroup* createOpSRemGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
9990 {
9991 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "srem", "OpSRem"));
9992 	map<string, string>					fragments;
9993 
9994 	fragments["pre_main"]				 =
9995 		"%c_f32_255 = OpConstant %f32 255.0\n"
9996 		"%c_i32_128 = OpConstant %i32 128\n"
9997 		"%c_i32_255 = OpConstant %i32 255\n"
9998 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
9999 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10000 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10001 
10002 	// The test does the following.
10003 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10004 	// ivec4 result = ivec4(srem(ints.x, ints.y), srem(ints.y, ints.z), srem(ints.z, ints.x), 255);
10005 	// return float(result + 128) / 255.0;
10006 	fragments["testfun"]				 =
10007 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10008 		"%param1 = OpFunctionParameter %v4f32\n"
10009 		"%label_testfun = OpLabel\n"
10010 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10011 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10012 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10013 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10014 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10015 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10016 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10017 		"%x_out = OpSRem %i32 %x_in %y_in\n"
10018 		"%y_out = OpSRem %i32 %y_in %z_in\n"
10019 		"%z_out = OpSRem %i32 %z_in %x_in\n"
10020 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10021 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10022 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10023 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10024 		"OpReturnValue %float_out\n"
10025 		"OpFunctionEnd\n";
10026 
10027 	const struct CaseParams
10028 	{
10029 		const char*		name;
10030 		const char*		failMessageTemplate;	// customized status message
10031 		qpTestResult	failResult;				// override status on failure
10032 		int				operands[4][3];			// four (x, y, z) vectors of operands
10033 		int				results[4][3];			// four (x, y, z) vectors of results
10034 	} cases[] =
10035 	{
10036 		{
10037 			"positive",
10038 			"${reason}",
10039 			QP_TEST_RESULT_FAIL,
10040 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },			// operands
10041 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },			// results
10042 		},
10043 		{
10044 			"all",
10045 			"Inconsistent results, but within specification: ${reason}",
10046 			negFailResult,															// negative operands, not required by the spec
10047 			{ { 5, 12, -17 }, { -5, -5, 7 }, { 75, 8, -81 }, { 25, -60, 100 } },	// operands
10048 			{ { 5, 12,  -2 }, {  0, -5, 2 }, {  3, 8,  -6 }, { 25, -60,   0 } },	// results
10049 		},
10050 	};
10051 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
10052 
10053 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10054 	{
10055 		const CaseParams&	params			= cases[caseNdx];
10056 		RGBA				inputColors[4];
10057 		RGBA				outputColors[4];
10058 
10059 		for (int i = 0; i < 4; ++i)
10060 		{
10061 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10062 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10063 		}
10064 
10065 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10066 	}
10067 
10068 	return testGroup.release();
10069 }
10070 
10071 // Test for the OpSMod instruction.
createOpSModGraphicsTests(tcu::TestContext & testCtx,qpTestResult negFailResult)10072 tcu::TestCaseGroup* createOpSModGraphicsTests(tcu::TestContext& testCtx, qpTestResult negFailResult)
10073 {
10074 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "smod", "OpSMod"));
10075 	map<string, string>					fragments;
10076 
10077 	fragments["pre_main"]				 =
10078 		"%c_f32_255 = OpConstant %f32 255.0\n"
10079 		"%c_i32_128 = OpConstant %i32 128\n"
10080 		"%c_i32_255 = OpConstant %i32 255\n"
10081 		"%c_v4f32_255 = OpConstantComposite %v4f32 %c_f32_255 %c_f32_255 %c_f32_255 %c_f32_255 \n"
10082 		"%c_v4f32_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 \n"
10083 		"%c_v4i32_128 = OpConstantComposite %v4i32 %c_i32_128 %c_i32_128 %c_i32_128 %c_i32_128 \n";
10084 
10085 	// The test does the following.
10086 	// ivec4 ints = int(param1 * 255.0 + 0.5) - 128;
10087 	// ivec4 result = ivec4(smod(ints.x, ints.y), smod(ints.y, ints.z), smod(ints.z, ints.x), 255);
10088 	// return float(result + 128) / 255.0;
10089 	fragments["testfun"]				 =
10090 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
10091 		"%param1 = OpFunctionParameter %v4f32\n"
10092 		"%label_testfun = OpLabel\n"
10093 		"%div255 = OpFMul %v4f32 %param1 %c_v4f32_255\n"
10094 		"%add0_5 = OpFAdd %v4f32 %div255 %c_v4f32_0_5\n"
10095 		"%uints_in = OpConvertFToS %v4i32 %add0_5\n"
10096 		"%ints_in = OpISub %v4i32 %uints_in %c_v4i32_128\n"
10097 		"%x_in = OpCompositeExtract %i32 %ints_in 0\n"
10098 		"%y_in = OpCompositeExtract %i32 %ints_in 1\n"
10099 		"%z_in = OpCompositeExtract %i32 %ints_in 2\n"
10100 		"%x_out = OpSMod %i32 %x_in %y_in\n"
10101 		"%y_out = OpSMod %i32 %y_in %z_in\n"
10102 		"%z_out = OpSMod %i32 %z_in %x_in\n"
10103 		"%ints_out = OpCompositeConstruct %v4i32 %x_out %y_out %z_out %c_i32_255\n"
10104 		"%ints_offset = OpIAdd %v4i32 %ints_out %c_v4i32_128\n"
10105 		"%f_ints_offset = OpConvertSToF %v4f32 %ints_offset\n"
10106 		"%float_out = OpFDiv %v4f32 %f_ints_offset %c_v4f32_255\n"
10107 		"OpReturnValue %float_out\n"
10108 		"OpFunctionEnd\n";
10109 
10110 	const struct CaseParams
10111 	{
10112 		const char*		name;
10113 		const char*		failMessageTemplate;	// customized status message
10114 		qpTestResult	failResult;				// override status on failure
10115 		int				operands[4][3];			// four (x, y, z) vectors of operands
10116 		int				results[4][3];			// four (x, y, z) vectors of results
10117 	} cases[] =
10118 	{
10119 		{
10120 			"positive",
10121 			"${reason}",
10122 			QP_TEST_RESULT_FAIL,
10123 			{ { 5, 12, 17 }, { 5, 5, 7 }, { 75, 8, 81 }, { 25, 60, 100 } },				// operands
10124 			{ { 5, 12,  2 }, { 0, 5, 2 }, {  3, 8,  6 }, { 25, 60,   0 } },				// results
10125 		},
10126 		{
10127 			"all",
10128 			"Inconsistent results, but within specification: ${reason}",
10129 			negFailResult,																// negative operands, not required by the spec
10130 			{ { 5, 12, -17 }, { -5, -5,  7 }, { 75,   8, -81 }, {  25, -60, 100 } },	// operands
10131 			{ { 5, -5,   3 }, {  0,  2, -3 }, {  3, -73,  69 }, { -35,  40,   0 } },	// results
10132 		},
10133 	};
10134 	// If either operand is negative the result is undefined. Some implementations may still return correct values.
10135 
10136 	for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(cases); ++caseNdx)
10137 	{
10138 		const CaseParams&	params			= cases[caseNdx];
10139 		RGBA				inputColors[4];
10140 		RGBA				outputColors[4];
10141 
10142 		for (int i = 0; i < 4; ++i)
10143 		{
10144 			inputColors [i] = RGBA(params.operands[i][0] + 128, params.operands[i][1] + 128, params.operands[i][2] + 128, 255);
10145 			outputColors[i] = RGBA(params.results [i][0] + 128, params.results [i][1] + 128, params.results [i][2] + 128, 255);
10146 		}
10147 
10148 		createTestsForAllStages(params.name, inputColors, outputColors, fragments, testGroup.get(), params.failResult, params.failMessageTemplate);
10149 	}
10150 	return testGroup.release();
10151 }
10152 
// Data types that the conversion helpers below (getBitWidthStr, isSigned, isInt, isFloat,
// getBuffer, ...) switch over. Enumerators are grouped as signed integers, unsigned
// integers, floats, and two vec2 types that isInt() and isFloat() both reject.
10153 enum ConversionDataType
10154 {
10155 	DATA_TYPE_SIGNED_8,
10156 	DATA_TYPE_SIGNED_16,
10157 	DATA_TYPE_SIGNED_32,
10158 	DATA_TYPE_SIGNED_64,
10159 	DATA_TYPE_UNSIGNED_8,
10160 	DATA_TYPE_UNSIGNED_16,
10161 	DATA_TYPE_UNSIGNED_32,
10162 	DATA_TYPE_UNSIGNED_64,
10163 	DATA_TYPE_FLOAT_16,
10164 	DATA_TYPE_FLOAT_32,
10165 	DATA_TYPE_FLOAT_64,
10166 	DATA_TYPE_VEC2_SIGNED_16,	// getBitWidthStr() reports "32" for this type
10167 	DATA_TYPE_VEC2_SIGNED_32	// getBitWidthStr() reports "64" for this type
10168 };
10169 
getBitWidthStr(ConversionDataType type)10170 const string getBitWidthStr (ConversionDataType type)
10171 {
10172 	switch (type)
10173 	{
10174 		case DATA_TYPE_SIGNED_8:
10175 		case DATA_TYPE_UNSIGNED_8:
10176 			return "8";
10177 
10178 		case DATA_TYPE_SIGNED_16:
10179 		case DATA_TYPE_UNSIGNED_16:
10180 		case DATA_TYPE_FLOAT_16:
10181 			return "16";
10182 
10183 		case DATA_TYPE_SIGNED_32:
10184 		case DATA_TYPE_UNSIGNED_32:
10185 		case DATA_TYPE_FLOAT_32:
10186 		case DATA_TYPE_VEC2_SIGNED_16:
10187 			return "32";
10188 
10189 		case DATA_TYPE_SIGNED_64:
10190 		case DATA_TYPE_UNSIGNED_64:
10191 		case DATA_TYPE_FLOAT_64:
10192 		case DATA_TYPE_VEC2_SIGNED_32:
10193 			return "64";
10194 
10195 		default:
10196 			DE_ASSERT(false);
10197 	}
10198 	return "";
10199 }
10200 
getByteWidthStr(ConversionDataType type)10201 const string getByteWidthStr (ConversionDataType type)
10202 {
10203 	switch (type)
10204 	{
10205 		case DATA_TYPE_SIGNED_8:
10206 		case DATA_TYPE_UNSIGNED_8:
10207 			return "1";
10208 
10209 		case DATA_TYPE_SIGNED_16:
10210 		case DATA_TYPE_UNSIGNED_16:
10211 		case DATA_TYPE_FLOAT_16:
10212 			return "2";
10213 
10214 		case DATA_TYPE_SIGNED_32:
10215 		case DATA_TYPE_UNSIGNED_32:
10216 		case DATA_TYPE_FLOAT_32:
10217 		case DATA_TYPE_VEC2_SIGNED_16:
10218 			return "4";
10219 
10220 		case DATA_TYPE_SIGNED_64:
10221 		case DATA_TYPE_UNSIGNED_64:
10222 		case DATA_TYPE_FLOAT_64:
10223 		case DATA_TYPE_VEC2_SIGNED_32:
10224 			return "8";
10225 
10226 		default:
10227 			DE_ASSERT(false);
10228 	}
10229 	return "";
10230 }
10231 
isSigned(ConversionDataType type)10232 bool isSigned (ConversionDataType type)
10233 {
10234 	switch (type)
10235 	{
10236 		case DATA_TYPE_SIGNED_8:
10237 		case DATA_TYPE_SIGNED_16:
10238 		case DATA_TYPE_SIGNED_32:
10239 		case DATA_TYPE_SIGNED_64:
10240 		case DATA_TYPE_FLOAT_16:
10241 		case DATA_TYPE_FLOAT_32:
10242 		case DATA_TYPE_FLOAT_64:
10243 		case DATA_TYPE_VEC2_SIGNED_16:
10244 		case DATA_TYPE_VEC2_SIGNED_32:
10245 			return true;
10246 
10247 		case DATA_TYPE_UNSIGNED_8:
10248 		case DATA_TYPE_UNSIGNED_16:
10249 		case DATA_TYPE_UNSIGNED_32:
10250 		case DATA_TYPE_UNSIGNED_64:
10251 			return false;
10252 
10253 		default:
10254 			DE_ASSERT(false);
10255 	}
10256 	return false;
10257 }
10258 
isInt(ConversionDataType type)10259 bool isInt (ConversionDataType type)
10260 {
10261 	switch (type)
10262 	{
10263 		case DATA_TYPE_SIGNED_8:
10264 		case DATA_TYPE_SIGNED_16:
10265 		case DATA_TYPE_SIGNED_32:
10266 		case DATA_TYPE_SIGNED_64:
10267 		case DATA_TYPE_UNSIGNED_8:
10268 		case DATA_TYPE_UNSIGNED_16:
10269 		case DATA_TYPE_UNSIGNED_32:
10270 		case DATA_TYPE_UNSIGNED_64:
10271 			return true;
10272 
10273 		case DATA_TYPE_FLOAT_16:
10274 		case DATA_TYPE_FLOAT_32:
10275 		case DATA_TYPE_FLOAT_64:
10276 		case DATA_TYPE_VEC2_SIGNED_16:
10277 		case DATA_TYPE_VEC2_SIGNED_32:
10278 			return false;
10279 
10280 		default:
10281 			DE_ASSERT(false);
10282 	}
10283 	return false;
10284 }
10285 
isFloat(ConversionDataType type)10286 bool isFloat (ConversionDataType type)
10287 {
10288 	switch (type)
10289 	{
10290 		case DATA_TYPE_SIGNED_8:
10291 		case DATA_TYPE_SIGNED_16:
10292 		case DATA_TYPE_SIGNED_32:
10293 		case DATA_TYPE_SIGNED_64:
10294 		case DATA_TYPE_UNSIGNED_8:
10295 		case DATA_TYPE_UNSIGNED_16:
10296 		case DATA_TYPE_UNSIGNED_32:
10297 		case DATA_TYPE_UNSIGNED_64:
10298 		case DATA_TYPE_VEC2_SIGNED_16:
10299 		case DATA_TYPE_VEC2_SIGNED_32:
10300 			return false;
10301 
10302 		case DATA_TYPE_FLOAT_16:
10303 		case DATA_TYPE_FLOAT_32:
10304 		case DATA_TYPE_FLOAT_64:
10305 			return true;
10306 
10307 		default:
10308 			DE_ASSERT(false);
10309 	}
10310 	return false;
10311 }
10312 
getTypeName(ConversionDataType type)10313 const string getTypeName (ConversionDataType type)
10314 {
10315 	string prefix = isSigned(type) ? "" : "u";
10316 
10317 	if		(isInt(type))						return prefix + "int"	+ getBitWidthStr(type);
10318 	else if (isFloat(type))						return prefix + "float"	+ getBitWidthStr(type);
10319 	else if (type == DATA_TYPE_VEC2_SIGNED_16)	return "i16vec2";
10320 	else if (type == DATA_TYPE_VEC2_SIGNED_32)	return "i32vec2";
10321 	else										DE_ASSERT(false);
10322 
10323 	return "";
10324 }
10325 
getTestName(ConversionDataType from,ConversionDataType to,const char * suffix)10326 const string getTestName (ConversionDataType from, ConversionDataType to, const char* suffix)
10327 {
10328 	const string fullSuffix(suffix == DE_NULL ? "" : string("_") + string(suffix));
10329 
10330 	return getTypeName(from) + "_to_" + getTypeName(to) + fullSuffix;
10331 }
10332 
getAsmTypeName(ConversionDataType type,deUint32 elements=1)10333 const string getAsmTypeName (ConversionDataType type, deUint32 elements = 1)
10334 {
10335 	string prefix;
10336 
10337 	if		(isInt(type))						prefix = isSigned(type) ? "i" : "u";
10338 	else if (isFloat(type))						prefix = "f";
10339 	else if (type == DATA_TYPE_VEC2_SIGNED_16)	return "i16vec2";
10340 	else if (type == DATA_TYPE_VEC2_SIGNED_32)	return "v2i32";
10341 	else										DE_ASSERT(false);
10342 	if ((isInt(type) || isFloat(type)) && elements == 2)
10343 	{
10344 		prefix = "v2" + prefix;
10345 	}
10346 
10347 	return prefix + getBitWidthStr(type);
10348 }
10349 
10350 template<typename T>
getSpecializedBuffer(deInt64 number,deUint32 elements=1)10351 BufferSp getSpecializedBuffer (deInt64 number, deUint32 elements = 1)
10352 {
10353 	return BufferSp(new Buffer<T>(vector<T>(elements, (T)number)));
10354 }
10355 
getBuffer(ConversionDataType type,deInt64 number,deUint32 elements=1)10356 BufferSp getBuffer (ConversionDataType type, deInt64 number, deUint32 elements = 1)
10357 {
10358 	switch (type)
10359 	{
10360 		case DATA_TYPE_SIGNED_8:		return getSpecializedBuffer<deInt8>(number, elements);
10361 		case DATA_TYPE_SIGNED_16:		return getSpecializedBuffer<deInt16>(number, elements);
10362 		case DATA_TYPE_SIGNED_32:		return getSpecializedBuffer<deInt32>(number, elements);
10363 		case DATA_TYPE_SIGNED_64:		return getSpecializedBuffer<deInt64>(number, elements);
10364 		case DATA_TYPE_UNSIGNED_8:		return getSpecializedBuffer<deUint8>(number, elements);
10365 		case DATA_TYPE_UNSIGNED_16:		return getSpecializedBuffer<deUint16>(number, elements);
10366 		case DATA_TYPE_UNSIGNED_32:		return getSpecializedBuffer<deUint32>(number, elements);
10367 		case DATA_TYPE_UNSIGNED_64:		return getSpecializedBuffer<deUint64>(number, elements);
10368 		case DATA_TYPE_FLOAT_16:		return getSpecializedBuffer<deUint16>(number, elements);
10369 		case DATA_TYPE_FLOAT_32:		return getSpecializedBuffer<deUint32>(number, elements);
10370 		case DATA_TYPE_FLOAT_64:		return getSpecializedBuffer<deUint64>(number, elements);
10371 		case DATA_TYPE_VEC2_SIGNED_16:	return getSpecializedBuffer<deUint32>(number, elements);
10372 		case DATA_TYPE_VEC2_SIGNED_32:	return getSpecializedBuffer<deUint64>(number, elements);
10373 
10374 		default:						TCU_THROW(InternalError, "Unimplemented type passed");
10375 	}
10376 }
10377 
usesInt8(ConversionDataType from,ConversionDataType to)10378 bool usesInt8 (ConversionDataType from, ConversionDataType to)
10379 {
10380 	return (from == DATA_TYPE_SIGNED_8 || to == DATA_TYPE_SIGNED_8 ||
10381 			from == DATA_TYPE_UNSIGNED_8 || to == DATA_TYPE_UNSIGNED_8);
10382 }
10383 
usesInt16(ConversionDataType from,ConversionDataType to)10384 bool usesInt16 (ConversionDataType from, ConversionDataType to)
10385 {
10386 	return (from == DATA_TYPE_SIGNED_16 || to == DATA_TYPE_SIGNED_16 ||
10387 			from == DATA_TYPE_UNSIGNED_16 || to == DATA_TYPE_UNSIGNED_16 ||
10388 			from == DATA_TYPE_VEC2_SIGNED_16 || to == DATA_TYPE_VEC2_SIGNED_16);
10389 }
10390 
usesInt32(ConversionDataType from,ConversionDataType to)10391 bool usesInt32 (ConversionDataType from, ConversionDataType to)
10392 {
10393 	return (from == DATA_TYPE_SIGNED_32 || to == DATA_TYPE_SIGNED_32 ||
10394 			from == DATA_TYPE_UNSIGNED_32 || to == DATA_TYPE_UNSIGNED_32 ||
10395 			from == DATA_TYPE_VEC2_SIGNED_32|| to == DATA_TYPE_VEC2_SIGNED_32);
10396 }
10397 
usesInt64(ConversionDataType from,ConversionDataType to)10398 bool usesInt64 (ConversionDataType from, ConversionDataType to)
10399 {
10400 	return (from == DATA_TYPE_SIGNED_64 || to == DATA_TYPE_SIGNED_64 ||
10401 			from == DATA_TYPE_UNSIGNED_64 || to == DATA_TYPE_UNSIGNED_64);
10402 }
10403 
usesFloat16(ConversionDataType from,ConversionDataType to)10404 bool usesFloat16 (ConversionDataType from, ConversionDataType to)
10405 {
10406 	return (from == DATA_TYPE_FLOAT_16 || to == DATA_TYPE_FLOAT_16);
10407 }
10408 
usesFloat32(ConversionDataType from,ConversionDataType to)10409 bool usesFloat32 (ConversionDataType from, ConversionDataType to)
10410 {
10411 	return (from == DATA_TYPE_FLOAT_32 || to == DATA_TYPE_FLOAT_32);
10412 }
10413 
usesFloat64(ConversionDataType from,ConversionDataType to)10414 bool usesFloat64 (ConversionDataType from, ConversionDataType to)
10415 {
10416 	return (from == DATA_TYPE_FLOAT_64 || to == DATA_TYPE_FLOAT_64);
10417 }
10418 
getVulkanFeaturesAndExtensions(ConversionDataType from,ConversionDataType to,bool useStorageExt,VulkanFeatures & vulkanFeatures,vector<string> & extensions)10419 void getVulkanFeaturesAndExtensions (ConversionDataType from, ConversionDataType to, bool useStorageExt, VulkanFeatures& vulkanFeatures, vector<string>& extensions)
10420 {
10421 	if (usesInt16(from, to) && !usesInt32(from, to))
10422 		vulkanFeatures.coreFeatures.shaderInt16 = DE_TRUE;
10423 
10424 	if (usesInt64(from, to))
10425 		vulkanFeatures.coreFeatures.shaderInt64 = DE_TRUE;
10426 
10427 	if (usesFloat64(from, to))
10428 		vulkanFeatures.coreFeatures.shaderFloat64 = DE_TRUE;
10429 
10430 	if ((usesInt16(from, to) || usesFloat16(from, to)) && useStorageExt)
10431 	{
10432 		extensions.push_back("VK_KHR_16bit_storage");
10433 		vulkanFeatures.ext16BitStorage.storageBuffer16BitAccess = true;
10434 	}
10435 
10436 	if (usesFloat16(from, to) || usesInt8(from, to))
10437 	{
10438 		extensions.push_back("VK_KHR_shader_float16_int8");
10439 
10440 		if (usesFloat16(from, to))
10441 		{
10442 			vulkanFeatures.extFloat16Int8.shaderFloat16 = true;
10443 		}
10444 
10445 		if (usesInt8(from, to))
10446 		{
10447 			vulkanFeatures.extFloat16Int8.shaderInt8 = true;
10448 
10449 			extensions.push_back("VK_KHR_8bit_storage");
10450 			vulkanFeatures.ext8BitStorage.storageBuffer8BitAccess = true;
10451 		}
10452 	}
10453 }
10454 
10455 struct ConvertCase
10456 {
ConvertCasevkt::SpirVAssembly::ConvertCase10457 	ConvertCase (const string& instruction, ConversionDataType from, ConversionDataType to, deInt64 number, bool separateOutput = false, deInt64 outputNumber = 0, const char* suffix = DE_NULL, bool useStorageExt = true)
10458 	: m_fromType		(from)
10459 	, m_toType			(to)
10460 	, m_elements		(1)
10461 	, m_useStorageExt	(useStorageExt)
10462 	, m_name			(getTestName(from, to, suffix))
10463 	{
10464 		string caps;
10465 		string decl;
10466 		string exts;
10467 
10468 		m_asmTypes["inStorageType"]	= getAsmTypeName(from);
10469 		m_asmTypes["outStorageType"] = getAsmTypeName(to);
10470 		m_asmTypes["inCast"] = "OpCopyObject";
10471 		m_asmTypes["outCast"] = "OpCopyObject";
10472 		// If the storage extensions are being avoided, tests instead uses
10473 		// vectors so that they are easily convertible to 32-bit integers.
10474 		// |m_elements| indicates the size of the vector. It modifies how many
10475 		// items added to the buffers and converted in the tests.
10476 		//
10477 		// Currently only supports 1 (default) or 2 elements.
10478 		if (!m_useStorageExt)
10479 		{
10480 			bool in_change = false;
10481 			bool out_change = false;
10482 			if (usesFloat16(from, from) || usesInt16(from, from))
10483 			{
10484 				m_asmTypes["inStorageType"] = "u32";
10485 				m_asmTypes["inCast"] = "OpBitcast";
10486 				m_elements = 2;
10487 				in_change = true;
10488 			}
10489 			if (usesFloat16(to, to) || usesInt16(to, to))
10490 			{
10491 				m_asmTypes["outStorageType"] = "u32";
10492 				m_asmTypes["outCast"] = "OpBitcast";
10493 				m_elements = 2;
10494 				out_change = true;
10495 			}
10496 			if (in_change && !out_change)
10497 			{
10498 				m_asmTypes["outStorageType"] = getAsmTypeName(to, m_elements);
10499 			}
10500 			if (!in_change && out_change)
10501 			{
10502 				m_asmTypes["inStorageType"] = getAsmTypeName(from, m_elements);
10503 			}
10504 		}
10505 
10506 		// Safety check for implementation.
10507 		if (m_elements < 1 || m_elements > 2)
10508 			TCU_THROW(InternalError, "Unsupported number of elements");
10509 
10510 		m_asmTypes["inputType"]		= getAsmTypeName(from, m_elements);
10511 		m_asmTypes["outputType"]	= getAsmTypeName(to, m_elements);
10512 
10513 		m_inputBuffer = getBuffer(from, number, m_elements);
10514 		if (separateOutput)
10515 			m_outputBuffer = getBuffer(to, outputNumber, m_elements);
10516 		else
10517 			m_outputBuffer = getBuffer(to, number, m_elements);
10518 
10519 		if (usesInt8(from, to))
10520 		{
10521 			bool requiresInt8Capability = true;
10522 			if (instruction == "OpUConvert" || instruction == "OpSConvert")
10523 			{
10524 				// Conversions between 8 and 32 bit are provided by SPV_KHR_8bit_storage. The rest requires explicit Int8
10525 				if (usesInt32(from, to))
10526 					requiresInt8Capability = false;
10527 			}
10528 
10529 			caps += "OpCapability StorageBuffer8BitAccess\n";
10530 			if (requiresInt8Capability)
10531 				caps += "OpCapability Int8\n";
10532 
10533 			decl += "%i8         = OpTypeInt 8 1\n"
10534 					"%u8         = OpTypeInt 8 0\n";
10535 
10536 			if (m_elements == 2)
10537 			{
10538 				decl += "%v2i8       = OpTypeVector %i8 2\n"
10539 						"%v2u8       = OpTypeVector %u8 2\n";
10540 			}
10541 			exts += "OpExtension \"SPV_KHR_8bit_storage\"\n";
10542 		}
10543 
10544 		if (usesInt16(from, to))
10545 		{
10546 			bool requiresInt16Capability = true;
10547 
10548 			if (instruction == "OpUConvert" || instruction == "OpSConvert" || instruction == "OpFConvert")
10549 			{
10550 				// Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10551 				if (usesInt32(from, to) || usesFloat32(from, to))
10552 					requiresInt16Capability = false;
10553 			}
10554 
10555 			decl += "%i16        = OpTypeInt 16 1\n"
10556 					"%u16        = OpTypeInt 16 0\n";
10557 			if (m_elements == 2)
10558 			{
10559 				decl += "%v2i16      = OpTypeVector %i16 2\n"
10560 						"%v2u16      = OpTypeVector %u16 2\n";
10561 			}
10562 			else
10563 			{
10564 				decl += "%i16vec2    = OpTypeVector %i16 2\n";
10565 			}
10566 
10567 			// Conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Int16
10568 			if (requiresInt16Capability || !m_useStorageExt)
10569 				caps += "OpCapability Int16\n";
10570 		}
10571 
10572 		if (usesFloat16(from, to))
10573 		{
10574 			decl += "%f16        = OpTypeFloat 16\n";
10575 			if (m_elements == 2)
10576 			{
10577 				decl += "%v2f16      = OpTypeVector %f16 2\n";
10578 			}
10579 
10580 			// Width-only conversions between 16 and 32 bit are provided by SPV_KHR_16bit_storage. The rest requires explicit Float16
10581 			if (!usesFloat32(from, to) || !m_useStorageExt)
10582 				caps += "OpCapability Float16\n";
10583 		}
10584 
10585 		if ((usesInt16(from, to) || usesFloat16(from, to)) && m_useStorageExt)
10586 		{
10587 			caps += "OpCapability StorageUniformBufferBlock16\n";
10588 			exts += "OpExtension \"SPV_KHR_16bit_storage\"\n";
10589 		}
10590 
10591 		if (usesInt64(from, to))
10592 		{
10593 			caps += "OpCapability Int64\n";
10594 			decl += "%i64        = OpTypeInt 64 1\n"
10595 					"%u64        = OpTypeInt 64 0\n";
10596 			if (m_elements == 2)
10597 			{
10598 				decl += "%v2i64      = OpTypeVector %i64 2\n"
10599 						"%v2u64      = OpTypeVector %u64 2\n";
10600 			}
10601 		}
10602 
10603 		if (usesFloat64(from, to))
10604 		{
10605 			caps += "OpCapability Float64\n";
10606 			decl += "%f64        = OpTypeFloat 64\n";
10607 			if (m_elements == 2)
10608 			{
10609 				decl += "%v2f64        = OpTypeVector %f64 2\n";
10610 			}
10611 		}
10612 
10613 		m_asmTypes["datatype_capabilities"]		= caps;
10614 		m_asmTypes["datatype_additional_decl"]	= decl;
10615 		m_asmTypes["datatype_extensions"]		= exts;
10616 	}
10617 
10618 	ConversionDataType		m_fromType;
10619 	ConversionDataType		m_toType;
10620 	deUint32				m_elements;
10621 	bool					m_useStorageExt;
10622 	string					m_name;
10623 	map<string, string>		m_asmTypes;
10624 	BufferSp				m_inputBuffer;
10625 	BufferSp				m_outputBuffer;
10626 };
10627 
getConvertCaseShaderStr(const string & instruction,const ConvertCase & convertCase,bool addVectors=false)10628 const string getConvertCaseShaderStr (const string& instruction, const ConvertCase& convertCase, bool addVectors = false)
10629 {
10630 	map<string, string> params = convertCase.m_asmTypes;
10631 
10632 	params["instruction"]	= instruction;
10633 	params["inDecorator"]	= getByteWidthStr(convertCase.m_fromType);
10634 	params["outDecorator"]	= getByteWidthStr(convertCase.m_toType);
10635 
10636 	std::string shader (
10637 		"OpCapability Shader\n"
10638 		"${datatype_capabilities}"
10639 		"${datatype_extensions:opt}"
10640 		"OpMemoryModel Logical GLSL450\n"
10641 		"OpEntryPoint GLCompute %main \"main\"\n"
10642 		"OpExecutionMode %main LocalSize 1 1 1\n"
10643 		"OpSource GLSL 430\n"
10644 		"OpName %main           \"main\"\n"
10645 		// Decorators
10646 		"OpDecorate %indata DescriptorSet 0\n"
10647 		"OpDecorate %indata Binding 0\n"
10648 		"OpDecorate %outdata DescriptorSet 0\n"
10649 		"OpDecorate %outdata Binding 1\n"
10650 		"OpDecorate %in_buf BufferBlock\n"
10651 		"OpDecorate %out_buf BufferBlock\n"
10652 		"OpMemberDecorate %in_buf 0 Offset 0\n"
10653 		"OpMemberDecorate %out_buf 0 Offset 0\n"
10654 		// Base types
10655 		"%void       = OpTypeVoid\n"
10656 		"%voidf      = OpTypeFunction %void\n"
10657 		"%u32        = OpTypeInt 32 0\n"
10658 		"%i32        = OpTypeInt 32 1\n"
10659 		"%f32        = OpTypeFloat 32\n"
10660 		"%v2i32      = OpTypeVector %i32 2\n"
10661 		"${datatype_additional_decl}"
10662 	);
10663 	if (addVectors)
10664 	{
10665 		shader += "%v2u32 = OpTypeVector %u32 2\n"
10666 					"%v2f32 = OpTypeVector %f32 2\n";
10667 	}
10668 	shader +=
10669 		"%uvec3      = OpTypeVector %u32 3\n"
10670 		// Derived types
10671 		"%in_ptr     = OpTypePointer Uniform %${inStorageType}\n"
10672 		"%out_ptr    = OpTypePointer Uniform %${outStorageType}\n"
10673 		"%in_buf     = OpTypeStruct %${inStorageType}\n"
10674 		"%out_buf    = OpTypeStruct %${outStorageType}\n"
10675 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
10676 		"%out_bufptr = OpTypePointer Uniform %out_buf\n"
10677 		"%indata     = OpVariable %in_bufptr Uniform\n"
10678 		"%outdata    = OpVariable %out_bufptr Uniform\n"
10679 		// Constants
10680 		"%zero       = OpConstant %i32 0\n"
10681 		// Main function
10682 		"%main       = OpFunction %void None %voidf\n"
10683 		"%label      = OpLabel\n"
10684 		"%inloc      = OpAccessChain %in_ptr %indata %zero\n"
10685 		"%outloc     = OpAccessChain %out_ptr %outdata %zero\n"
10686 		"%inval      = OpLoad %${inStorageType} %inloc\n"
10687 		"%in_cast    = ${inCast} %${inputType} %inval\n"
10688 		"%conv       = ${instruction} %${outputType} %in_cast\n"
10689 		"%out_cast   = ${outCast} %${outStorageType} %conv\n"
10690 		"              OpStore %outloc %out_cast\n"
10691 		"              OpReturn\n"
10692 		"              OpFunctionEnd\n"
10693 	;
10694 
10695 	return StringTemplate(shader).specialize(params);
10696 }
10697 
createConvertCases(vector<ConvertCase> & testCases,const string & instruction)10698 void createConvertCases (vector<ConvertCase>& testCases, const string& instruction)
10699 {
10700 	if (instruction == "OpUConvert")
10701 	{
10702 		// Convert unsigned int to unsigned int
10703 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_16,		42));
10704 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_32,		73));
10705 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_UNSIGNED_64,		121));
10706 
10707 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_8,		33));
10708 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_32,		60653));
10709 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_UNSIGNED_64,		17991));
10710 
10711 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_64,		904256275));
10712 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_16,		6275));
10713 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_UNSIGNED_8,		17));
10714 
10715 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_32,		701256243));
10716 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_16,		4741));
10717 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_UNSIGNED_8,		65));
10718 
10719 		// Zero extension for int->uint
10720 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_16,		56));
10721 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_32,		-47,								true,	209));
10722 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_64,		-5,									true,	251));
10723 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_32,		14669));
10724 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_64,		-3341,								true,	62195));
10725 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_64,		973610259));
10726 
10727 		// Truncate for int->uint
10728 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_8,		-25711,								true,	145));
10729 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_8,		103));
10730 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_8,		-1067742499291926803ll,				true,	237));
10731 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_16,		12382));
10732 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_32,		-972812359,							true,	3322154937u));
10733 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_16,		-1067742499291926803ll,				true,	61165));
10734 	}
10735 	else if (instruction == "OpSConvert")
10736 	{
10737 		// Sign extension int->int
10738 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_16,		-30));
10739 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_32,		55));
10740 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_SIGNED_64,		-3));
10741 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_32,		14669));
10742 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_64,		-3341));
10743 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_64,		973610259));
10744 
10745 		// Truncate for int->int
10746 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_SIGNED_8,			81));
10747 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_8,			-93));
10748 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_8,			3182748172687672ll,					true,	56));
10749 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_SIGNED_16,		12382));
10750 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_32,		-972812359));
10751 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_SIGNED_16,		-1067742499291926803ll,				true,	-4371));
10752 
10753 		// Sign extension for int->uint
10754 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_16,		56));
10755 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_32,		-47,								true,	4294967249u));
10756 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_UNSIGNED_64,		-5,									true,	18446744073709551611ull));
10757 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_32,		14669));
10758 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_64,		-3341,								true,	18446744073709548275ull));
10759 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_64,		973610259));
10760 
10761 		// Truncate for int->uint
10762 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_UNSIGNED_8,		-25711,								true,	145));
10763 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_8,		103));
10764 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_8,		-1067742499291926803ll,				true,	237));
10765 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_UNSIGNED_16,		12382));
10766 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_32,		-972812359,							true,	3322154937u));
10767 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_UNSIGNED_16,		-1067742499291926803ll,				true,	61165));
10768 
10769 		// Sign extension for uint->int
10770 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_16,		71));
10771 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_32,		201,								true,	-55));
10772 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_SIGNED_64,		188,								true,	-68));
10773 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_32,		14669));
10774 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_64,		62195,								true,	-3341));
10775 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_64,		973610259));
10776 
10777 		// Truncate for uint->int
10778 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_SIGNED_8,			67));
10779 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_8,			133,								true,	-123));
10780 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_8,			836927654193256494ull,				true,	46));
10781 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_SIGNED_16,		12382));
10782 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_32,		18446744072736739257ull,			true,	-972812359));
10783 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_SIGNED_16,		17379001574417624813ull,			true,	-4371));
10784 
10785 		// Convert i16vec2 to i32vec2 and vice versa
10786 		// Unsigned values are used here to represent negative signed values and to allow defined shifting behaviour.
10787 		// The actual signed value -32123 is used here as uint16 value 33413 and uint32 value 4294935173
10788 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_VEC2_SIGNED_16,	DATA_TYPE_VEC2_SIGNED_32,	(33413u << 16)			| 27593,	true,	(4294935173ull << 32)	| 27593));
10789 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_VEC2_SIGNED_32,	DATA_TYPE_VEC2_SIGNED_16,	(4294935173ull << 32)	| 27593,	true,	(33413u << 16)			| 27593));
10790 	}
10791 	else if (instruction == "OpFConvert")
10792 	{
10793 		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10794 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_64,			0x449a4000,							true,	0x4093480000000000));
10795 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_32,			0x4093480000000000,					true,	0x449a4000));
10796 
10797 		// Conversion to/from 32-bit floats are supported by both 16-bit
10798 		// storage and Float16. The tests are duplicated to exercise both
10799 		// cases.
10800 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_16,			0x449a4000,							true,	0x64D2));
10801 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_32,			0x64D2,								true,	0x449a4000));
10802 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_FLOAT_16,			0x449a4000,							true,	0x64D2,					"no_storage",	false));
10803 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_32,			0x64D2,								true,	0x449a4000,				"no_storage",	false));
10804 
10805 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_64,			0x64D2,								true,	0x4093480000000000));
10806 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_16,			0x4093480000000000,					true,	0x64D2));
10807 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_FLOAT_64,			0x64D2,								true,	0x4093480000000000,		"no_storage",	false));
10808 	    testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_FLOAT_16,			0x4093480000000000,					true,	0x64D2,					"no_storage",	false));
10809 
10810 	}
10811 	else if (instruction == "OpConvertFToU")
10812 	{
10813 		// Normal numbers from uint8 range
10814 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x5020,								true,	33,									"33",	false));
10815 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x42280000,							true,	42,									"42"));
10816 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x4067800000000000ull,				true,	188,								"188"));
10817 
10818 		// Maximum uint8 value
10819 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x5BF8,								true,	255,								"max",	false));
10820 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x437F0000,							true,	255,								"max"));
10821 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x406FE00000000000ull,				true,	255,								"max"));
10822 
10823 		// +0
10824 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x0000,								true,	0,									"p0",	false));
10825 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x00000000,							true,	0,									"p0"));
10826 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x0000000000000000ull,				true,	0,									"p0"));
10827 
10828 		// -0
10829 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_8,		0x8000,								true,	0,									"m0",	false));
10830 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_8,		0x80000000,							true,	0,									"m0"));
10831 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_8,		0x8000000000000000ull,				true,	0,									"m0"));
10832 
10833 		// All hexadecimal values below represent 1234.0 as 16/32/64-bit IEEE 754 float
10834 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x64D2,								true,	1234,								"1234",	false));
10835 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x64D2,								true,	1234,								"1234",	false));
10836 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x64D2,								true,	1234,								"1234",	false));
10837 
10838 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10839 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x7BFF,								true,	65504,								"max",	false));
10840 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x7BFF,								true,	65504,								"max",	false));
10841 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x7BFF,								true,	65504,								"max",	false));
10842 
10843 		// +0
10844 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x0000,								true,	0,									"p0",	false));
10845 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x0000,								true,	0,									"p0",	false));
10846 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x0000,								true,	0,									"p0",	false));
10847 
10848 		// -0
10849 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_16,		0x8000,								true,	0,									"m0",	false));
10850 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_32,		0x8000,								true,	0,									"m0",	false));
10851 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_UNSIGNED_64,		0x8000,								true,	0,									"m0",	false));
10852 
10853 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_16,		0x449a4000,							true,	1234));
10854 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_32,		0x449a4000,							true,	1234));
10855 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_UNSIGNED_64,		0x449a4000,							true,	1234));
10856 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_16,		0x4093480000000000,					true,	1234));
10857 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_32,		0x4093480000000000,					true,	1234));
10858 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_UNSIGNED_64,		0x4093480000000000,					true,	1234));
10859 	}
10860 	else if (instruction == "OpConvertUToF")
10861 	{
10862 		// Normal numbers from uint8 range
10863 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_16,			116,								true,	0x5740,								"116",	false));
10864 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_32,			232,								true,	0x43680000,							"232"));
10865 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_64,			164,								true,	0x4064800000000000ull,				"164"));
10866 
10867 		// Maximum uint8 value
10868 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_16,			255,								true,	0x5BF8,								"max",	false));
10869 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_32,			255,								true,	0x437F0000,							"max"));
10870 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_8,		DATA_TYPE_FLOAT_64,			255,								true,	0x406FE00000000000ull,				"max"));
10871 
10872 		// All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
10873 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10874 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10875 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_16,			1234,								true,	0x64D2,								"1234",	false));
10876 
10877 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10878 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10879 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10880 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10881 
10882 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			4294967296ll,						true,	0x4f800000,							"4294967296",	false));
10883 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_64,			4294967296ll,						true,	0x41f0000000000000,					"4294967296",	false));
10884 
10885 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			0xffffff0000000000,					true,	0x5f7fffff,							"max",	false));
10886 
10887 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10888 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_16,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10889 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10890 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_32,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10891 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_32,			1234,								true,	0x449a4000));
10892 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_UNSIGNED_64,		DATA_TYPE_FLOAT_64,			1234,								true,	0x4093480000000000));
10893 	}
10894 	else if (instruction == "OpConvertFToS")
10895 	{
10896 		// Normal numbers from int8 range
10897 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0xC980,								true,	-11,								"m11",	false));
10898 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0xC2140000,							true,	-37,								"m37"));
10899 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0xC050800000000000ull,				true,	-66,								"m66"));
10900 
10901 		// Minimum int8 value
10902 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0xD800,								true,	-128,								"min",	false));
10903 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0xC3000000,							true,	-128,								"min"));
10904 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0xC060000000000000ull,				true,	-128,								"min"));
10905 
10906 		// Maximum int8 value
10907 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x57F0,								true,	127,								"max",	false));
10908 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x42FE0000,							true,	127,								"max"));
10909 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x405FC00000000000ull,				true,	127,								"max"));
10910 
10911 		// +0
10912 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x0000,								true,	0,									"p0",	false));
10913 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x00000000,							true,	0,									"p0"));
10914 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x0000000000000000ull,				true,	0,									"p0"));
10915 
10916 		// -0
10917 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_8,			0x8000,								true,	0,									"m0",	false));
10918 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_8,			0x80000000,							true,	0,									"m0"));
10919 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_8,			0x8000000000000000ull,				true,	0,									"m0"));
10920 
10921 		// All hexadecimal values below represent -1234.0 as 32/64-bit IEEE 754 float
10922 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0xE4D2,								true,	-1234,								"m1234",	false));
10923 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0xE4D2,								true,	-1234,								"m1234",	false));
10924 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0xE4D2,								true,	-1234,								"m1234",	false));
10925 
10926 		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10927 		// 0xFBFF = 1111 1011 1111 1111 = 1 11110 1111111111 = -65504
10928 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0xF800,								true,	-32768,								"min",	false));
10929 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0xFBFF,								true,	-65504,								"min",	false));
10930 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0xFBFF,								true,	-65504,								"min",	false));
10931 
10932 		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10933 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10934 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x77FF,								true,	32752,								"max",	false));
10935 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x7BFF,								true,	65504,								"max",	false));
10936 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x7BFF,								true,	65504,								"max",	false));
10937 
10938 		// +0
10939 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x0000,								true,	0,									"p0",	false));
10940 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x0000,								true,	0,									"p0",	false));
10941 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x0000,								true,	0,									"p0",	false));
10942 
10943 		// -0
10944 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_16,		0x8000,								true,	0,									"m0",	false));
10945 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_32,		0x8000,								true,	0,									"m0",	false));
10946 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_16,			DATA_TYPE_SIGNED_64,		0x8000,								true,	0,									"m0",	false));
10947 
10948 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0xc49a4000,							true,	-1234));
10949 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_32,		0xc49a4000,							true,	-1234));
10950 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_64,		0xc49a4000,							true,	-1234));
10951 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_16,		0xc093480000000000,					true,	-1234));
10952 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_32,		0xc093480000000000,					true,	-1234));
10953 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_64,			DATA_TYPE_SIGNED_64,		0xc093480000000000,					true,	-1234));
10954 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0x453b9000,							true,	 3001,								"p3001"));
10955 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_FLOAT_32,			DATA_TYPE_SIGNED_16,		0xc53b9000,							true,	-3001,								"m3001"));
10956 	}
10957 	else if (instruction == "OpConvertSToF")
10958 	{
10959 		// Normal numbers from int8 range
10960 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			-12,								true,	0xCA00,								"m21",	false));
10961 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			-21,								true,	0xC1A80000,							"m21"));
10962 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			-99,								true,	0xC058C00000000000ull,				"m99"));
10963 
10964 		// Minimum int8 value
10965 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			-128,								true,	0xD800,								"min",	false));
10966 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			-128,								true,	0xC3000000,							"min"));
10967 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			-128,								true,	0xC060000000000000ull,				"min"));
10968 
10969 		// Maximum int8 value
10970 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_16,			127,								true,	0x57F0,								"max",	false));
10971 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_32,			127,								true,	0x42FE0000,							"max"));
10972 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_8,			DATA_TYPE_FLOAT_64,			127,								true,	0x405FC00000000000ull,				"max"));
10973 
10974 		// All hexadecimal values below represent 1234.0 as 32/64-bit IEEE 754 float
10975 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10976 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10977 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-1234,								true,	0xE4D2,								"m1234",	false));
10978 
10979 		// 0x7800 = 0111 1000 0000 0000 = 0 11110 0000000000 = 32768
10980 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			32768,								true,	0x7800,								"p32768",	false));
10981 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			32768,								true,	0x7800,								"p32768",	false));
10982 
10983 		// 0xF800 = 1111 1000 0000 0000 = 1 11110 0000000000 = -32768
10984 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"m32768",	false));
10985 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"m32768",	false));
10986 
10987 		// 0xFBFF = 1111 1000 0000 0000 = 1 11110 1111111111 = -65504
10988 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			-32768,								true,	0xF800,								"min",	false));
10989 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			-65504,								true,	0xFBFF,								"min",	false));
10990 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			-65504,								true,	0xFBFF,								"min",	false));
10991 
10992 		// 0x77FF = 0111 0111 1111 1111 = 0 11101 1111111111 = 32752
10993 		// 0x7BFF = 0111 1011 1111 1111 = 0 11110 1111111111 = 65504
10994 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_16,			32752,								true,	0x77FF,								"max",	false));
10995 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10996 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_16,			65504,								true,	0x7BFF,								"max",	false));
10997 
10998 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			4294967296ll,						true,	0x4f800000,							"p4294967296",	false));
10999 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			4294967296ll,						true,	0x41f0000000000000,					"p4294967296",	false));
11000 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-4294967296ll,						true,	0xcf800000,							"m4294967296",	false));
11001 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			-4294967296ll,						true,	0xc1f0000000000000,					"m4294967296",	false));
11002 
11003 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			0x7fffff8000000000,					true,	0x5effffff,							"max",	false));
11004 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-0x7fffff8000000000,				true,	0xdeffffff,							"min",	false));
11005 
11006 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11007 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_16,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11008 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11009 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_32,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11010 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_32,			-1234,								true,	0xc49a4000));
11011 		testCases.push_back(ConvertCase(instruction,	DATA_TYPE_SIGNED_64,		DATA_TYPE_FLOAT_64,			-1234,								true,	0xc093480000000000));
11012 	}
11013 	else
11014 		DE_FATAL("Unknown instruction");
11015 }
11016 
getConvertCaseFragments(string instruction,const ConvertCase & convertCase)11017 const map<string, string> getConvertCaseFragments (string instruction, const ConvertCase& convertCase)
11018 {
11019 	map<string, string> params = convertCase.m_asmTypes;
11020 	map<string, string> fragments;
11021 
11022 	params["instruction"] = instruction;
11023 	params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11024 
11025 	const StringTemplate decoration (
11026 		"      OpDecorate %SSBOi DescriptorSet 0\n"
11027 		"      OpDecorate %SSBOo DescriptorSet 0\n"
11028 		"      OpDecorate %SSBOi Binding 0\n"
11029 		"      OpDecorate %SSBOo Binding 1\n"
11030 		"      OpDecorate %s_SSBOi Block\n"
11031 		"      OpDecorate %s_SSBOo Block\n"
11032 		"OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11033 		"OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11034 
11035 	const StringTemplate pre_main (
11036 		"${datatype_additional_decl:opt}"
11037 		"    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11038 		"   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11039 		"   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11040 		"   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11041 		" %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11042 		" %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11043 		"     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11044 		"     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11045 
11046 	const StringTemplate testfun (
11047 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11048 		"%param      = OpFunctionParameter %v4f32\n"
11049 		"%label      = OpLabel\n"
11050 		"%iLoc       = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11051 		"%oLoc       = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11052 		"%valIn      = OpLoad %${inStorageType} %iLoc\n"
11053 		"%valInCast  = ${inCast} %${inputType} %valIn\n"
11054 		"%conv       = ${instruction} %${outputType} %valInCast\n"
11055 		"%valOutCast = ${outCast} %${outStorageType} %conv\n"
11056 		"              OpStore %oLoc %valOutCast\n"
11057 		"              OpReturnValue %param\n"
11058 		"              OpFunctionEnd\n");
11059 
11060 	params["datatype_extensions"] =
11061 		params["datatype_extensions"] +
11062 		"OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11063 
11064 	fragments["capability"]	= params["datatype_capabilities"];
11065 	fragments["extension"]	= params["datatype_extensions"];
11066 	fragments["decoration"]	= decoration.specialize(params);
11067 	fragments["pre_main"]	= pre_main.specialize(params);
11068 	fragments["testfun"]	= testfun.specialize(params);
11069 
11070 	return fragments;
11071 }
11072 
getConvertCaseFragmentsNoStorage(string instruction,const ConvertCase & convertCase)11073 const map<string, string> getConvertCaseFragmentsNoStorage(string instruction, const ConvertCase& convertCase)
11074 {
11075 	map<string, string> params = convertCase.m_asmTypes;
11076 	map<string, string> fragments;
11077 
11078 	params["instruction"] = instruction;
11079 	params["inDecorator"] = getByteWidthStr(convertCase.m_fromType);
11080 
11081 	const StringTemplate decoration(
11082 		"      OpDecorate %SSBOi DescriptorSet 0\n"
11083 		"      OpDecorate %SSBOo DescriptorSet 0\n"
11084 		"      OpDecorate %SSBOi Binding 0\n"
11085 		"      OpDecorate %SSBOo Binding 1\n"
11086 		"      OpDecorate %s_SSBOi Block\n"
11087 		"      OpDecorate %s_SSBOo Block\n"
11088 		"OpMemberDecorate %s_SSBOi 0 Offset 0\n"
11089 		"OpMemberDecorate %s_SSBOo 0 Offset 0\n");
11090 
11091 	const StringTemplate pre_main(
11092 		"${datatype_additional_decl:opt}"
11093 		"    %ptr_in = OpTypePointer StorageBuffer %${inStorageType}\n"
11094 		"   %ptr_out = OpTypePointer StorageBuffer %${outStorageType}\n"
11095 		"   %s_SSBOi = OpTypeStruct %${inStorageType}\n"
11096 		"   %s_SSBOo = OpTypeStruct %${outStorageType}\n"
11097 		" %ptr_SSBOi = OpTypePointer StorageBuffer %s_SSBOi\n"
11098 		" %ptr_SSBOo = OpTypePointer StorageBuffer %s_SSBOo\n"
11099 		"     %SSBOi = OpVariable %ptr_SSBOi StorageBuffer\n"
11100 		"     %SSBOo = OpVariable %ptr_SSBOo StorageBuffer\n");
11101 
11102 	const StringTemplate testfun(
11103 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11104 		"%param     = OpFunctionParameter %v4f32\n"
11105 		"%label     = OpLabel\n"
11106 		"%iLoc      = OpAccessChain %ptr_in %SSBOi %c_u32_0\n"
11107 		"%oLoc      = OpAccessChain %ptr_out %SSBOo %c_u32_0\n"
11108 		"%inval      = OpLoad %${inStorageType} %iLoc\n"
11109 		"%in_cast    = ${inCast} %${inputType} %inval\n"
11110 		"%conv       = ${instruction} %${outputType} %in_cast\n"
11111 		"%out_cast   = ${outCast} %${outStorageType} %conv\n"
11112 		"              OpStore %oLoc %out_cast\n"
11113 		"              OpReturnValue %param\n"
11114 		"              OpFunctionEnd\n");
11115 
11116 	params["datatype_extensions"] =
11117 		params["datatype_extensions"] +
11118 		"OpExtension \"SPV_KHR_storage_buffer_storage_class\"\n";
11119 
11120 	fragments["capability"] = params["datatype_capabilities"];
11121 	fragments["extension"] = params["datatype_extensions"];
11122 	fragments["decoration"] = decoration.specialize(params);
11123 	fragments["pre_main"] = pre_main.specialize(params);
11124 	fragments["testfun"] = testfun.specialize(params);
11125 	return fragments;
11126 }
11127 
11128 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in compute shaders
createConvertComputeTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11129 tcu::TestCaseGroup* createConvertComputeTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11130 {
11131 	de::MovePtr<tcu::TestCaseGroup>		group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11132 	vector<ConvertCase>					testCases;
11133 	createConvertCases(testCases, instruction);
11134 
11135 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11136 	{
11137 		ComputeShaderSpec spec;
11138 		spec.assembly			= getConvertCaseShaderStr(instruction, *test, true);
11139 		spec.numWorkGroups		= IVec3(1, 1, 1);
11140 		spec.inputs.push_back	(test->m_inputBuffer);
11141 		spec.outputs.push_back	(test->m_outputBuffer);
11142 
11143 		getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, spec.requestedVulkanFeatures, spec.extensions);
11144 
11145 		group->addChild(new SpvAsmComputeShaderCase(testCtx, test->m_name.c_str(), "", spec));
11146 	}
11147 	return group.release();
11148 }
11149 
11150 // Test for OpSConvert, OpUConvert, OpFConvert and OpConvert* in graphics shaders
createConvertGraphicsTests(tcu::TestContext & testCtx,const string & instruction,const string & name)11151 tcu::TestCaseGroup* createConvertGraphicsTests (tcu::TestContext& testCtx, const string& instruction, const string& name)
11152 {
11153 	de::MovePtr<tcu::TestCaseGroup>		group(new tcu::TestCaseGroup(testCtx, name.c_str(), instruction.c_str()));
11154 	vector<ConvertCase>					testCases;
11155 	createConvertCases(testCases, instruction);
11156 
11157 	for (vector<ConvertCase>::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
11158 	{
11159 		map<string, string>	fragments		= (test->m_useStorageExt) ? getConvertCaseFragments(instruction, *test) : getConvertCaseFragmentsNoStorage(instruction,*test);
11160 		VulkanFeatures		vulkanFeatures;
11161 		GraphicsResources	resources;
11162 		vector<string>		extensions;
11163 		SpecConstants		noSpecConstants;
11164 		PushConstants		noPushConstants;
11165 		GraphicsInterfaces	noInterfaces;
11166 		tcu::RGBA			defaultColors[4];
11167 
11168 		getDefaultColors			(defaultColors);
11169 		resources.inputs.push_back	(Resource(test->m_inputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11170 		resources.outputs.push_back	(Resource(test->m_outputBuffer, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11171 		extensions.push_back		("VK_KHR_storage_buffer_storage_class");
11172 
11173 		getVulkanFeaturesAndExtensions(test->m_fromType, test->m_toType, test->m_useStorageExt, vulkanFeatures, extensions);
11174 
11175 		vulkanFeatures.coreFeatures.vertexPipelineStoresAndAtomics	= true;
11176 		vulkanFeatures.coreFeatures.fragmentStoresAndAtomics		= true;
11177 
11178 		createTestsForAllStages(
11179 			test->m_name, defaultColors, defaultColors, fragments, noSpecConstants,
11180 			noPushConstants, resources, noInterfaces, extensions, vulkanFeatures, group.get());
11181 	}
11182 	return group.release();
11183 }
11184 
11185 // Constant-Creation Instructions: OpConstant, OpConstantComposite
createOpConstantFloat16Tests(tcu::TestContext & testCtx)11186 tcu::TestCaseGroup* createOpConstantFloat16Tests(tcu::TestContext& testCtx)
11187 {
11188 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstant", "OpConstant and OpConstantComposite instruction"));
11189 	RGBA							inputColors[4];
11190 	RGBA							outputColors[4];
11191 	vector<string>					extensions;
11192 	GraphicsResources				resources;
11193 	VulkanFeatures					features;
11194 
11195 	const char						functionStart[]	 =
11196 		"%test_code             = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11197 		"%param1                = OpFunctionParameter %v4f32\n"
11198 		"%lbl                   = OpLabel\n";
11199 
11200 	const char						functionEnd[]		=
11201 		"%transformed_param_32  = OpFConvert %v4f32 %transformed_param\n"
11202 		"                         OpReturnValue %transformed_param_32\n"
11203 		"                         OpFunctionEnd\n";
11204 
11205 	struct NameConstantsCode
11206 	{
11207 		string name;
11208 		string constants;
11209 		string code;
11210 	};
11211 
11212 #define FLOAT_16_COMMON_TYPES_AND_CONSTS \
11213 			"%f16                  = OpTypeFloat 16\n"                                                 \
11214 			"%c_f16_0              = OpConstant %f16 0.0\n"                                            \
11215 			"%c_f16_0_5            = OpConstant %f16 0.5\n"                                            \
11216 			"%c_f16_1              = OpConstant %f16 1.0\n"                                            \
11217 			"%v4f16                = OpTypeVector %f16 4\n"                                            \
11218 			"%fp_f16               = OpTypePointer Function %f16\n"                                    \
11219 			"%fp_v4f16             = OpTypePointer Function %v4f16\n"                                  \
11220 			"%c_v4f16_1_1_1_1      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n" \
11221 			"%a4f16                = OpTypeArray %f16 %c_u32_4\n"                                      \
11222 
11223 	NameConstantsCode				tests[] =
11224 	{
11225 		{
11226 			"vec4",
11227 
11228 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11229 			"%cval                 = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_0\n",
11230 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11231 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %cval\n"
11232 		},
11233 		{
11234 			"struct",
11235 
11236 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11237 			"%stype                = OpTypeStruct %v4f16 %f16\n"
11238 			"%fp_stype             = OpTypePointer Function %stype\n"
11239 			"%f16_n_1              = OpConstant %f16 -1.0\n"
11240 			"%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11241 			"%cvec                 = OpConstantComposite %v4f16 %f16_1_5 %f16_1_5 %f16_1_5 %c_f16_1\n"
11242 			"%cval                 = OpConstantComposite %stype %cvec %f16_n_1\n",
11243 
11244 			"%v                    = OpVariable %fp_stype Function %cval\n"
11245 			"%vec_ptr              = OpAccessChain %fp_v4f16 %v %c_u32_0\n"
11246 			"%f16_ptr              = OpAccessChain %fp_f16 %v %c_u32_1\n"
11247 			"%vec_val              = OpLoad %v4f16 %vec_ptr\n"
11248 			"%f16_val              = OpLoad %f16 %f16_ptr\n"
11249 			"%tmp1                 = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_1 %f16_val\n" // vec4(-1)
11250 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11251 			"%tmp2                 = OpFAdd %v4f16 %tmp1 %param1_16\n" // param1 + vec4(-1)
11252 			"%transformed_param    = OpFAdd %v4f16 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
11253 		},
11254 		{
11255 			// [1|0|0|0.5] [x] = x + 0.5
11256 			// [0|1|0|0.5] [y] = y + 0.5
11257 			// [0|0|1|0.5] [z] = z + 0.5
11258 			// [0|0|0|1  ] [1] = 1
11259 			"matrix",
11260 
11261 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11262 			"%mat4x4_f16           = OpTypeMatrix %v4f16 4\n"
11263 			"%v4f16_1_0_0_0        = OpConstantComposite %v4f16 %c_f16_1 %c_f16_0 %c_f16_0 %c_f16_0\n"
11264 			"%v4f16_0_1_0_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_1 %c_f16_0 %c_f16_0\n"
11265 			"%v4f16_0_0_1_0        = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_1 %c_f16_0\n"
11266 			"%v4f16_0_5_0_5_0_5_1  = OpConstantComposite %v4f16 %c_f16_0_5 %c_f16_0_5 %c_f16_0_5 %c_f16_1\n"
11267 			"%cval                 = OpConstantComposite %mat4x4_f16 %v4f16_1_0_0_0 %v4f16_0_1_0_0 %v4f16_0_0_1_0 %v4f16_0_5_0_5_0_5_1\n",
11268 
11269 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11270 			"%transformed_param    = OpMatrixTimesVector %v4f16 %cval %param1_16\n"
11271 		},
11272 		{
11273 			"array",
11274 
11275 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11276 			"%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11277 			"%fp_a4f16             = OpTypePointer Function %a4f16\n"
11278 			"%f16_n_1              = OpConstant %f16 -1.0\n"
11279 			"%f16_1_5              = OpConstant %f16 !0x3e00\n" // +1.5
11280 			"%carr                 = OpConstantComposite %a4f16 %c_f16_0 %f16_n_1 %f16_1_5 %c_f16_0\n",
11281 
11282 			"%v                    = OpVariable %fp_a4f16 Function %carr\n"
11283 			"%f                    = OpAccessChain %fp_f16 %v %c_u32_0\n"
11284 			"%f1                   = OpAccessChain %fp_f16 %v %c_u32_1\n"
11285 			"%f2                   = OpAccessChain %fp_f16 %v %c_u32_2\n"
11286 			"%f3                   = OpAccessChain %fp_f16 %v %c_u32_3\n"
11287 			"%f_val                = OpLoad %f16 %f\n"
11288 			"%f1_val               = OpLoad %f16 %f1\n"
11289 			"%f2_val               = OpLoad %f16 %f2\n"
11290 			"%f3_val               = OpLoad %f16 %f3\n"
11291 			"%ftot1                = OpFAdd %f16 %f_val %f1_val\n"
11292 			"%ftot2                = OpFAdd %f16 %ftot1 %f2_val\n"
11293 			"%ftot3                = OpFAdd %f16 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
11294 			"%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %ftot3\n"
11295 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11296 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11297 		},
11298 		{
11299 			//
11300 			// [
11301 			//   {
11302 			//      0.0,
11303 			//      [ 1.0, 1.0, 1.0, 1.0]
11304 			//   },
11305 			//   {
11306 			//      1.0,
11307 			//      [ 0.0, 0.5, 0.0, 0.0]
11308 			//   }, //     ^^^
11309 			//   {
11310 			//      0.0,
11311 			//      [ 1.0, 1.0, 1.0, 1.0]
11312 			//   }
11313 			// ]
11314 			"array_of_struct_of_array",
11315 
11316 			FLOAT_16_COMMON_TYPES_AND_CONSTS
11317 			"%c_v4f16_1_1_1_0      = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_0\n"
11318 			"%fp_a4f16             = OpTypePointer Function %a4f16\n"
11319 			"%stype                = OpTypeStruct %f16 %a4f16\n"
11320 			"%a3stype              = OpTypeArray %stype %c_u32_3\n"
11321 			"%fp_a3stype           = OpTypePointer Function %a3stype\n"
11322 			"%ca4f16_0             = OpConstantComposite %a4f16 %c_f16_0 %c_f16_0_5 %c_f16_0 %c_f16_0\n"
11323 			"%ca4f16_1             = OpConstantComposite %a4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
11324 			"%cstype1              = OpConstantComposite %stype %c_f16_0 %ca4f16_1\n"
11325 			"%cstype2              = OpConstantComposite %stype %c_f16_1 %ca4f16_0\n"
11326 			"%carr                 = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
11327 
11328 			"%v                    = OpVariable %fp_a3stype Function %carr\n"
11329 			"%f                    = OpAccessChain %fp_f16 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
11330 			"%f_l                  = OpLoad %f16 %f\n"
11331 			"%add_vec              = OpVectorTimesScalar %v4f16 %c_v4f16_1_1_1_0 %f_l\n"
11332 			"%param1_16            = OpFConvert %v4f16 %param1\n"
11333 			"%transformed_param    = OpFAdd %v4f16 %param1_16 %add_vec\n"
11334 		}
11335 	};
11336 
11337 	getHalfColorsFullAlpha(inputColors);
11338 	outputColors[0] = RGBA(255, 255, 255, 255);
11339 	outputColors[1] = RGBA(255, 127, 127, 255);
11340 	outputColors[2] = RGBA(127, 255, 127, 255);
11341 	outputColors[3] = RGBA(127, 127, 255, 255);
11342 
11343 	extensions.push_back("VK_KHR_shader_float16_int8");
11344 	features.extFloat16Int8.shaderFloat16 = true;
11345 
11346 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
11347 	{
11348 		map<string, string> fragments;
11349 
11350 		fragments["capability"]	= "OpCapability Float16\n";
11351 		fragments["pre_main"]	= tests[testNdx].constants;
11352 		fragments["testfun"]	= string(functionStart) + tests[testNdx].code + functionEnd;
11353 
11354 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, resources, extensions, opConstantCompositeTests.get(), features);
11355 	}
11356 	return opConstantCompositeTests.release();
11357 }
11358 
// Registers one finished test in testGroup; the concrete behaviour is provided by explicit
// specializations on the resource type T (GraphicsResources and ComputeShaderSpec in this file).
//
// specResource    - test resources/spec; the ComputeShaderSpec specialization fills it in before use.
// fragments       - shader assembly fragments for the test.
// testCtx         - test context; only the ComputeShaderSpec specialization uses it.
// testGroup       - group that receives the created test(s).
// testName        - name of the created test.
// vulkanFeatures  - Vulkan features the test requires.
// extensions      - extensions the test requires.
// numWorkGroups   - work-group count; only the ComputeShaderSpec specialization uses it.
// splitRenderArea - forwarded to createTestsForAllStages() by the GraphicsResources
//                   specialization and ignored by the ComputeShaderSpec one; defaults to false.
template<typename T>
void finalizeTestsCreation (T&							specResource,
							const map<string, string>&	fragments,
							tcu::TestContext&			testCtx,
							tcu::TestCaseGroup&			testGroup,
							const std::string&			testName,
							const VulkanFeatures&		vulkanFeatures,
							const vector<string>&		extensions,
							const IVec3&				numWorkGroups,
							const bool					splitRenderArea = false);
11369 
11370 template<>
finalizeTestsCreation(GraphicsResources & specResource,const map<string,string> & fragments,tcu::TestContext &,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 &,const bool splitRenderArea)11371 void finalizeTestsCreation (GraphicsResources&			specResource,
11372 							const map<string, string>&	fragments,
11373 							tcu::TestContext&			,
11374 							tcu::TestCaseGroup&			testGroup,
11375 							const std::string&			testName,
11376 							const VulkanFeatures&		vulkanFeatures,
11377 							const vector<string>&		extensions,
11378 							const IVec3&				,
11379 							const bool					splitRenderArea)
11380 {
11381 	RGBA defaultColors[4];
11382 	getDefaultColors(defaultColors);
11383 
11384 	createTestsForAllStages(testName, defaultColors, defaultColors, fragments, specResource, extensions, &testGroup, vulkanFeatures, QP_TEST_RESULT_FAIL, std::string(), splitRenderArea);
11385 }
11386 
11387 template<>
finalizeTestsCreation(ComputeShaderSpec & specResource,const map<string,string> & fragments,tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const std::string & testName,const VulkanFeatures & vulkanFeatures,const vector<string> & extensions,const IVec3 & numWorkGroups,bool)11388 void finalizeTestsCreation (ComputeShaderSpec&			specResource,
11389 							const map<string, string>&	fragments,
11390 							tcu::TestContext&			testCtx,
11391 							tcu::TestCaseGroup&			testGroup,
11392 							const std::string&			testName,
11393 							const VulkanFeatures&		vulkanFeatures,
11394 							const vector<string>&		extensions,
11395 							const IVec3&				numWorkGroups,
11396 							bool)
11397 {
11398 	specResource.numWorkGroups = numWorkGroups;
11399 	specResource.requestedVulkanFeatures = vulkanFeatures;
11400 	specResource.extensions = extensions;
11401 
11402 	specResource.assembly = makeComputeShaderAssembly(fragments);
11403 
11404 	testGroup.addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", specResource));
11405 }
11406 
11407 template<class SpecResource>
createFloat16LogicalSet(tcu::TestContext & testCtx,const bool nanSupported)11408 tcu::TestCaseGroup* createFloat16LogicalSet (tcu::TestContext& testCtx, const bool nanSupported)
11409 {
11410 	const string						nan					= nanSupported ? "_nan" : "";
11411 	const string						groupName			= "logical" + nan;
11412 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, groupName.c_str(), "Float 16 logical tests"));
11413 
11414 	de::Random							rnd					(deStringHash(testGroup->getName()));
11415 	const string						spvCapabilities		= string("OpCapability Float16\n") + (nanSupported ? "OpCapability SignedZeroInfNanPreserve\n" : "");
11416 	const string						spvExtensions		= (nanSupported ? "OpExtension \"SPV_KHR_float_controls\"\n" : "");
11417 	const string						spvExecutionMode	= nanSupported ? "OpExecutionMode %BP_main SignedZeroInfNanPreserve 16\n" : "";
11418 	const deUint32						numDataPointsScalar	= 16;
11419 	const deUint32						numDataPointsVector	= 14;
11420 	const vector<deFloat16>				float16DataScalar	= getFloat16s(rnd, numDataPointsScalar);
11421 	const vector<deFloat16>				float16DataVector	= getFloat16s(rnd, numDataPointsVector);
11422 	const vector<deFloat16>				float16Data1		= squarize(float16DataScalar, 0);			// Total Size: square(sizeof(float16DataScalar))
11423 	const vector<deFloat16>				float16Data2		= squarize(float16DataScalar, 1);
11424 	const vector<deFloat16>				float16DataVec1		= squarizeVector(float16DataVector, 0);		// Total Size: 2 * (square(square(sizeof(float16DataVector))))
11425 	const vector<deFloat16>				float16DataVec2		= squarizeVector(float16DataVector, 1);
11426 	const vector<deFloat16>				float16OutDummy		(float16Data1.size(), 0);
11427 	const vector<deFloat16>				float16OutVecDummy	(float16DataVec1.size(), 0);
11428 
11429 	struct TestOp
11430 	{
11431 		const char*		opCode;
11432 		VerifyIOFunc	verifyFuncNan;
11433 		VerifyIOFunc	verifyFuncNonNan;
11434 		const deUint32	argCount;
11435 	};
11436 
11437 	const TestOp	testOps[]	=
11438 	{
11439 		{ "OpIsNan"						,	compareFP16Logical<fp16isNan,				true,  false, true>,	compareFP16Logical<fp16isNan,				true,  false, false>,	1	},
11440 		{ "OpIsInf"						,	compareFP16Logical<fp16isInf,				true,  false, true>,	compareFP16Logical<fp16isInf,				true,  false, false>,	1	},
11441 		{ "OpFOrdEqual"					,	compareFP16Logical<fp16isEqual,				false, true,  true>,	compareFP16Logical<fp16isEqual,				false, true,  false>,	2	},
11442 		{ "OpFUnordEqual"				,	compareFP16Logical<fp16isEqual,				false, false, true>,	compareFP16Logical<fp16isEqual,				false, false, false>,	2	},
11443 		{ "OpFOrdNotEqual"				,	compareFP16Logical<fp16isUnequal,			false, true,  true>,	compareFP16Logical<fp16isUnequal,			false, true,  false>,	2	},
11444 		{ "OpFUnordNotEqual"			,	compareFP16Logical<fp16isUnequal,			false, false, true>,	compareFP16Logical<fp16isUnequal,			false, false, false>,	2	},
11445 		{ "OpFOrdLessThan"				,	compareFP16Logical<fp16isLess,				false, true,  true>,	compareFP16Logical<fp16isLess,				false, true,  false>,	2	},
11446 		{ "OpFUnordLessThan"			,	compareFP16Logical<fp16isLess,				false, false, true>,	compareFP16Logical<fp16isLess,				false, false, false>,	2	},
11447 		{ "OpFOrdGreaterThan"			,	compareFP16Logical<fp16isGreater,			false, true,  true>,	compareFP16Logical<fp16isGreater,			false, true,  false>,	2	},
11448 		{ "OpFUnordGreaterThan"			,	compareFP16Logical<fp16isGreater,			false, false, true>,	compareFP16Logical<fp16isGreater,			false, false, false>,	2	},
11449 		{ "OpFOrdLessThanEqual"			,	compareFP16Logical<fp16isLessOrEqual,		false, true,  true>,	compareFP16Logical<fp16isLessOrEqual,		false, true,  false>,	2	},
11450 		{ "OpFUnordLessThanEqual"		,	compareFP16Logical<fp16isLessOrEqual,		false, false, true>,	compareFP16Logical<fp16isLessOrEqual,		false, false, false>,	2	},
11451 		{ "OpFOrdGreaterThanEqual"		,	compareFP16Logical<fp16isGreaterOrEqual,	false, true,  true>,	compareFP16Logical<fp16isGreaterOrEqual,	false, true,  false>,	2	},
11452 		{ "OpFUnordGreaterThanEqual"	,	compareFP16Logical<fp16isGreaterOrEqual,	false, false, true>,	compareFP16Logical<fp16isGreaterOrEqual,	false, false, false>,	2	},
11453 	};
11454 
11455 	{ // scalar cases
11456 		const StringTemplate preMain
11457 		(
11458 			"      %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11459 			"     %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11460 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11461 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
11462 			"            %f16 = OpTypeFloat 16\n"
11463 			"          %v2f16 = OpTypeVector %f16 2\n"
11464 			"        %c_f16_0 = OpConstant %f16 0.0\n"
11465 			"        %c_f16_1 = OpConstant %f16 1.0\n"
11466 			"         %up_u32 = OpTypePointer Uniform %u32\n"
11467 			"         %ra_u32 = OpTypeArray %u32 %c_i32_hndp\n"
11468 			"         %SSBO16 = OpTypeStruct %ra_u32\n"
11469 			"      %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11470 			"     %f16_i32_fn = OpTypeFunction %f16 %i32\n"
11471 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11472 			"      %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11473 			"      %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11474 			"       %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11475 		);
11476 
11477 		const StringTemplate decoration
11478 		(
11479 			"OpDecorate %ra_u32 ArrayStride 4\n"
11480 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
11481 			"OpDecorate %SSBO16 BufferBlock\n"
11482 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
11483 			"OpDecorate %ssbo_src0 Binding 0\n"
11484 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
11485 			"OpDecorate %ssbo_src1 Binding 1\n"
11486 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
11487 			"OpDecorate %ssbo_dst Binding 2\n"
11488 		);
11489 
11490 		const StringTemplate testFun
11491 		(
11492 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11493 			"    %param = OpFunctionParameter %v4f32\n"
11494 
11495 			"    %entry = OpLabel\n"
11496 			"        %i = OpVariable %fp_i32 Function\n"
11497 			"             OpStore %i %c_i32_0\n"
11498 			"             OpBranch %loop\n"
11499 
11500 			"     %loop = OpLabel\n"
11501 			"    %i_cmp = OpLoad %i32 %i\n"
11502 			"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11503 			"             OpLoopMerge %merge %next None\n"
11504 			"             OpBranchConditional %lt %write %merge\n"
11505 
11506 			"    %write = OpLabel\n"
11507 			"      %ndx = OpLoad %i32 %i\n"
11508 
11509 			" %val_src0 = OpFunctionCall %f16 %ld_arg_ssbo_src0 %ndx\n"
11510 
11511 			"${op_arg1_calc}"
11512 
11513 			" %val_bdst = ${op_code} %bool %val_src0 ${op_arg1}\n"
11514 			"  %val_dst = OpSelect %f16 %val_bdst %c_f16_1 %c_f16_0\n"
11515 			"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11516 			"             OpBranch %next\n"
11517 
11518 			"     %next = OpLabel\n"
11519 			"    %i_cur = OpLoad %i32 %i\n"
11520 			"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11521 			"             OpStore %i %i_new\n"
11522 			"             OpBranch %loop\n"
11523 
11524 			"    %merge = OpLabel\n"
11525 			"             OpReturnValue %param\n"
11526 
11527 			"             OpFunctionEnd\n"
11528 		);
11529 
11530 		const StringTemplate arg1Calc
11531 		(
11532 			" %val_src1 = OpFunctionCall %f16 %ld_arg_ssbo_src1 %ndx\n"
11533 		);
11534 
11535 		for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11536 		{
11537 			const size_t		iterations		= float16Data1.size();
11538 			const TestOp&		testOp			= testOps[testOpsIdx];
11539 			const string		testName		= de::toLower(string(testOp.opCode)) + "_scalar";
11540 			SpecResource		specResource;
11541 			map<string, string>	specs;
11542 			VulkanFeatures		features;
11543 			map<string, string>	fragments;
11544 			vector<string>		extensions;
11545 
11546 			specs["num_data_points"]	= de::toString(iterations);
11547 			specs["op_code"]			= testOp.opCode;
11548 			specs["op_arg1"]			= (testOp.argCount == 1) ? "" : "%val_src1";
11549 			specs["op_arg1_calc"]		= (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11550 
11551 			fragments["extension"]		= spvExtensions;
11552 			fragments["capability"]		= spvCapabilities;
11553 			fragments["execution_mode"]	= spvExecutionMode;
11554 			fragments["decoration"]		= decoration.specialize(specs);
11555 			fragments["pre_main"]		= preMain.specialize(specs);
11556 			fragments["testfun"]		= testFun.specialize(specs);
11557 			fragments["testfun"]		+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src0"}});
11558 			if (testOp.argCount > 1)
11559 			{
11560 				fragments["testfun"]	+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src1"}});
11561 			}
11562 			fragments["testfun"]		+= StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
11563 
11564 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11565 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Data2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11566 			specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11567 			specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11568 
11569 			extensions.push_back("VK_KHR_shader_float16_int8");
11570 
11571 			if (nanSupported)
11572 			{
11573 				extensions.push_back("VK_KHR_shader_float_controls");
11574 
11575 				features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11576 			}
11577 
11578 			features.extFloat16Int8.shaderFloat16 = true;
11579 
11580 			finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11581 		}
11582 	}
11583 	{ // vector cases
11584 		const StringTemplate preMain
11585 		(
11586 			"        %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11587 			"           %v2bool = OpTypeVector %bool 2\n"
11588 			"              %f16 = OpTypeFloat 16\n"
11589 			"          %c_f16_0 = OpConstant %f16 0.0\n"
11590 			"          %c_f16_1 = OpConstant %f16 1.0\n"
11591 			"            %v2f16 = OpTypeVector %f16 2\n"
11592 			"      %c_v2f16_0_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11593 			"      %c_v2f16_1_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
11594 			"           %up_u32 = OpTypePointer Uniform %u32\n"
11595 			"           %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
11596 			"           %SSBO16 = OpTypeStruct %ra_u32\n"
11597 			"        %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11598 			"     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11599 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
11600 			"        %ssbo_src0 = OpVariable %up_SSBO16 Uniform\n"
11601 			"        %ssbo_src1 = OpVariable %up_SSBO16 Uniform\n"
11602 			"         %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11603 		);
11604 
11605 		const StringTemplate decoration
11606 		(
11607 			"OpDecorate %ra_u32 ArrayStride 4\n"
11608 			"OpMemberDecorate %SSBO16 0 Offset 0\n"
11609 			"OpDecorate %SSBO16 BufferBlock\n"
11610 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
11611 			"OpDecorate %ssbo_src0 Binding 0\n"
11612 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
11613 			"OpDecorate %ssbo_src1 Binding 1\n"
11614 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
11615 			"OpDecorate %ssbo_dst Binding 2\n"
11616 		);
11617 
11618 		const StringTemplate testFun
11619 		(
11620 			"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11621 			"    %param = OpFunctionParameter %v4f32\n"
11622 
11623 			"    %entry = OpLabel\n"
11624 			"        %i = OpVariable %fp_i32 Function\n"
11625 			"             OpStore %i %c_i32_0\n"
11626 			"             OpBranch %loop\n"
11627 
11628 			"     %loop = OpLabel\n"
11629 			"    %i_cmp = OpLoad %i32 %i\n"
11630 			"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11631 			"             OpLoopMerge %merge %next None\n"
11632 			"             OpBranchConditional %lt %write %merge\n"
11633 
11634 			"    %write = OpLabel\n"
11635 			"      %ndx = OpLoad %i32 %i\n"
11636 
11637 			" %val_src0 = OpFunctionCall %v2f16 %ld_arg_ssbo_src0 %ndx\n"
11638 
11639 			"${op_arg1_calc}"
11640 
11641 			" %val_bdst = ${op_code} %v2bool %val_src0 ${op_arg1}\n"
11642 			"  %val_dst = OpSelect %v2f16 %val_bdst %c_v2f16_1_1 %c_v2f16_0_0\n"
11643 			"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11644 			"             OpBranch %next\n"
11645 
11646 			"     %next = OpLabel\n"
11647 			"    %i_cur = OpLoad %i32 %i\n"
11648 			"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11649 			"             OpStore %i %i_new\n"
11650 			"             OpBranch %loop\n"
11651 
11652 			"    %merge = OpLabel\n"
11653 			"             OpReturnValue %param\n"
11654 
11655 			"             OpFunctionEnd\n"
11656 		);
11657 
11658 		const StringTemplate arg1Calc
11659 		(
11660 			" %val_src1 = OpFunctionCall %v2f16 %ld_arg_ssbo_src1 %ndx\n"
11661 		);
11662 
11663 		for (deUint32 testOpsIdx = 0; testOpsIdx < DE_LENGTH_OF_ARRAY(testOps); ++testOpsIdx)
11664 		{
11665 			const deUint32		itemsPerVec	= 2;
11666 			const size_t		iterations	= float16DataVec1.size() / itemsPerVec;
11667 			const TestOp&		testOp		= testOps[testOpsIdx];
11668 			const string		testName	= de::toLower(string(testOp.opCode)) + "_vector";
11669 			SpecResource		specResource;
11670 			map<string, string>	specs;
11671 			vector<string>		extensions;
11672 			VulkanFeatures		features;
11673 			map<string, string>	fragments;
11674 
11675 			specs["num_data_points"]	= de::toString(iterations);
11676 			specs["op_code"]			= testOp.opCode;
11677 			specs["op_arg1"]			= (testOp.argCount == 1) ? "" : "%val_src1";
11678 			specs["op_arg1_calc"]		= (testOp.argCount == 1) ? "" : arg1Calc.specialize(specs);
11679 
11680 			fragments["extension"]		= spvExtensions;
11681 			fragments["capability"]		= spvCapabilities;
11682 			fragments["execution_mode"]	= spvExecutionMode;
11683 			fragments["decoration"]		= decoration.specialize(specs);
11684 			fragments["pre_main"]		= preMain.specialize(specs);
11685 			fragments["testfun"]		= testFun.specialize(specs);
11686 			fragments["testfun"]		+= StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src0"}});
11687 			if (testOp.argCount > 1)
11688 			{
11689 				fragments["testfun"]	+= StringTemplate(loadV2F16FromUint).specialize({{"var", "ssbo_src1"}});
11690 			}
11691 			fragments["testfun"]		+= StringTemplate(storeV2F16AsUint).specialize({{"var", "ssbo_dst"}});
11692 
11693 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11694 			specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16DataVec2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11695 			specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutVecDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11696 			specResource.verifyIO = nanSupported ? testOp.verifyFuncNan : testOp.verifyFuncNonNan;
11697 
11698 			extensions.push_back("VK_KHR_shader_float16_int8");
11699 
11700 			if (nanSupported)
11701 			{
11702 				extensions.push_back("VK_KHR_shader_float_controls");
11703 
11704 				features.floatControlsProperties.shaderSignedZeroInfNanPreserveFloat16 = DE_TRUE;
11705 			}
11706 
11707 			features.extFloat16Int8.shaderFloat16 = true;
11708 
11709 			finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1), true);
11710 		}
11711 	}
11712 
11713 	return testGroup.release();
11714 }
11715 
compareFP16FunctionSetFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11716 bool compareFP16FunctionSetFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11717 {
11718 	if (inputs.size() != 1 || outputAllocs.size() != 1)
11719 		return false;
11720 
11721 	vector<deUint8>	input1Bytes;
11722 
11723 	inputs[0].getBytes(input1Bytes);
11724 
11725 	const deUint16* const	input1AsFP16	= (const deUint16*)&input1Bytes[0];
11726 	const deUint16* const	outputAsFP16	= (const deUint16*)outputAllocs[0]->getHostPtr();
11727 	std::string				error;
11728 
11729 	for (size_t idx = 0; idx < input1Bytes.size() / sizeof(deUint16); ++idx)
11730 	{
11731 		if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
11732 		{
11733 			log << TestLog::Message << error << TestLog::EndMessage;
11734 
11735 			return false;
11736 		}
11737 	}
11738 
11739 	return true;
11740 }
11741 
11742 template<class SpecResource>
createFloat16FuncSet(tcu::TestContext & testCtx)11743 tcu::TestCaseGroup* createFloat16FuncSet (tcu::TestContext& testCtx)
11744 {
11745 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "function", "Float 16 function call related tests"));
11746 
11747 	de::Random							rnd					(deStringHash(testGroup->getName()));
11748 	const StringTemplate				capabilities		("OpCapability Float16\n");
11749 	const deUint32						numDataPoints		= 256;
11750 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
11751 	const vector<deFloat16>				float16OutputDummy	(float16InputData.size(), 0);
11752 	map<string, string>					fragments;
11753 
11754 	struct TestType
11755 	{
11756 		const deUint32	typeComponents;
11757 		const char*		typeName;
11758 		const char*		typeDecls;
11759 		const char*		typeStorage;
11760 		const string		loadFunc;
11761 		const string		storeFunc;
11762 	};
11763 
11764 	const TestType	testTypes[]	=
11765 	{
11766 		{
11767 			1,
11768 			"f16",
11769 			"      %v2f16 = OpTypeVector %f16 2\n"
11770 			"%f16_i32_fn = OpTypeFunction %f16 %i32\n"
11771 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11772 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11773 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11774 			"u32_hndp",
11775 			loadScalarF16FromUint,
11776 			storeScalarF16AsUint
11777 		},
11778 		{
11779 			2,
11780 			"v2f16",
11781 			"      %v2f16 = OpTypeVector %f16 2\n"
11782 			"  %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
11783 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11784 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
11785 			"u32_ndp",
11786 			loadV2F16FromUint,
11787 			storeV2F16AsUint
11788 		},
11789 		{
11790 			4,
11791 			"v4f16",
11792 			"      %v2f16 = OpTypeVector %f16 2\n"
11793 			"      %v4f16 = OpTypeVector %f16 4\n"
11794 			"  %c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
11795 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
11796 			"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
11797 			"ra_u32_2",
11798 			loadV4F16FromUints,
11799 			storeV4F16AsUints
11800 		},
11801 	};
11802 
11803 	const StringTemplate preMain
11804 	(
11805 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
11806 		" %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
11807 		"     %v2bool = OpTypeVector %bool 2\n"
11808 		"        %f16 = OpTypeFloat 16\n"
11809 		"    %c_f16_0 = OpConstant %f16 0.0\n"
11810 
11811 		"${type_decls}"
11812 
11813 		"  %${tt}_fun = OpTypeFunction %${tt} %${tt}\n"
11814 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
11815 		"%ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
11816 		" %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
11817 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
11818 		"	  %up_u32 = OpTypePointer Uniform %u32\n"
11819 		"     %SSBO16 = OpTypeStruct %ra_${ts}\n"
11820 		"  %up_SSBO16 = OpTypePointer Uniform %SSBO16\n"
11821 		"   %ssbo_src = OpVariable %up_SSBO16 Uniform\n"
11822 		"   %ssbo_dst = OpVariable %up_SSBO16 Uniform\n"
11823 	);
11824 
11825 	const StringTemplate decoration
11826 	(
11827 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
11828 		"OpDecorate %ra_u32_hndp ArrayStride 4\n"
11829 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
11830 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
11831 		"OpMemberDecorate %SSBO16 0 Offset 0\n"
11832 		"OpDecorate %SSBO16 BufferBlock\n"
11833 		"OpDecorate %ssbo_src DescriptorSet 0\n"
11834 		"OpDecorate %ssbo_src Binding 0\n"
11835 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
11836 		"OpDecorate %ssbo_dst Binding 1\n"
11837 	);
11838 
11839 	const StringTemplate testFun
11840 	(
11841 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
11842 		"    %param = OpFunctionParameter %v4f32\n"
11843 		"    %entry = OpLabel\n"
11844 
11845 		"        %i = OpVariable %fp_i32 Function\n"
11846 		"             OpStore %i %c_i32_0\n"
11847 		"             OpBranch %loop\n"
11848 
11849 		"     %loop = OpLabel\n"
11850 		"    %i_cmp = OpLoad %i32 %i\n"
11851 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
11852 		"             OpLoopMerge %merge %next None\n"
11853 		"             OpBranchConditional %lt %write %merge\n"
11854 
11855 		"    %write = OpLabel\n"
11856 		"      %ndx = OpLoad %i32 %i\n"
11857 
11858 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
11859 		"  %val_dst = OpFunctionCall %${tt} %pass_fun %val_src\n"
11860 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
11861 		"             OpBranch %next\n"
11862 
11863 		"     %next = OpLabel\n"
11864 		"    %i_cur = OpLoad %i32 %i\n"
11865 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
11866 		"             OpStore %i %i_new\n"
11867 		"             OpBranch %loop\n"
11868 
11869 		"    %merge = OpLabel\n"
11870 		"             OpReturnValue %param\n"
11871 
11872 		"             OpFunctionEnd\n"
11873 
11874 		" %pass_fun = OpFunction %${tt} None %${tt}_fun\n"
11875 		"   %param0 = OpFunctionParameter %${tt}\n"
11876 		" %entry_pf = OpLabel\n"
11877 		"     %res0 = OpFAdd %${tt} %param0 %c_${tt}_0\n"
11878 		"             OpReturnValue %res0\n"
11879 		"             OpFunctionEnd\n"
11880 	);
11881 
11882 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
11883 	{
11884 		const TestType&		testType		= testTypes[testTypeIdx];
11885 		const string		testName		= testType.typeName;
11886 		const deUint32		itemsPerType	= testType.typeComponents;
11887 		const size_t		iterations		= float16InputData.size() / itemsPerType;
11888 		const size_t		typeStride		= itemsPerType * sizeof(deFloat16);
11889 		SpecResource		specResource;
11890 		map<string, string>	specs;
11891 		VulkanFeatures		features;
11892 		vector<string>		extensions;
11893 
11894 		specs["num_data_points"]	= de::toString(iterations);
11895 		specs["tt"]					= testType.typeName;
11896 		specs["ts"]					= testType.typeStorage;
11897 		specs["tt_stride"]			= de::toString(typeStride);
11898 		specs["type_decls"]			= testType.typeDecls;
11899 
11900 		fragments["capability"]		= capabilities.specialize(specs);
11901 		fragments["decoration"]		= decoration.specialize(specs);
11902 		fragments["pre_main"]		= preMain.specialize(specs);
11903 		fragments["testfun"]		= testFun.specialize(specs);
11904 		fragments["testfun"]		+= StringTemplate(testType.loadFunc).specialize({{"var", "ssbo_src"}});
11905 		fragments["testfun"]		+= StringTemplate(testType.storeFunc).specialize({{"var", "ssbo_dst"}});
11906 
11907 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11908 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
11909 		specResource.verifyIO = compareFP16FunctionSetFunc;
11910 
11911 		extensions.push_back("VK_KHR_shader_float16_int8");
11912 
11913 		features.extFloat16Int8.shaderFloat16 = true;
11914 
11915 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
11916 	}
11917 
11918 	return testGroup.release();
11919 }
11920 
compareFP16VectorExtractFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)11921 bool compareFP16VectorExtractFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
11922 {
11923 	if (inputs.size() != 2 || outputAllocs.size() != 1)
11924 		return false;
11925 
11926 	vector<deUint8>	input1Bytes;
11927 	vector<deUint8>	input2Bytes;
11928 
11929 	inputs[0].getBytes(input1Bytes);
11930 	inputs[1].getBytes(input2Bytes);
11931 
11932 	DE_ASSERT(input1Bytes.size() > 0);
11933 	DE_ASSERT(input2Bytes.size() > 0);
11934 	DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
11935 
11936 	const size_t			iterations		= input2Bytes.size() / sizeof(deUint32);
11937 	const size_t			components		= input1Bytes.size() / (sizeof(deFloat16) * iterations);
11938 	const deFloat16* const	input1AsFP16	= (const deFloat16*)&input1Bytes[0];
11939 	const deUint32* const	inputIndices	= (const deUint32*)&input2Bytes[0];
11940 	const deFloat16* const	outputAsFP16	= (const deFloat16*)outputAllocs[0]->getHostPtr();
11941 	std::string				error;
11942 
11943 	DE_ASSERT(components == 2 || components == 4);
11944 	DE_ASSERT(input1Bytes.size() == iterations * components * sizeof(deFloat16));
11945 
11946 	for (size_t idx = 0; idx < iterations; ++idx)
11947 	{
11948 		const deUint32	componentNdx	= inputIndices[idx];
11949 
11950 		DE_ASSERT(componentNdx < components);
11951 
11952 		const deFloat16	expected		= input1AsFP16[components * idx + componentNdx];
11953 
11954 		if (!compare16BitFloat(expected, outputAsFP16[idx], error))
11955 		{
11956 			log << TestLog::Message << "At " << idx << error << TestLog::EndMessage;
11957 
11958 			return false;
11959 		}
11960 	}
11961 
11962 	return true;
11963 }
11964 
11965 template<class SpecResource>
createFloat16VectorExtractSet(tcu::TestContext & testCtx)11966 tcu::TestCaseGroup* createFloat16VectorExtractSet (tcu::TestContext& testCtx)
11967 {
11968 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorextractdynamic", "OpVectorExtractDynamic tests"));
11969 
11970 	de::Random							rnd					(deStringHash(testGroup->getName()));
11971 	const deUint32						numDataPoints		= 256;
11972 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
11973 	const vector<deFloat16>				float16OutputDummy	(float16InputData.size(), 0);
11974 
11975 	struct TestType
11976 	{
11977 		const deUint32	typeComponents;
11978 		const size_t	typeStride;
11979 		const char*		typeName;
11980 		const char*		typeDecls;
11981 		const char*		typeStorage;
11982 		const string		loadFunction;
11983 		const string		storeFunction;
11984 	};
11985 
11986 	const TestType	testTypes[]	=
11987 	{
11988 		{
11989 			2,
11990 			2 * sizeof(deFloat16),
11991 			"v2f16",
11992 			"      %v2f16 = OpTypeVector %f16 2\n"
11993 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
11994 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
11995 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
11996 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
11997 			"u32",
11998 			loadV2F16FromUint,
11999 			storeScalarF16AsUint
12000 		},
12001 		{
12002 			3,
12003 			4 * sizeof(deFloat16),
12004 			"v3f16",
12005 			"      %v2f16 = OpTypeVector %f16 2\n"
12006 			"      %v3f16 = OpTypeVector %f16 3\n"
12007 			"%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12008 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12009 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12010 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12011 			"ra_u32_2",
12012 			loadV3F16FromUints,
12013 			storeScalarF16AsUint
12014 		},
12015 		{
12016 			4,
12017 			4 * sizeof(deFloat16),
12018 			"v4f16",
12019 			"      %v2f16 = OpTypeVector %f16 2\n"
12020 			"      %v4f16 = OpTypeVector %f16 4\n"
12021 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12022 			"%void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
12023 			"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
12024 			" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n",
12025 			"ra_u32_2",
12026 			loadV4F16FromUints,
12027 			storeScalarF16AsUint
12028 		},
12029 	};
12030 
12031 	const StringTemplate preMain
12032 	(
12033 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12034 		" %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
12035 		"        %f16 = OpTypeFloat 16\n"
12036 
12037 		"${type_decl}"
12038 
12039 		"     %up_u32 = OpTypePointer Uniform %u32\n"
12040 		"     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12041 		"   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12042 		"%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12043 
12044 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12045 		" %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12046 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12047 		"   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12048 		"%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12049 
12050 		" %ra_u32_hndp = OpTypeArray %u32 %c_i32_hndp\n"
12051 		"   %SSBO_DST = OpTypeStruct %ra_u32_hndp\n"
12052 		"%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12053 
12054 		"   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12055 		"   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12056 		"   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12057 	);
12058 
12059 	const StringTemplate decoration
12060 	(
12061 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12062 		"OpDecorate %ra_u32_hndp ArrayStride 4\n"
12063 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12064 		"OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12065 		"OpDecorate %SSBO_SRC BufferBlock\n"
12066 		"OpDecorate %ssbo_src DescriptorSet 0\n"
12067 		"OpDecorate %ssbo_src Binding 0\n"
12068 
12069 		"OpDecorate %ra_u32 ArrayStride 4\n"
12070 		"OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12071 		"OpDecorate %SSBO_IDX BufferBlock\n"
12072 		"OpDecorate %ssbo_idx DescriptorSet 0\n"
12073 		"OpDecorate %ssbo_idx Binding 1\n"
12074 
12075 		"OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12076 		"OpDecorate %SSBO_DST BufferBlock\n"
12077 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12078 		"OpDecorate %ssbo_dst Binding 2\n"
12079 	);
12080 
12081 	const StringTemplate testFun
12082 	(
12083 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12084 		"    %param = OpFunctionParameter %v4f32\n"
12085 		"    %entry = OpLabel\n"
12086 
12087 		"        %i = OpVariable %fp_i32 Function\n"
12088 		"             OpStore %i %c_i32_0\n"
12089 
12090 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12091 		"             OpSelectionMerge %end_if None\n"
12092 		"             OpBranchConditional %will_run %run_test %end_if\n"
12093 
12094 		" %run_test = OpLabel\n"
12095 		"             OpBranch %loop\n"
12096 
12097 		"     %loop = OpLabel\n"
12098 		"    %i_cmp = OpLoad %i32 %i\n"
12099 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12100 		"             OpLoopMerge %merge %next None\n"
12101 		"             OpBranchConditional %lt %write %merge\n"
12102 
12103 		"    %write = OpLabel\n"
12104 		"      %ndx = OpLoad %i32 %i\n"
12105 
12106 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12107 
12108 		"  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12109 		"  %val_idx = OpLoad %u32 %src_idx\n"
12110 
12111 		"  %val_dst = OpVectorExtractDynamic %f16 %val_src %val_idx\n"
12112 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12113 
12114 		"             OpBranch %next\n"
12115 
12116 		"     %next = OpLabel\n"
12117 		"    %i_cur = OpLoad %i32 %i\n"
12118 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12119 		"             OpStore %i %i_new\n"
12120 		"             OpBranch %loop\n"
12121 
12122 		"    %merge = OpLabel\n"
12123 		"             OpBranch %end_if\n"
12124 		"   %end_if = OpLabel\n"
12125 		"             OpReturnValue %param\n"
12126 
12127 		"             OpFunctionEnd\n"
12128 	);
12129 
12130 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12131 	{
12132 		const TestType&		testType		= testTypes[testTypeIdx];
12133 		const string		testName		= testType.typeName;
12134 		const size_t		itemsPerType	= testType.typeStride / sizeof(deFloat16);
12135 		const size_t		iterations		= float16InputData.size() / itemsPerType;
12136 		SpecResource		specResource;
12137 		map<string, string>	specs;
12138 		VulkanFeatures		features;
12139 		vector<deUint32>	inputDataNdx;
12140 		map<string, string>	fragments;
12141 		vector<string>		extensions;
12142 
12143 		for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12144 			inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12145 
12146 		specs["num_data_points"]	= de::toString(iterations);
12147 		specs["tt"]					= testType.typeName;
12148 		specs["ts"]					= testType.typeStorage;
12149 		specs["tt_stride"]			= de::toString(testType.typeStride);
12150 		specs["type_decl"]			= testType.typeDecls;
12151 
12152 		fragments["capability"]		= "OpCapability Float16\n";
12153 		fragments["decoration"]		= decoration.specialize(specs);
12154 		fragments["pre_main"]		= preMain.specialize(specs);
12155 		fragments["testfun"]		= testFun.specialize(specs);
12156 		fragments["testfun"]		+= StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12157 		fragments["testfun"]		+= StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12158 
12159 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12160 		specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12161 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12162 		specResource.verifyIO = compareFP16VectorExtractFunc;
12163 
12164 		extensions.push_back("VK_KHR_shader_float16_int8");
12165 
12166 		features.extFloat16Int8.shaderFloat16 = true;
12167 
12168 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12169 	}
12170 
12171 	return testGroup.release();
12172 }
12173 
12174 template<deUint32 COMPONENTS_COUNT, deUint32 REPLACEMENT>
compareFP16VectorInsertFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12175 bool compareFP16VectorInsertFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12176 {
12177 	if (inputs.size() != 2 || outputAllocs.size() != 1)
12178 		return false;
12179 
12180 	vector<deUint8>	input1Bytes;
12181 	vector<deUint8>	input2Bytes;
12182 
12183 	inputs[0].getBytes(input1Bytes);
12184 	inputs[1].getBytes(input2Bytes);
12185 
12186 	DE_ASSERT(input1Bytes.size() > 0);
12187 	DE_ASSERT(input2Bytes.size() > 0);
12188 	DE_ASSERT(input2Bytes.size() % sizeof(deUint32) == 0);
12189 
12190 	const size_t			iterations			= input2Bytes.size() / sizeof(deUint32);
12191 	const size_t			componentsStride	= input1Bytes.size() / (sizeof(deFloat16) * iterations);
12192 	const deFloat16* const	input1AsFP16		= (const deFloat16*)&input1Bytes[0];
12193 	const deUint32* const	inputIndices		= (const deUint32*)&input2Bytes[0];
12194 	const deFloat16* const	outputAsFP16		= (const deFloat16*)outputAllocs[0]->getHostPtr();
12195 	const deFloat16			magic				= tcu::Float16(float(REPLACEMENT)).bits();
12196 	std::string				error;
12197 
12198 	DE_ASSERT(componentsStride == 2 || componentsStride == 4);
12199 	DE_ASSERT(input1Bytes.size() == iterations * componentsStride * sizeof(deFloat16));
12200 
12201 	for (size_t idx = 0; idx < iterations; ++idx)
12202 	{
12203 		const deFloat16*	inputVec		= &input1AsFP16[componentsStride * idx];
12204 		const deFloat16*	outputVec		= &outputAsFP16[componentsStride * idx];
12205 		const deUint32		replacedCompNdx	= inputIndices[idx];
12206 
12207 		DE_ASSERT(replacedCompNdx < COMPONENTS_COUNT);
12208 
12209 		for (size_t compNdx = 0; compNdx < COMPONENTS_COUNT; ++compNdx)
12210 		{
12211 			const deFloat16	expected	= (compNdx == replacedCompNdx) ? magic : inputVec[compNdx];
12212 
12213 			if (!compare16BitFloat(expected, outputVec[compNdx], error))
12214 			{
12215 				log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12216 
12217 				return false;
12218 			}
12219 		}
12220 	}
12221 
12222 	return true;
12223 }
12224 
12225 template<class SpecResource>
createFloat16VectorInsertSet(tcu::TestContext & testCtx)12226 tcu::TestCaseGroup* createFloat16VectorInsertSet (tcu::TestContext& testCtx)
12227 {
12228 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorinsertdynamic", "OpVectorInsertDynamic tests"));
12229 
12230 	de::Random							rnd					(deStringHash(testGroup->getName()));
12231 	const deUint32						replacement			= 42;
12232 	const deUint32						numDataPoints		= 256;
12233 	const vector<deFloat16>				float16InputData	= getFloat16s(rnd, numDataPoints);
12234 	const vector<deFloat16>				float16OutputDummy	(float16InputData.size(), 0);
12235 
12236 	struct TestType
12237 	{
12238 		const deUint32	typeComponents;
12239 		const size_t	typeStride;
12240 		const char*		typeName;
12241 		const char*		typeDecls;
12242 		VerifyIOFunc	verifyIOFunc;
12243 		const char*		typeStorage;
12244 		const string		loadFunction;
12245 		const string		storeFunction;
12246 	};
12247 
12248 	const TestType	testTypes[]	=
12249 	{
12250 		{
12251 			2,
12252 			2 * sizeof(deFloat16),
12253 			"v2f16",
12254 			"      %v2f16 = OpTypeVector %f16 2\n"
12255 			"%v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12256 			"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n",
12257 			compareFP16VectorInsertFunc<2, replacement>,
12258 			"u32",
12259 			loadV2F16FromUint,
12260 			storeV2F16AsUint
12261 		},
12262 		{
12263 			3,
12264 			4 * sizeof(deFloat16),
12265 			"v3f16",
12266 			"      %v2f16 = OpTypeVector %f16 2\n"
12267 			"      %v3f16 = OpTypeVector %f16 3\n"
12268 			"%v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12269 			"%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n",
12270 			compareFP16VectorInsertFunc<3, replacement>,
12271 			"ra_u32_2",
12272 			loadV3F16FromUints,
12273 			storeV3F16AsUints
12274 		},
12275 		{
12276 			4,
12277 			4 * sizeof(deFloat16),
12278 			"v4f16",
12279 			"      %v2f16 = OpTypeVector %f16 2\n"
12280 			"      %v4f16 = OpTypeVector %f16 4\n"
12281 			"%v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12282 			"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n",
12283 			compareFP16VectorInsertFunc<4, replacement>,
12284 			"ra_u32_2",
12285 			loadV4F16FromUints,
12286 			storeV4F16AsUints
12287 		},
12288 	};
12289 
12290 	const StringTemplate preMain
12291 	(
12292 		"  %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12293 		"        %f16 = OpTypeFloat 16\n"
12294 		"  %c_f16_ins = OpConstant %f16 ${replacement}\n"
12295 
12296 		"${type_decl}"
12297 
12298 		"     %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
12299 		"	  %up_u32 = OpTypePointer Uniform %u32\n"
12300 		"   %SSBO_IDX = OpTypeStruct %ra_u32\n"
12301 		"%up_SSBO_IDX = OpTypePointer Uniform %SSBO_IDX\n"
12302 
12303 		"   %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12304 		"%ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12305 		"   %SSBO_SRC = OpTypeStruct %ra_${ts}\n"
12306 		"%up_SSBO_SRC = OpTypePointer Uniform %SSBO_SRC\n"
12307 
12308 		"   %SSBO_DST = OpTypeStruct %ra_${ts}\n"
12309 		"%up_SSBO_DST = OpTypePointer Uniform %SSBO_DST\n"
12310 
12311 		"   %ssbo_src = OpVariable %up_SSBO_SRC Uniform\n"
12312 		"   %ssbo_idx = OpVariable %up_SSBO_IDX Uniform\n"
12313 		"   %ssbo_dst = OpVariable %up_SSBO_DST Uniform\n"
12314 	);
12315 
12316 	const StringTemplate decoration
12317 	(
12318 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12319 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12320 		"OpMemberDecorate %SSBO_SRC 0 Offset 0\n"
12321 		"OpDecorate %SSBO_SRC BufferBlock\n"
12322 		"OpDecorate %ssbo_src DescriptorSet 0\n"
12323 		"OpDecorate %ssbo_src Binding 0\n"
12324 
12325 		"OpDecorate %ra_u32 ArrayStride 4\n"
12326 		"OpMemberDecorate %SSBO_IDX 0 Offset 0\n"
12327 		"OpDecorate %SSBO_IDX BufferBlock\n"
12328 		"OpDecorate %ssbo_idx DescriptorSet 0\n"
12329 		"OpDecorate %ssbo_idx Binding 1\n"
12330 
12331 		"OpMemberDecorate %SSBO_DST 0 Offset 0\n"
12332 		"OpDecorate %SSBO_DST BufferBlock\n"
12333 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12334 		"OpDecorate %ssbo_dst Binding 2\n"
12335 	);
12336 
12337 	const StringTemplate testFun
12338 	(
12339 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12340 		"    %param = OpFunctionParameter %v4f32\n"
12341 		"    %entry = OpLabel\n"
12342 
12343 		"        %i = OpVariable %fp_i32 Function\n"
12344 		"             OpStore %i %c_i32_0\n"
12345 
12346 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12347 		"             OpSelectionMerge %end_if None\n"
12348 		"             OpBranchConditional %will_run %run_test %end_if\n"
12349 
12350 		" %run_test = OpLabel\n"
12351 		"             OpBranch %loop\n"
12352 
12353 		"     %loop = OpLabel\n"
12354 		"    %i_cmp = OpLoad %i32 %i\n"
12355 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12356 		"             OpLoopMerge %merge %next None\n"
12357 		"             OpBranchConditional %lt %write %merge\n"
12358 
12359 		"    %write = OpLabel\n"
12360 		"      %ndx = OpLoad %i32 %i\n"
12361 
12362 		"  %val_src = OpFunctionCall %${tt} %ld_arg_ssbo_src %ndx\n"
12363 
12364 		"  %src_idx = OpAccessChain %up_u32 %ssbo_idx %c_i32_0 %ndx\n"
12365 		"  %val_idx = OpLoad %u32 %src_idx\n"
12366 
12367 		"  %val_dst = OpVectorInsertDynamic %${tt} %val_src %c_f16_ins %val_idx\n"
12368 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12369 
12370 		"             OpBranch %next\n"
12371 
12372 		"     %next = OpLabel\n"
12373 		"    %i_cur = OpLoad %i32 %i\n"
12374 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12375 		"             OpStore %i %i_new\n"
12376 		"             OpBranch %loop\n"
12377 
12378 		"    %merge = OpLabel\n"
12379 		"             OpBranch %end_if\n"
12380 		"   %end_if = OpLabel\n"
12381 		"             OpReturnValue %param\n"
12382 
12383 		"             OpFunctionEnd\n"
12384 	);
12385 
12386 	for (deUint32 testTypeIdx = 0; testTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++testTypeIdx)
12387 	{
12388 		const TestType&		testType		= testTypes[testTypeIdx];
12389 		const string		testName		= testType.typeName;
12390 		const size_t		itemsPerType	= testType.typeStride / sizeof(deFloat16);
12391 		const size_t		iterations		= float16InputData.size() / itemsPerType;
12392 		SpecResource		specResource;
12393 		map<string, string>	specs;
12394 		VulkanFeatures		features;
12395 		vector<deUint32>	inputDataNdx;
12396 		map<string, string>	fragments;
12397 		vector<string>		extensions;
12398 
12399 		for (deUint32 ndx = 0; ndx < iterations; ++ndx)
12400 			inputDataNdx.push_back(rnd.getUint32() % testType.typeComponents);
12401 
12402 		specs["num_data_points"]	= de::toString(iterations);
12403 		specs["tt"]					= testType.typeName;
12404 		specs["ts"]					= testType.typeStorage;
12405 		specs["tt_stride"]			= de::toString(testType.typeStride);
12406 		specs["type_decl"]			= testType.typeDecls;
12407 		specs["replacement"]		= de::toString(replacement);
12408 
12409 		fragments["capability"]		= "OpCapability Float16\n";
12410 		fragments["decoration"]		= decoration.specialize(specs);
12411 		fragments["pre_main"]		= preMain.specialize(specs);
12412 		fragments["testfun"]		= testFun.specialize(specs);
12413 		fragments["testfun"]		+= StringTemplate(testType.loadFunction).specialize({{"var", "ssbo_src"}});
12414 		fragments["testfun"]		+= StringTemplate(testType.storeFunction).specialize({{"var", "ssbo_dst"}});
12415 
12416 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16InputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12417 		specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inputDataNdx)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12418 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12419 		specResource.verifyIO = testType.verifyIOFunc;
12420 
12421 		extensions.push_back("VK_KHR_shader_float16_int8");
12422 
12423 		features.extFloat16Int8.shaderFloat16 = true;
12424 
12425 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12426 	}
12427 
12428 	return testGroup.release();
12429 }
12430 
/*--------------------------------------------------------------------*//*!
 * \brief Compute the reference value of one OpVectorShuffle result component
 *
 * The selector of components 0 and 1 is derived from the iteration number:
 * the iteration is reduced modulo (N * N), where N = vec1Len + vec2Len + 1,
 * and component 0 uses the quotient by N while component 1 uses the
 * remainder. Components 2 and 3 always select indices 0 and 1.
 *
 * A selector below vec1Len reads input1Vec, a selector below
 * vec1Len + vec2Len reads input2Vec, and the one remaining selector value
 * has no source; for it \c validate is cleared and 0 is returned.
 *
 * \param iteration		Index of the processed vector pair.
 * \param componentNdx	Result component to evaluate, 0..3.
 * \param input1Vec		First source vector.
 * \param input2Vec		Second source vector.
 * \param vec1Len		Component count of the first source vector.
 * \param vec2Len		Component count of the second source vector.
 * \param validate		Set to true when the returned value must be checked.
 * \return Expected component bits, or 0 when there is nothing to check.
 *//*--------------------------------------------------------------------*/
inline deFloat16 getShuffledComponent (const size_t iteration, const size_t componentNdx, const deFloat16* input1Vec, const deFloat16* input2Vec, size_t vec1Len, size_t vec2Len, bool& validate)
{
	const size_t	sourceLen		= vec1Len + vec2Len;
	const size_t	selectorCount	= sourceLen + 1;
	const size_t	caseNdx			= iteration % (selectorCount * selectorCount);
	size_t			selector		= 0;

	if (componentNdx == 0)
		selector = caseNdx / selectorCount;
	else if (componentNdx == 1)
		selector = caseNdx % selectorCount;
	else if (componentNdx == 2)
		selector = 0;
	else if (componentNdx == 3)
		selector = 1;
	else
		TCU_THROW(InternalError, "Impossible");

	validate = (selector < sourceLen);

	if (!validate)
		return 0;

	if (selector < vec1Len)
		return input1Vec[selector];

	return input2Vec[selector - vec1Len];
}
12457 
12458 template<deUint32 DST_COMPONENTS_COUNT, deUint32 SRC0_COMPONENTS_COUNT, deUint32 SRC1_COMPONENTS_COUNT>
compareFP16VectorShuffleFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12459 bool compareFP16VectorShuffleFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12460 {
12461 	DE_STATIC_ASSERT(DST_COMPONENTS_COUNT == 2 || DST_COMPONENTS_COUNT == 3 || DST_COMPONENTS_COUNT == 4);
12462 	DE_STATIC_ASSERT(SRC0_COMPONENTS_COUNT == 2 || SRC0_COMPONENTS_COUNT == 3 || SRC0_COMPONENTS_COUNT == 4);
12463 	DE_STATIC_ASSERT(SRC1_COMPONENTS_COUNT == 2 || SRC1_COMPONENTS_COUNT == 3 || SRC1_COMPONENTS_COUNT == 4);
12464 
12465 	if (inputs.size() != 2 || outputAllocs.size() != 1)
12466 		return false;
12467 
12468 	vector<deUint8>	input1Bytes;
12469 	vector<deUint8>	input2Bytes;
12470 
12471 	inputs[0].getBytes(input1Bytes);
12472 	inputs[1].getBytes(input2Bytes);
12473 
12474 	DE_ASSERT(input1Bytes.size() > 0);
12475 	DE_ASSERT(input2Bytes.size() > 0);
12476 	DE_ASSERT(input2Bytes.size() % sizeof(deFloat16) == 0);
12477 
12478 	const size_t			componentsStrideDst		= (DST_COMPONENTS_COUNT == 3) ? 4 : DST_COMPONENTS_COUNT;
12479 	const size_t			componentsStrideSrc0	= (SRC0_COMPONENTS_COUNT == 3) ? 4 : SRC0_COMPONENTS_COUNT;
12480 	const size_t			componentsStrideSrc1	= (SRC1_COMPONENTS_COUNT == 3) ? 4 : SRC1_COMPONENTS_COUNT;
12481 	const size_t			iterations				= input1Bytes.size() / (componentsStrideSrc0 * sizeof(deFloat16));
12482 	const deFloat16* const	input1AsFP16			= (const deFloat16*)&input1Bytes[0];
12483 	const deFloat16* const	input2AsFP16			= (const deFloat16*)&input2Bytes[0];
12484 	const deFloat16* const	outputAsFP16			= (const deFloat16*)outputAllocs[0]->getHostPtr();
12485 	std::string				error;
12486 
12487 	DE_ASSERT(input1Bytes.size() == iterations * componentsStrideSrc0 * sizeof(deFloat16));
12488 	DE_ASSERT(input2Bytes.size() == iterations * componentsStrideSrc1 * sizeof(deFloat16));
12489 
12490 	for (size_t idx = 0; idx < iterations; ++idx)
12491 	{
12492 		const deFloat16*	input1Vec	= &input1AsFP16[componentsStrideSrc0 * idx];
12493 		const deFloat16*	input2Vec	= &input2AsFP16[componentsStrideSrc1 * idx];
12494 		const deFloat16*	outputVec	= &outputAsFP16[componentsStrideDst * idx];
12495 
12496 		for (size_t compNdx = 0; compNdx < DST_COMPONENTS_COUNT; ++compNdx)
12497 		{
12498 			bool		validate	= true;
12499 			deFloat16	expected	= getShuffledComponent(idx, compNdx, input1Vec, input2Vec, SRC0_COMPONENTS_COUNT, SRC1_COMPONENTS_COUNT, validate);
12500 
12501 			if (validate && !compare16BitFloat(expected, outputVec[compNdx], error))
12502 			{
12503 				log << TestLog::Message << "At " << idx << "[" << compNdx << "]: " << error << TestLog::EndMessage;
12504 
12505 				return false;
12506 			}
12507 		}
12508 	}
12509 
12510 	return true;
12511 }
12512 
getFloat16VectorShuffleVerifyIOFunc(deUint32 dstComponentsCount,deUint32 src0ComponentsCount,deUint32 src1ComponentsCount)12513 VerifyIOFunc getFloat16VectorShuffleVerifyIOFunc (deUint32 dstComponentsCount, deUint32 src0ComponentsCount, deUint32 src1ComponentsCount)
12514 {
12515 	DE_ASSERT(dstComponentsCount <= 4);
12516 	DE_ASSERT(src0ComponentsCount <= 4);
12517 	DE_ASSERT(src1ComponentsCount <= 4);
12518 	deUint32 funcCode = 100 * dstComponentsCount + 10 * src0ComponentsCount + src1ComponentsCount;
12519 
12520 	switch (funcCode)
12521 	{
12522 		case 222:return compareFP16VectorShuffleFunc<2, 2, 2>;
12523 		case 223:return compareFP16VectorShuffleFunc<2, 2, 3>;
12524 		case 224:return compareFP16VectorShuffleFunc<2, 2, 4>;
12525 		case 232:return compareFP16VectorShuffleFunc<2, 3, 2>;
12526 		case 233:return compareFP16VectorShuffleFunc<2, 3, 3>;
12527 		case 234:return compareFP16VectorShuffleFunc<2, 3, 4>;
12528 		case 242:return compareFP16VectorShuffleFunc<2, 4, 2>;
12529 		case 243:return compareFP16VectorShuffleFunc<2, 4, 3>;
12530 		case 244:return compareFP16VectorShuffleFunc<2, 4, 4>;
12531 		case 322:return compareFP16VectorShuffleFunc<3, 2, 2>;
12532 		case 323:return compareFP16VectorShuffleFunc<3, 2, 3>;
12533 		case 324:return compareFP16VectorShuffleFunc<3, 2, 4>;
12534 		case 332:return compareFP16VectorShuffleFunc<3, 3, 2>;
12535 		case 333:return compareFP16VectorShuffleFunc<3, 3, 3>;
12536 		case 334:return compareFP16VectorShuffleFunc<3, 3, 4>;
12537 		case 342:return compareFP16VectorShuffleFunc<3, 4, 2>;
12538 		case 343:return compareFP16VectorShuffleFunc<3, 4, 3>;
12539 		case 344:return compareFP16VectorShuffleFunc<3, 4, 4>;
12540 		case 422:return compareFP16VectorShuffleFunc<4, 2, 2>;
12541 		case 423:return compareFP16VectorShuffleFunc<4, 2, 3>;
12542 		case 424:return compareFP16VectorShuffleFunc<4, 2, 4>;
12543 		case 432:return compareFP16VectorShuffleFunc<4, 3, 2>;
12544 		case 433:return compareFP16VectorShuffleFunc<4, 3, 3>;
12545 		case 434:return compareFP16VectorShuffleFunc<4, 3, 4>;
12546 		case 442:return compareFP16VectorShuffleFunc<4, 4, 2>;
12547 		case 443:return compareFP16VectorShuffleFunc<4, 4, 3>;
12548 		case 444:return compareFP16VectorShuffleFunc<4, 4, 4>;
12549 		default: TCU_THROW(InternalError, "Invalid number of components specified.");
12550 	}
12551 }
12552 
12553 template<class SpecResource>
createFloat16VectorShuffleSet(tcu::TestContext & testCtx)12554 tcu::TestCaseGroup* createFloat16VectorShuffleSet (tcu::TestContext& testCtx)
12555 {
12556 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "opvectorshuffle", "OpVectorShuffle tests"));
12557 	const int							testSpecificSeed	= deStringHash(testGroup->getName());
12558 	const int							seed				= testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
12559 	de::Random							rnd					(seed);
12560 	const deUint32						numDataPoints		= 128;
12561 	map<string, string>					fragments;
12562 
12563 	struct TestType
12564 	{
12565 		const deUint32	typeComponents;
12566 		const char*		typeName;
12567 		const string	loadFunction;
12568 		const string	storeFunction;
12569 	};
12570 
12571 	const TestType	testTypes[]	=
12572 	{
12573 		{
12574 			2,
12575 			"v2f16",
12576 			loadV2F16FromUint,
12577 			storeV2F16AsUint
12578 		},
12579 		{
12580 			3,
12581 			"v3f16",
12582 			loadV3F16FromUints,
12583 			storeV3F16AsUints
12584 		},
12585 		{
12586 			4,
12587 			"v4f16",
12588 			loadV4F16FromUints,
12589 			storeV4F16AsUints
12590 		},
12591 	};
12592 
12593 	const StringTemplate preMain
12594 	(
12595 		"    %c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
12596 		"     %c_i32_cc = OpConstant %i32 ${case_count}\n"
12597 		"          %f16 = OpTypeFloat 16\n"
12598 		"        %v2f16 = OpTypeVector %f16 2\n"
12599 		"        %v3f16 = OpTypeVector %f16 3\n"
12600 		"        %v4f16 = OpTypeVector %f16 4\n"
12601 
12602 		"     %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
12603 		"     %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
12604 		"     %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
12605 		"%void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
12606 		"%void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
12607 		"%void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
12608 
12609 		"     %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
12610 		"   %ra_u32_ndp = OpTypeArray %u32 %c_i32_ndp\n"
12611 		"  %ra_ra_u32_2 = OpTypeArray %ra_u32_2 %c_i32_ndp\n"
12612 		"       %up_u32 = OpTypePointer Uniform %u32\n"
12613 		"   %SSBO_v2f16 = OpTypeStruct %ra_u32_ndp\n"
12614 		"   %SSBO_v3f16 = OpTypeStruct %ra_ra_u32_2\n"
12615 		"   %SSBO_v4f16 = OpTypeStruct %ra_ra_u32_2\n"
12616 
12617 		"%up_SSBO_v2f16 = OpTypePointer Uniform %SSBO_v2f16\n"
12618 		"%up_SSBO_v3f16 = OpTypePointer Uniform %SSBO_v3f16\n"
12619 		"%up_SSBO_v4f16 = OpTypePointer Uniform %SSBO_v4f16\n"
12620 
12621 		"        %fun_t = OpTypeFunction %${tt_dst} %${tt_src0} %${tt_src1} %i32\n"
12622 
12623 		"    %ssbo_src0 = OpVariable %up_SSBO_${tt_src0} Uniform\n"
12624 		"    %ssbo_src1 = OpVariable %up_SSBO_${tt_src1} Uniform\n"
12625 		"     %ssbo_dst = OpVariable %up_SSBO_${tt_dst} Uniform\n"
12626 	);
12627 
12628 	const StringTemplate decoration
12629 	(
12630 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
12631 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
12632 		"OpDecorate %ra_ra_u32_2 ArrayStride 8\n"
12633 
12634 		"OpMemberDecorate %SSBO_v2f16 0 Offset 0\n"
12635 		"OpDecorate %SSBO_v2f16 BufferBlock\n"
12636 
12637 		"OpMemberDecorate %SSBO_v3f16 0 Offset 0\n"
12638 		"OpDecorate %SSBO_v3f16 BufferBlock\n"
12639 
12640 		"OpMemberDecorate %SSBO_v4f16 0 Offset 0\n"
12641 		"OpDecorate %SSBO_v4f16 BufferBlock\n"
12642 
12643 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
12644 		"OpDecorate %ssbo_src0 Binding 0\n"
12645 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
12646 		"OpDecorate %ssbo_src1 Binding 1\n"
12647 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12648 		"OpDecorate %ssbo_dst Binding 2\n"
12649 	);
12650 
12651 	const StringTemplate testFun
12652 	(
12653 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12654 		"    %param = OpFunctionParameter %v4f32\n"
12655 		"    %entry = OpLabel\n"
12656 
12657 		"        %i = OpVariable %fp_i32 Function\n"
12658 		"             OpStore %i %c_i32_0\n"
12659 
12660 		" %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12661 		"             OpSelectionMerge %end_if None\n"
12662 		"             OpBranchConditional %will_run %run_test %end_if\n"
12663 
12664 		" %run_test = OpLabel\n"
12665 		"             OpBranch %loop\n"
12666 
12667 		"     %loop = OpLabel\n"
12668 		"    %i_cmp = OpLoad %i32 %i\n"
12669 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12670 		"             OpLoopMerge %merge %next None\n"
12671 		"             OpBranchConditional %lt %write %merge\n"
12672 
12673 		"    %write = OpLabel\n"
12674 		"      %ndx = OpLoad %i32 %i\n"
12675 		" %val_src0 = OpFunctionCall %${tt_src0} %ld_arg_ssbo_src0 %ndx\n"
12676 		" %val_src1 = OpFunctionCall %${tt_src1} %ld_arg_ssbo_src1 %ndx\n"
12677 		"  %val_dst = OpFunctionCall %${tt_dst} %sw_fun %val_src0 %val_src1 %ndx\n"
12678 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n"
12679 		"             OpBranch %next\n"
12680 
12681 		"     %next = OpLabel\n"
12682 		"    %i_cur = OpLoad %i32 %i\n"
12683 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
12684 		"             OpStore %i %i_new\n"
12685 		"             OpBranch %loop\n"
12686 
12687 		"    %merge = OpLabel\n"
12688 		"             OpBranch %end_if\n"
12689 		"   %end_if = OpLabel\n"
12690 		"             OpReturnValue %param\n"
12691 		"             OpFunctionEnd\n"
12692 		"\n"
12693 
12694 		"   %sw_fun = OpFunction %${tt_dst} None %fun_t\n"
12695 		"%sw_param0 = OpFunctionParameter %${tt_src0}\n"
12696 		"%sw_param1 = OpFunctionParameter %${tt_src1}\n"
12697 		"%sw_paramn = OpFunctionParameter %i32\n"
12698 		" %sw_entry = OpLabel\n"
12699 		"   %modulo = OpSMod %i32 %sw_paramn %c_i32_cc\n"
12700 		"             OpSelectionMerge %switch_e None\n"
12701 		"             OpSwitch %modulo %default ${case_list}\n"
12702 		"${case_bodies}"
12703 		"%default   = OpLabel\n"
12704 		"             OpUnreachable\n" // Unreachable default case for switch statement
12705 		"%switch_e  = OpLabel\n"
12706 		"             OpUnreachable\n" // Unreachable merge block for switch statement
12707 		"             OpFunctionEnd\n"
12708 	);
12709 
12710 	const StringTemplate testCaseBody
12711 	(
12712 		"%case_${case_ndx}    = OpLabel\n"
12713 		"%val_dst_${case_ndx} = OpVectorShuffle %${tt_dst} %sw_param0 %sw_param1 ${shuffle}\n"
12714 		"             OpReturnValue %val_dst_${case_ndx}\n"
12715 	);
12716 
12717 	for (deUint32 dstTypeIdx = 0; dstTypeIdx < DE_LENGTH_OF_ARRAY(testTypes); ++dstTypeIdx)
12718 	{
12719 		const TestType&	dstType			= testTypes[dstTypeIdx];
12720 
12721 		for (deUint32 comp0Idx = 0; comp0Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp0Idx)
12722 		{
12723 			const TestType&	src0Type	= testTypes[comp0Idx];
12724 
12725 			for (deUint32 comp1Idx = 0; comp1Idx < DE_LENGTH_OF_ARRAY(testTypes); ++comp1Idx)
12726 			{
12727 				const TestType&			src1Type			= testTypes[comp1Idx];
12728 				const deUint32			input0Stride		= (src0Type.typeComponents == 3) ? 4 : src0Type.typeComponents;
12729 				const deUint32			input1Stride		= (src1Type.typeComponents == 3) ? 4 : src1Type.typeComponents;
12730 				const deUint32			outputStride		= (dstType.typeComponents == 3) ? 4 : dstType.typeComponents;
12731 				const vector<deFloat16>	float16Input0Data	= getFloat16s(rnd, input0Stride * numDataPoints);
12732 				const vector<deFloat16>	float16Input1Data	= getFloat16s(rnd, input1Stride * numDataPoints);
12733 				const vector<deFloat16>	float16OutputDummy	(outputStride * numDataPoints, 0);
12734 				const string			testName			= de::toString(dstType.typeComponents) + de::toString(src0Type.typeComponents) + de::toString(src1Type.typeComponents);
12735 				deUint32				caseCount			= 0;
12736 				SpecResource			specResource;
12737 				map<string, string>		specs;
12738 				vector<string>			extensions;
12739 				VulkanFeatures			features;
12740 				string					caseBodies;
12741 				string					caseList;
12742 
12743 				// Generate case
12744 				{
12745 					vector<string>	componentList;
12746 
12747 					// Generate component possible indices for OpVectorShuffle for components 0 and 1 in output vector
12748 					{
12749 						deUint32		caseNo		= 0;
12750 
12751 						for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < src0Type.typeComponents; ++comp0IdxLocal)
12752 							componentList.push_back(de::toString(caseNo++));
12753 						for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < src1Type.typeComponents; ++comp1IdxLocal)
12754 							componentList.push_back(de::toString(caseNo++));
12755 						componentList.push_back("0xFFFFFFFF");
12756 					}
12757 
12758 					for (deUint32 comp0IdxLocal = 0; comp0IdxLocal < componentList.size(); ++comp0IdxLocal)
12759 					{
12760 						for (deUint32 comp1IdxLocal = 0; comp1IdxLocal < componentList.size(); ++comp1IdxLocal)
12761 						{
12762 							map<string, string>	specCase;
12763 							string				shuffle		= componentList[comp0IdxLocal] + " " + componentList[comp1IdxLocal];
12764 
12765 							for (deUint32 compIdx = 2; compIdx < dstType.typeComponents; ++compIdx)
12766 								shuffle += " " + de::toString(compIdx - 2);
12767 
12768 							specCase["case_ndx"]	= de::toString(caseCount);
12769 							specCase["shuffle"]		= shuffle;
12770 							specCase["tt_dst"]		= dstType.typeName;
12771 
12772 							caseBodies	+= testCaseBody.specialize(specCase);
12773 							caseList	+= de::toString(caseCount) + " %case_" + de::toString(caseCount) + " ";
12774 
12775 							caseCount++;
12776 						}
12777 					}
12778 				}
12779 
12780 				specs["num_data_points"]	= de::toString(numDataPoints);
12781 				specs["tt_dst"]				= dstType.typeName;
12782 				specs["tt_src0"]			= src0Type.typeName;
12783 				specs["tt_src1"]			= src1Type.typeName;
12784 				specs["case_bodies"]		= caseBodies;
12785 				specs["case_list"]			= caseList;
12786 				specs["case_count"]			= de::toString(caseCount);
12787 
12788 				fragments["capability"]		= "OpCapability Float16\n";
12789 				fragments["decoration"]		= decoration.specialize(specs);
12790 				fragments["pre_main"]		= preMain.specialize(specs);
12791 				fragments["testfun"]		= testFun.specialize(specs);
12792 				fragments["testfun"]		+= StringTemplate(src0Type.loadFunction).specialize({{"var", "ssbo_src0"}});
12793 				fragments["testfun"]		+= StringTemplate(src1Type.loadFunction).specialize({{"var", "ssbo_src1"}});
12794 				fragments["testfun"]		+= StringTemplate(dstType.storeFunction).specialize({{"var", "ssbo_dst"}});
12795 
12796 				specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input0Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12797 				specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(float16Input1Data)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12798 				specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16OutputDummy)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
12799 				specResource.verifyIO = getFloat16VectorShuffleVerifyIOFunc(dstType.typeComponents, src0Type.typeComponents, src1Type.typeComponents);
12800 
12801 				extensions.push_back("VK_KHR_shader_float16_int8");
12802 
12803 				features.extFloat16Int8.shaderFloat16 = true;
12804 
12805 				finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
12806 			}
12807 		}
12808 	}
12809 
12810 	return testGroup.release();
12811 }
12812 
compareFP16CompositeFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> &,TestLog & log)12813 bool compareFP16CompositeFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>&, TestLog& log)
12814 {
12815 	if (inputs.size() != 1 || outputAllocs.size() != 1)
12816 		return false;
12817 
12818 	vector<deUint8>	input1Bytes;
12819 
12820 	inputs[0].getBytes(input1Bytes);
12821 
12822 	DE_ASSERT(input1Bytes.size() > 0);
12823 	DE_ASSERT(input1Bytes.size() % sizeof(deFloat16) == 0);
12824 
12825 	const size_t			iterations		= input1Bytes.size() / sizeof(deFloat16);
12826 	const deFloat16* const	input1AsFP16	= (const deFloat16*)&input1Bytes[0];
12827 	const deFloat16* const	outputAsFP16	= (const deFloat16*)outputAllocs[0]->getHostPtr();
12828 	const deFloat16			exceptionValue	= tcu::Float16(-1.0).bits();
12829 	std::string				error;
12830 
12831 	for (size_t idx = 0; idx < iterations; ++idx)
12832 	{
12833 		if (input1AsFP16[idx] == exceptionValue)
12834 			continue;
12835 
12836 		if (!compare16BitFloat(input1AsFP16[idx], outputAsFP16[idx], error))
12837 		{
12838 			log << TestLog::Message << "At " << idx << ":" << error << TestLog::EndMessage;
12839 
12840 			return false;
12841 		}
12842 	}
12843 
12844 	return true;
12845 }
12846 
12847 template<class SpecResource>
createFloat16CompositeConstructSet(tcu::TestContext & testCtx)12848 tcu::TestCaseGroup* createFloat16CompositeConstructSet (tcu::TestContext& testCtx)
12849 {
12850 	de::MovePtr<tcu::TestCaseGroup>		testGroup				(new tcu::TestCaseGroup(testCtx, "opcompositeconstruct", "OpCompositeConstruct tests"));
12851 	const deUint32						numElements				= 8;
12852 	const string						testName				= "struct";
12853 	const deUint32						structItemsCount		= 88;
12854 	const deUint32						exceptionIndices[]		= { 1, 7, 15, 17, 25, 33, 51, 55, 59, 63, 67, 71, 84, 85, 86, 87 };
12855 	const deFloat16						exceptionValue			= tcu::Float16(-1.0).bits();
12856 	const deUint32						fieldModifier			= 2;
12857 	const deUint32						fieldModifiedMulIndex	= 60;
12858 	const deUint32						fieldModifiedAddIndex	= 66;
12859 
12860 	const StringTemplate preMain
12861 	(
12862 		"    %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
12863 		"          %f16 = OpTypeFloat 16\n"
12864 		"        %v2f16 = OpTypeVector %f16 2\n"
12865 		"        %v3f16 = OpTypeVector %f16 3\n"
12866 		"        %v4f16 = OpTypeVector %f16 4\n"
12867 		"    %c_f16_mod = OpConstant %f16 ${field_modifier}\n"
12868 
12869 		"${consts}"
12870 
12871 		"     %c_f16_n1 = OpConstant %f16 -1.0\n"
12872 		"   %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_n1 %c_f16_n1\n"
12873 		"      %c_u32_5 = OpConstant %u32 5\n"
12874 		"      %c_u32_6 = OpConstant %u32 6\n"
12875 		"      %c_u32_7 = OpConstant %u32 7\n"
12876 		"      %c_u32_8 = OpConstant %u32 8\n"
12877 		"      %c_u32_9 = OpConstant %u32 9\n"
12878 		"     %c_u32_10 = OpConstant %u32 10\n"
12879 		"     %c_u32_11 = OpConstant %u32 11\n"
12880 		"     %c_u32_12 = OpConstant %u32 12\n"
12881 		"     %c_u32_13 = OpConstant %u32 13\n"
12882 		"     %c_u32_14 = OpConstant %u32 14\n"
12883 		"     %c_u32_15 = OpConstant %u32 15\n"
12884 		"     %c_u32_16 = OpConstant %u32 16\n"
12885 		"     %c_u32_17 = OpConstant %u32 17\n"
12886 		"     %c_u32_18 = OpConstant %u32 18\n"
12887 		"     %c_u32_19 = OpConstant %u32 19\n"
12888 		"     %c_u32_20 = OpConstant %u32 20\n"
12889 		"     %c_u32_21 = OpConstant %u32 21\n"
12890 		"     %c_u32_22 = OpConstant %u32 22\n"
12891 		"     %c_u32_23 = OpConstant %u32 23\n"
12892 		"     %c_u32_24 = OpConstant %u32 24\n"
12893 		"     %c_u32_25 = OpConstant %u32 25\n"
12894 		"     %c_u32_26 = OpConstant %u32 26\n"
12895 		"     %c_u32_27 = OpConstant %u32 27\n"
12896 		"     %c_u32_28 = OpConstant %u32 28\n"
12897 		"     %c_u32_29 = OpConstant %u32 29\n"
12898 		"     %c_u32_30 = OpConstant %u32 30\n"
12899 		"     %c_u32_31 = OpConstant %u32 31\n"
12900 		"     %c_u32_33 = OpConstant %u32 33\n"
12901 		"     %c_u32_34 = OpConstant %u32 34\n"
12902 		"     %c_u32_35 = OpConstant %u32 35\n"
12903 		"     %c_u32_36 = OpConstant %u32 36\n"
12904 		"     %c_u32_37 = OpConstant %u32 37\n"
12905 		"     %c_u32_38 = OpConstant %u32 38\n"
12906 		"     %c_u32_39 = OpConstant %u32 39\n"
12907 		"     %c_u32_40 = OpConstant %u32 40\n"
12908 		"     %c_u32_41 = OpConstant %u32 41\n"
12909 		"     %c_u32_44 = OpConstant %u32 44\n"
12910 
12911 		" %f16arr3      = OpTypeArray %f16 %c_u32_3\n"
12912 		" %v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
12913 		" %v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
12914 		" %v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
12915 		" %v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
12916 		" %struct16     = OpTypeStruct %f16 %v2f16arr3\n"
12917 		" %struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
12918 		" %st_test      = OpTypeStruct %f16 %v2f16 %v3f16 %v4f16 %f16arr3 %struct16arr3 %v2f16arr5 %f16 %v3f16arr5 %v4f16arr3\n"
12919 
12920 		"       %up_u32 = OpTypePointer Uniform %u32\n"
12921 		"    %ra_u32_44 = OpTypeArray %u32 %c_u32_44\n"
12922 		"    %ra_ra_u32 = OpTypeArray %ra_u32_44 %c_i32_ndp\n"
12923 		"      %SSBO_st = OpTypeStruct %ra_ra_u32\n"
12924 		"   %up_SSBO_st = OpTypePointer Uniform %SSBO_st\n"
12925 
12926 		"     %ssbo_dst = OpVariable %up_SSBO_st Uniform\n"
12927 	);
12928 
12929 	const StringTemplate decoration
12930 	(
12931 		"OpDecorate %SSBO_st BufferBlock\n"
12932 		"OpDecorate %ra_u32_44 ArrayStride 4\n"
12933 		"OpDecorate %ra_ra_u32 ArrayStride ${struct_item_size}\n"
12934 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
12935 		"OpDecorate %ssbo_dst Binding 1\n"
12936 
12937 		"OpMemberDecorate %SSBO_st 0 Offset 0\n"
12938 
12939 		"OpDecorate %v2f16arr3 ArrayStride 4\n"
12940 		"OpMemberDecorate %struct16 0 Offset 0\n"
12941 		"OpMemberDecorate %struct16 1 Offset 4\n"
12942 		"OpDecorate %struct16arr3 ArrayStride 16\n"
12943 		"OpDecorate %f16arr3 ArrayStride 2\n"
12944 		"OpDecorate %v2f16arr5 ArrayStride 4\n"
12945 		"OpDecorate %v3f16arr5 ArrayStride 8\n"
12946 		"OpDecorate %v4f16arr3 ArrayStride 8\n"
12947 
12948 		"OpMemberDecorate %st_test 0 Offset 0\n"
12949 		"OpMemberDecorate %st_test 1 Offset 4\n"
12950 		"OpMemberDecorate %st_test 2 Offset 8\n"
12951 		"OpMemberDecorate %st_test 3 Offset 16\n"
12952 		"OpMemberDecorate %st_test 4 Offset 24\n"
12953 		"OpMemberDecorate %st_test 5 Offset 32\n"
12954 		"OpMemberDecorate %st_test 6 Offset 80\n"
12955 		"OpMemberDecorate %st_test 7 Offset 100\n"
12956 		"OpMemberDecorate %st_test 8 Offset 104\n"
12957 		"OpMemberDecorate %st_test 9 Offset 144\n"
12958 	);
12959 
12960 	const StringTemplate testFun
12961 	(
12962 		" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
12963 		"     %param = OpFunctionParameter %v4f32\n"
12964 		"     %entry = OpLabel\n"
12965 
12966 		"         %i = OpVariable %fp_i32 Function\n"
12967 		"              OpStore %i %c_i32_0\n"
12968 
12969 		"  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
12970 		"              OpSelectionMerge %end_if None\n"
12971 		"              OpBranchConditional %will_run %run_test %end_if\n"
12972 
12973 		"  %run_test = OpLabel\n"
12974 		"              OpBranch %loop\n"
12975 
12976 		"      %loop = OpLabel\n"
12977 		"     %i_cmp = OpLoad %i32 %i\n"
12978 		"        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
12979 		"              OpLoopMerge %merge %next None\n"
12980 		"              OpBranchConditional %lt %write %merge\n"
12981 
12982 		"     %write = OpLabel\n"
12983 		"       %ndx = OpLoad %i32 %i\n"
12984 
12985 		"      %fld1 = OpCompositeConstruct %v2f16 %c_f16_2 %c_f16_3\n"
12986 		"      %fld2 = OpCompositeConstruct %v3f16 %c_f16_4 %c_f16_5 %c_f16_6\n"
12987 		"      %fld3 = OpCompositeConstruct %v4f16 %c_f16_8 %c_f16_9 %c_f16_10 %c_f16_11\n"
12988 
12989 		"      %fld4 = OpCompositeConstruct %f16arr3 %c_f16_12 %c_f16_13 %c_f16_14\n"
12990 
12991 		"%fld5_0_1_0 = OpCompositeConstruct %v2f16 %c_f16_18 %c_f16_19\n"
12992 		"%fld5_0_1_1 = OpCompositeConstruct %v2f16 %c_f16_20 %c_f16_21\n"
12993 		"%fld5_0_1_2 = OpCompositeConstruct %v2f16 %c_f16_22 %c_f16_23\n"
12994 		"  %fld5_0_1 = OpCompositeConstruct %v2f16arr3 %fld5_0_1_0 %fld5_0_1_1 %fld5_0_1_2\n"
12995 		"    %fld5_0 = OpCompositeConstruct %struct16 %c_f16_16 %fld5_0_1\n"
12996 
12997 		"%fld5_1_1_0 = OpCompositeConstruct %v2f16 %c_f16_26 %c_f16_27\n"
12998 		"%fld5_1_1_1 = OpCompositeConstruct %v2f16 %c_f16_28 %c_f16_29\n"
12999 		"%fld5_1_1_2 = OpCompositeConstruct %v2f16 %c_f16_30 %c_f16_31\n"
13000 		"  %fld5_1_1 = OpCompositeConstruct %v2f16arr3 %fld5_1_1_0 %fld5_1_1_1 %fld5_1_1_2\n"
13001 		"    %fld5_1 = OpCompositeConstruct %struct16 %c_f16_24 %fld5_1_1\n"
13002 
13003 		"%fld5_2_1_0 = OpCompositeConstruct %v2f16 %c_f16_34 %c_f16_35\n"
13004 		"%fld5_2_1_1 = OpCompositeConstruct %v2f16 %c_f16_36 %c_f16_37\n"
13005 		"%fld5_2_1_2 = OpCompositeConstruct %v2f16 %c_f16_38 %c_f16_39\n"
13006 		"  %fld5_2_1 = OpCompositeConstruct %v2f16arr3 %fld5_2_1_0 %fld5_2_1_1 %fld5_2_1_2\n"
13007 		"    %fld5_2 = OpCompositeConstruct %struct16 %c_f16_32 %fld5_2_1\n"
13008 
13009 		"      %fld5 = OpCompositeConstruct %struct16arr3 %fld5_0 %fld5_1 %fld5_2\n"
13010 
13011 		"    %fld6_0 = OpCompositeConstruct %v2f16 %c_f16_40 %c_f16_41\n"
13012 		"    %fld6_1 = OpCompositeConstruct %v2f16 %c_f16_42 %c_f16_43\n"
13013 		"    %fld6_2 = OpCompositeConstruct %v2f16 %c_f16_44 %c_f16_45\n"
13014 		"    %fld6_3 = OpCompositeConstruct %v2f16 %c_f16_46 %c_f16_47\n"
13015 		"    %fld6_4 = OpCompositeConstruct %v2f16 %c_f16_48 %c_f16_49\n"
13016 		"      %fld6 = OpCompositeConstruct %v2f16arr5 %fld6_0 %fld6_1 %fld6_2 %fld6_3 %fld6_4\n"
13017 
13018 		"      %fndx = OpConvertSToF %f16 %ndx\n"
13019 		"  %fld8_2a0 = OpFMul %f16 %fndx %c_f16_mod\n"
13020 		"  %fld8_3b1 = OpFAdd %f16 %fndx %c_f16_mod\n"
13021 
13022 		"   %fld8_2a = OpCompositeConstruct %v2f16 %fld8_2a0 %c_f16_61\n"
13023 		"   %fld8_3b = OpCompositeConstruct %v2f16 %c_f16_65 %fld8_3b1\n"
13024 		"    %fld8_0 = OpCompositeConstruct %v3f16 %c_f16_52 %c_f16_53 %c_f16_54\n"
13025 		"    %fld8_1 = OpCompositeConstruct %v3f16 %c_f16_56 %c_f16_57 %c_f16_58\n"
13026 		"    %fld8_2 = OpCompositeConstruct %v3f16 %fld8_2a %c_f16_62\n"
13027 		"    %fld8_3 = OpCompositeConstruct %v3f16 %c_f16_64 %fld8_3b\n"
13028 		"    %fld8_4 = OpCompositeConstruct %v3f16 %c_f16_68 %c_f16_69 %c_f16_70\n"
13029 		"      %fld8 = OpCompositeConstruct %v3f16arr5 %fld8_0 %fld8_1 %fld8_2 %fld8_3 %fld8_4\n"
13030 
13031 		"    %fld9_0 = OpCompositeConstruct %v4f16 %c_f16_72 %c_f16_73 %c_f16_74 %c_f16_75\n"
13032 		"    %fld9_1 = OpCompositeConstruct %v4f16 %c_f16_76 %c_f16_77 %c_f16_78 %c_f16_79\n"
13033 		"    %fld9_2 = OpCompositeConstruct %v4f16 %c_f16_80 %c_f16_81 %c_f16_82 %c_f16_83\n"
13034 		"      %fld9 = OpCompositeConstruct %v4f16arr3 %fld9_0 %fld9_1 %fld9_2\n"
13035 
13036 		"    %st_val = OpCompositeConstruct %st_test %c_f16_0 %fld1 %fld2 %fld3 %fld4 %fld5 %fld6 %c_f16_50 %fld8 %fld9\n"
13037 
13038 		// Storage section: results are written to the destination buffer as 32-bit words,
13039 		// each one a bitcast v2f16. Every 16-bit slot that carries no struct data must hold
13040 		// -1.0, so f16, v3f16 and odd-length f16 array stores pad the v2f16 with a constant -1.0.
13041 		// half offset 0
13042 		"      %ex_0 = OpCompositeExtract %f16 %st_val 0\n"
13043 		"     %vec_0 = OpCompositeConstruct %v2f16 %ex_0 %c_f16_n1\n"
13044 		"      %bc_0 = OpBitcast %u32 %vec_0\n"
13045 		"     %gep_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_0\n"
13046 		"              OpStore %gep_0 %bc_0\n"
13047 
13048 		// <2 x half> offset 4
13049 		"      %ex_1 = OpCompositeExtract %v2f16 %st_val 1\n"
13050 		"      %bc_1 = OpBitcast %u32 %ex_1\n"
13051 		"     %gep_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_1\n"
13052 		"              OpStore %gep_1 %bc_1\n"
13053 
13054 		// <3 x half> offset 8
13055 		"      %ex_2 = OpCompositeExtract %v3f16 %st_val 2\n"
13056 		"    %ex_2_0 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 0 1\n"
13057 		"    %ex_2_1 = OpVectorShuffle %v2f16 %ex_2 %c_v2f16_n1 2 3\n"
13058 		"    %bc_2_0 = OpBitcast %u32 %ex_2_0\n"
13059 		"    %bc_2_1 = OpBitcast %u32 %ex_2_1\n"
13060 		"   %gep_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_2\n"
13061 		"   %gep_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_3\n"
13062 		"              OpStore %gep_2_0 %bc_2_0\n"
13063 		"              OpStore %gep_2_1 %bc_2_1\n"
13064 
13065 		// <4 x half> offset 16
13066 		"      %ex_3 = OpCompositeExtract %v4f16 %st_val 3\n"
13067 		"    %ex_3_0 = OpVectorShuffle %v2f16 %ex_3 %ex_3 0 1\n"
13068 		"    %ex_3_1 = OpVectorShuffle %v2f16 %ex_3 %ex_3 2 3\n"
13069 		"    %bc_3_0 = OpBitcast %u32 %ex_3_0\n"
13070 		"    %bc_3_1 = OpBitcast %u32 %ex_3_1\n"
13071 		"   %gep_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_4\n"
13072 		"   %gep_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_5\n"
13073 		"              OpStore %gep_3_0 %bc_3_0\n"
13074 		"              OpStore %gep_3_1 %bc_3_1\n"
13075 
13076 		// [3 x half] offset 24
13077 		"    %ex_4_0 = OpCompositeExtract %f16 %st_val 4 0\n"
13078 		"    %ex_4_1 = OpCompositeExtract %f16 %st_val 4 1\n"
13079 		"    %ex_4_2 = OpCompositeExtract %f16 %st_val 4 2\n"
13080 		"   %vec_4_0 = OpCompositeConstruct %v2f16 %ex_4_0 %ex_4_1\n"
13081 		"   %vec_4_1 = OpCompositeConstruct %v2f16 %ex_4_2 %c_f16_n1\n"
13082 		"    %bc_4_0 = OpBitcast %u32 %vec_4_0\n"
13083 		"    %bc_4_1 = OpBitcast %u32 %vec_4_1\n"
13084 		"   %gep_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_6\n"
13085 		"   %gep_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_7\n"
13086 		"              OpStore %gep_4_0 %bc_4_0\n"
13087 		"              OpStore %gep_4_1 %bc_4_1\n"
13088 
13089 		// [3 x {half, [3 x <2 x half>]}] offset 32
13090 		"    %ex_5_0 = OpCompositeExtract %struct16 %st_val 5 0\n"
13091 		"    %ex_5_1 = OpCompositeExtract %struct16 %st_val 5 1\n"
13092 		"    %ex_5_2 = OpCompositeExtract %struct16 %st_val 5 2\n"
13093 		"  %ex_5_0_0 = OpCompositeExtract %f16 %ex_5_0 0\n"
13094 		"  %ex_5_1_0 = OpCompositeExtract %f16 %ex_5_1 0\n"
13095 		"  %ex_5_2_0 = OpCompositeExtract %f16 %ex_5_2 0\n"
13096 		"%ex_5_0_1_0 = OpCompositeExtract %v2f16 %ex_5_0 1 0\n"
13097 		"%ex_5_0_1_1 = OpCompositeExtract %v2f16 %ex_5_0 1 1\n"
13098 		"%ex_5_0_1_2 = OpCompositeExtract %v2f16 %ex_5_0 1 2\n"
13099 		"%ex_5_1_1_0 = OpCompositeExtract %v2f16 %ex_5_1 1 0\n"
13100 		"%ex_5_1_1_1 = OpCompositeExtract %v2f16 %ex_5_1 1 1\n"
13101 		"%ex_5_1_1_2 = OpCompositeExtract %v2f16 %ex_5_1 1 2\n"
13102 		"%ex_5_2_1_0 = OpCompositeExtract %v2f16 %ex_5_2 1 0\n"
13103 		"%ex_5_2_1_1 = OpCompositeExtract %v2f16 %ex_5_2 1 1\n"
13104 		"%ex_5_2_1_2 = OpCompositeExtract %v2f16 %ex_5_2 1 2\n"
13105 		" %vec_5_0_0 = OpCompositeConstruct %v2f16 %ex_5_0_0 %c_f16_n1\n"
13106 		" %vec_5_1_0 = OpCompositeConstruct %v2f16 %ex_5_1_0 %c_f16_n1\n"
13107 		" %vec_5_2_0 = OpCompositeConstruct %v2f16 %ex_5_2_0 %c_f16_n1\n"
13108 		"  %bc_5_0_0 = OpBitcast %u32 %vec_5_0_0\n"
13109 		"  %bc_5_1_0 = OpBitcast %u32 %vec_5_1_0\n"
13110 		"  %bc_5_2_0 = OpBitcast %u32 %vec_5_2_0\n"
13111 		"%bc_5_0_1_0 = OpBitcast %u32 %ex_5_0_1_0\n"
13112 		"%bc_5_0_1_1 = OpBitcast %u32 %ex_5_0_1_1\n"
13113 		"%bc_5_0_1_2 = OpBitcast %u32 %ex_5_0_1_2\n"
13114 		"%bc_5_1_1_0 = OpBitcast %u32 %ex_5_1_1_0\n"
13115 		"%bc_5_1_1_1 = OpBitcast %u32 %ex_5_1_1_1\n"
13116 		"%bc_5_1_1_2 = OpBitcast %u32 %ex_5_1_1_2\n"
13117 		"%bc_5_2_1_0 = OpBitcast %u32 %ex_5_2_1_0\n"
13118 		"%bc_5_2_1_1 = OpBitcast %u32 %ex_5_2_1_1\n"
13119 		"%bc_5_2_1_2 = OpBitcast %u32 %ex_5_2_1_2\n"
13120 		"  %gep_5_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_8\n"
13121 		"%gep_5_0_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_9\n"
13122 		"%gep_5_0_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_10\n"
13123 		"%gep_5_0_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_11\n"
13124 		"  %gep_5_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_12\n"
13125 		"%gep_5_1_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_13\n"
13126 		"%gep_5_1_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_14\n"
13127 		"%gep_5_1_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_15\n"
13128 		"  %gep_5_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_16\n"
13129 		"%gep_5_2_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_17\n"
13130 		"%gep_5_2_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_18\n"
13131 		"%gep_5_2_1_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_19\n"
13132 		"              OpStore %gep_5_0_0 %bc_5_0_0\n"
13133 		"              OpStore %gep_5_0_1_0 %bc_5_0_1_0\n"
13134 		"              OpStore %gep_5_0_1_1 %bc_5_0_1_1\n"
13135 		"              OpStore %gep_5_0_1_2 %bc_5_0_1_2\n"
13136 		"              OpStore %gep_5_1_0 %bc_5_1_0\n"
13137 		"              OpStore %gep_5_1_1_0 %bc_5_1_1_0\n"
13138 		"              OpStore %gep_5_1_1_1 %bc_5_1_1_1\n"
13139 		"              OpStore %gep_5_1_1_2 %bc_5_1_1_2\n"
13140 		"              OpStore %gep_5_2_0 %bc_5_2_0\n"
13141 		"              OpStore %gep_5_2_1_0 %bc_5_2_1_0\n"
13142 		"              OpStore %gep_5_2_1_1 %bc_5_2_1_1\n"
13143 		"              OpStore %gep_5_2_1_2 %bc_5_2_1_2\n"
13144 
13145 		// [5 x <2 x half>] offset 80
13146 		"    %ex_6_0 = OpCompositeExtract %v2f16 %st_val 6 0\n"
13147 		"    %ex_6_1 = OpCompositeExtract %v2f16 %st_val 6 1\n"
13148 		"    %ex_6_2 = OpCompositeExtract %v2f16 %st_val 6 2\n"
13149 		"    %ex_6_3 = OpCompositeExtract %v2f16 %st_val 6 3\n"
13150 		"    %ex_6_4 = OpCompositeExtract %v2f16 %st_val 6 4\n"
13151 		"    %bc_6_0 = OpBitcast %u32 %ex_6_0\n"
13152 		"    %bc_6_1 = OpBitcast %u32 %ex_6_1\n"
13153 		"    %bc_6_2 = OpBitcast %u32 %ex_6_2\n"
13154 		"    %bc_6_3 = OpBitcast %u32 %ex_6_3\n"
13155 		"    %bc_6_4 = OpBitcast %u32 %ex_6_4\n"
13156 		"   %gep_6_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_20\n"
13157 		"   %gep_6_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_21\n"
13158 		"   %gep_6_2 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_22\n"
13159 		"   %gep_6_3 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_23\n"
13160 		"   %gep_6_4 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_24\n"
13161 		"              OpStore %gep_6_0 %bc_6_0\n"
13162 		"              OpStore %gep_6_1 %bc_6_1\n"
13163 		"              OpStore %gep_6_2 %bc_6_2\n"
13164 		"              OpStore %gep_6_3 %bc_6_3\n"
13165 		"              OpStore %gep_6_4 %bc_6_4\n"
13166 
13167 		// half offset 100
13168 		"      %ex_7 = OpCompositeExtract %f16 %st_val 7\n"
13169 		"     %vec_7 = OpCompositeConstruct %v2f16 %ex_7 %c_f16_n1\n"
13170 		"      %bc_7 = OpBitcast %u32 %vec_7\n"
13171 		"     %gep_7 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_25\n"
13172 		"              OpStore %gep_7 %bc_7\n"
13173 
13174 		// [5 x <3 x half>] offset 104
13175 		"    %ex_8_0 = OpCompositeExtract %v3f16 %st_val 8 0\n"
13176 		"    %ex_8_1 = OpCompositeExtract %v3f16 %st_val 8 1\n"
13177 		"    %ex_8_2 = OpCompositeExtract %v3f16 %st_val 8 2\n"
13178 		"    %ex_8_3 = OpCompositeExtract %v3f16 %st_val 8 3\n"
13179 		"    %ex_8_4 = OpCompositeExtract %v3f16 %st_val 8 4\n"
13180 		" %vec_8_0_0 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 0 1\n"
13181 		" %vec_8_0_1 = OpVectorShuffle %v2f16 %ex_8_0 %c_v2f16_n1 2 3\n"
13182 		" %vec_8_1_0 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 0 1\n"
13183 		" %vec_8_1_1 = OpVectorShuffle %v2f16 %ex_8_1 %c_v2f16_n1 2 3\n"
13184 		" %vec_8_2_0 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 0 1\n"
13185 		" %vec_8_2_1 = OpVectorShuffle %v2f16 %ex_8_2 %c_v2f16_n1 2 3\n"
13186 		" %vec_8_3_0 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 0 1\n"
13187 		" %vec_8_3_1 = OpVectorShuffle %v2f16 %ex_8_3 %c_v2f16_n1 2 3\n"
13188 		" %vec_8_4_0 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 0 1\n"
13189 		" %vec_8_4_1 = OpVectorShuffle %v2f16 %ex_8_4 %c_v2f16_n1 2 3\n"
13190 		"  %bc_8_0_0 = OpBitcast %u32 %vec_8_0_0\n"
13191 		"  %bc_8_0_1 = OpBitcast %u32 %vec_8_0_1\n"
13192 		"  %bc_8_1_0 = OpBitcast %u32 %vec_8_1_0\n"
13193 		"  %bc_8_1_1 = OpBitcast %u32 %vec_8_1_1\n"
13194 		"  %bc_8_2_0 = OpBitcast %u32 %vec_8_2_0\n"
13195 		"  %bc_8_2_1 = OpBitcast %u32 %vec_8_2_1\n"
13196 		"  %bc_8_3_0 = OpBitcast %u32 %vec_8_3_0\n"
13197 		"  %bc_8_3_1 = OpBitcast %u32 %vec_8_3_1\n"
13198 		"  %bc_8_4_0 = OpBitcast %u32 %vec_8_4_0\n"
13199 		"  %bc_8_4_1 = OpBitcast %u32 %vec_8_4_1\n"
13200 		" %gep_8_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_26\n"
13201 		" %gep_8_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_27\n"
13202 		" %gep_8_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_28\n"
13203 		" %gep_8_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_29\n"
13204 		" %gep_8_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_30\n"
13205 		" %gep_8_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_31\n"
13206 		" %gep_8_3_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_32\n"
13207 		" %gep_8_3_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_33\n"
13208 		" %gep_8_4_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_34\n"
13209 		" %gep_8_4_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_35\n"
13210 		"              OpStore %gep_8_0_0 %bc_8_0_0\n"
13211 		"              OpStore %gep_8_0_1 %bc_8_0_1\n"
13212 		"              OpStore %gep_8_1_0 %bc_8_1_0\n"
13213 		"              OpStore %gep_8_1_1 %bc_8_1_1\n"
13214 		"              OpStore %gep_8_2_0 %bc_8_2_0\n"
13215 		"              OpStore %gep_8_2_1 %bc_8_2_1\n"
13216 		"              OpStore %gep_8_3_0 %bc_8_3_0\n"
13217 		"              OpStore %gep_8_3_1 %bc_8_3_1\n"
13218 		"              OpStore %gep_8_4_0 %bc_8_4_0\n"
13219 		"              OpStore %gep_8_4_1 %bc_8_4_1\n"
13220 
13221 		// [3 x <4 x half>] offset 144
13222 		"    %ex_9_0 = OpCompositeExtract %v4f16 %st_val 9 0\n"
13223 		"    %ex_9_1 = OpCompositeExtract %v4f16 %st_val 9 1\n"
13224 		"    %ex_9_2 = OpCompositeExtract %v4f16 %st_val 9 2\n"
13225 		" %vec_9_0_0 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 0 1\n"
13226 		" %vec_9_0_1 = OpVectorShuffle %v2f16 %ex_9_0 %ex_9_0 2 3\n"
13227 		" %vec_9_1_0 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 0 1\n"
13228 		" %vec_9_1_1 = OpVectorShuffle %v2f16 %ex_9_1 %ex_9_1 2 3\n"
13229 		" %vec_9_2_0 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 0 1\n"
13230 		" %vec_9_2_1 = OpVectorShuffle %v2f16 %ex_9_2 %ex_9_2 2 3\n"
13231 		"  %bc_9_0_0 = OpBitcast %u32 %vec_9_0_0\n"
13232 		"  %bc_9_0_1 = OpBitcast %u32 %vec_9_0_1\n"
13233 		"  %bc_9_1_0 = OpBitcast %u32 %vec_9_1_0\n"
13234 		"  %bc_9_1_1 = OpBitcast %u32 %vec_9_1_1\n"
13235 		"  %bc_9_2_0 = OpBitcast %u32 %vec_9_2_0\n"
13236 		"  %bc_9_2_1 = OpBitcast %u32 %vec_9_2_1\n"
13237 		" %gep_9_0_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_36\n"
13238 		" %gep_9_0_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_37\n"
13239 		" %gep_9_1_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_38\n"
13240 		" %gep_9_1_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_39\n"
13241 		" %gep_9_2_0 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_40\n"
13242 		" %gep_9_2_1 = OpAccessChain %up_u32 %ssbo_dst %c_u32_0 %ndx %c_u32_41\n"
13243 		"              OpStore %gep_9_0_0 %bc_9_0_0\n"
13244 		"              OpStore %gep_9_0_1 %bc_9_0_1\n"
13245 		"              OpStore %gep_9_1_0 %bc_9_1_0\n"
13246 		"              OpStore %gep_9_1_1 %bc_9_1_1\n"
13247 		"              OpStore %gep_9_2_0 %bc_9_2_0\n"
13248 		"              OpStore %gep_9_2_1 %bc_9_2_1\n"
13249 
13250 		"              OpBranch %next\n"
13251 
13252 		"      %next = OpLabel\n"
13253 		"     %i_cur = OpLoad %i32 %i\n"
13254 		"     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13255 		"              OpStore %i %i_new\n"
13256 		"              OpBranch %loop\n"
13257 
13258 		"     %merge = OpLabel\n"
13259 		"              OpBranch %end_if\n"
13260 		"    %end_if = OpLabel\n"
13261 		"              OpReturnValue %param\n"
13262 		"              OpFunctionEnd\n"
13263 	);
13264 
13265 	{
13266 		SpecResource		specResource;
13267 		map<string, string>	specs;
13268 		VulkanFeatures		features;
13269 		map<string, string>	fragments;
13270 		vector<string>		extensions;
13271 		vector<deFloat16>	expectedOutput;
13272 		string				consts;
13273 
13274 		for (deUint32 elementNdx = 0; elementNdx < numElements; ++elementNdx)
13275 		{
13276 			vector<deFloat16>	expectedIterationOutput;
13277 
13278 			for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
13279 				expectedIterationOutput.push_back(tcu::Float16(float(structItemNdx)).bits());
13280 
13281 			for (deUint32 structItemNdx = 0; structItemNdx < DE_LENGTH_OF_ARRAY(exceptionIndices); ++structItemNdx)
13282 				expectedIterationOutput[exceptionIndices[structItemNdx]] = exceptionValue;
13283 
13284 			expectedIterationOutput[fieldModifiedMulIndex] = tcu::Float16(float(elementNdx * fieldModifier)).bits();
13285 			expectedIterationOutput[fieldModifiedAddIndex] = tcu::Float16(float(elementNdx + fieldModifier)).bits();
13286 
13287 			expectedOutput.insert(expectedOutput.end(), expectedIterationOutput.begin(), expectedIterationOutput.end());
13288 		}
13289 
13290 		for (deUint32 i = 0; i < structItemsCount; ++i)
13291 			consts += "     %c_f16_" + de::toString(i) + " = OpConstant %f16 "  + de::toString(i) + "\n";
13292 
13293 		specs["num_elements"]		= de::toString(numElements);
13294 		specs["struct_item_size"]	= de::toString(structItemsCount * sizeof(deFloat16));
13295 		specs["field_modifier"]		= de::toString(fieldModifier);
13296 		specs["consts"]				= consts;
13297 
13298 		fragments["capability"]		= "OpCapability Float16\n";
13299 		fragments["decoration"]		= decoration.specialize(specs);
13300 		fragments["pre_main"]		= preMain.specialize(specs);
13301 		fragments["testfun"]		= testFun.specialize(specs);
13302 
13303 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13304 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(expectedOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
13305 		specResource.verifyIO = compareFP16CompositeFunc;
13306 
13307 		extensions.push_back("VK_KHR_shader_float16_int8");
13308 
13309 		features.extFloat16Int8.shaderFloat16 = true;
13310 
13311 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
13312 	}
13313 
13314 	return testGroup.release();
13315 }
13316 
13317 template<class SpecResource>
13318 tcu::TestCaseGroup* createFloat16CompositeInsertExtractSet (tcu::TestContext& testCtx, const char* op)
13319 {
13320 	de::MovePtr<tcu::TestCaseGroup>		testGroup		(new tcu::TestCaseGroup(testCtx, de::toLower(op).c_str(), op));
13321 	const deFloat16						exceptionValue	= tcu::Float16(-1.0).bits();
13322 	const string						opName			(op);
13323 	const deUint32						opIndex			= (opName == "OpCompositeInsert") ? 0
13324 														: (opName == "OpCompositeExtract") ? 1
13325 														: std::numeric_limits<deUint32>::max();
13326 
13327 	const StringTemplate preMain
13328 	(
13329 		"   %c_i32_ndp = OpConstant %i32 ${num_elements}\n"
13330 		"  %c_i32_hndp = OpSpecConstantOp %i32 SDiv %c_i32_ndp %c_i32_2\n"
13331 		"  %c_i32_size = OpConstant %i32 ${struct_u32s}\n"
13332 		"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
13333 		" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
13334 		"         %f16 = OpTypeFloat 16\n"
13335 		"       %v2f16 = OpTypeVector %f16 2\n"
13336 		"       %v3f16 = OpTypeVector %f16 3\n"
13337 		"       %v4f16 = OpTypeVector %f16 4\n"
13338 		"    %c_f16_na = OpConstant %f16 -1.0\n"
13339 		"  %c_v2f16_n1 = OpConstantComposite %v2f16 %c_f16_na %c_f16_na\n"
13340 		"     %c_u32_5 = OpConstant %u32 5\n"
13341 		"     %c_i32_5 = OpConstant %i32 5\n"
13342 		"     %c_i32_6 = OpConstant %i32 6\n"
13343 		"     %c_i32_7 = OpConstant %i32 7\n"
13344 		"     %c_i32_8 = OpConstant %i32 8\n"
13345 		"     %c_i32_9 = OpConstant %i32 9\n"
13346 		"    %c_i32_10 = OpConstant %i32 10\n"
13347 		"    %c_i32_11 = OpConstant %i32 11\n"
13348 
13349 		"%f16arr3      = OpTypeArray %f16 %c_u32_3\n"
13350 		"%v2f16arr3    = OpTypeArray %v2f16 %c_u32_3\n"
13351 		"%v2f16arr5    = OpTypeArray %v2f16 %c_u32_5\n"
13352 		"%v3f16arr5    = OpTypeArray %v3f16 %c_u32_5\n"
13353 		"%v4f16arr3    = OpTypeArray %v4f16 %c_u32_3\n"
13354 		"%struct16     = OpTypeStruct %f16 %v2f16arr3\n"
13355 		"%struct16arr3 = OpTypeArray %struct16 %c_u32_3\n"
13356 		"%st_test      = OpTypeStruct %${field_type}\n"
13357 
13358 		"      %ra_f16 = OpTypeArray %u32 %c_i32_hndp\n"
13359 		"       %ra_st = OpTypeArray %u32 %c_i32_size\n"
13360 		"      %up_u32 = OpTypePointer Uniform %u32\n"
13361 		"     %st_test_i32_fn = OpTypeFunction %st_test %i32\n"
13362 		"%void_st_test_i32_fn = OpTypeFunction %void %st_test %i32\n"
13363 		"         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
13364 		"    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
13365 		"       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
13366 		"  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
13367 
13368 		"${op_premain_decls}"
13369 
13370 		" %up_SSBO_src = OpTypePointer Uniform %SSBO_src\n"
13371 		" %up_SSBO_dst = OpTypePointer Uniform %SSBO_dst\n"
13372 
13373 		"    %ssbo_src = OpVariable %up_SSBO_src Uniform\n"
13374 		"    %ssbo_dst = OpVariable %up_SSBO_dst Uniform\n"
13375 	);
13376 
13377 	const StringTemplate decoration
13378 	(
13379 		"OpDecorate %SSBO_src BufferBlock\n"
13380 		"OpDecorate %SSBO_dst BufferBlock\n"
13381 		"OpDecorate %ra_f16 ArrayStride 4\n"
13382 		"OpDecorate %ra_st ArrayStride 4\n"
13383 		"OpDecorate %ssbo_src DescriptorSet 0\n"
13384 		"OpDecorate %ssbo_src Binding 0\n"
13385 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
13386 		"OpDecorate %ssbo_dst Binding 1\n"
13387 
13388 		"OpMemberDecorate %SSBO_src 0 Offset 0\n"
13389 		"OpMemberDecorate %SSBO_dst 0 Offset 0\n"
13390 
13391 		"OpDecorate %v2f16arr3 ArrayStride 4\n"
13392 		"OpMemberDecorate %struct16 0 Offset 0\n"
13393 		"OpMemberDecorate %struct16 1 Offset 4\n"
13394 		"OpDecorate %struct16arr3 ArrayStride 16\n"
13395 		"OpDecorate %f16arr3 ArrayStride 2\n"
13396 		"OpDecorate %v2f16arr5 ArrayStride 4\n"
13397 		"OpDecorate %v3f16arr5 ArrayStride 8\n"
13398 		"OpDecorate %v4f16arr3 ArrayStride 8\n"
13399 
13400 		"OpMemberDecorate %st_test 0 Offset 0\n"
13401 	);
13402 
13403 	const StringTemplate testFun
13404 	(
13405 		" %test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
13406 		"     %param = OpFunctionParameter %v4f32\n"
13407 		"     %entry = OpLabel\n"
13408 
13409 		"         %i = OpVariable %fp_i32 Function\n"
13410 		"              OpStore %i %c_i32_0\n"
13411 
13412 		"  %will_run = OpFunctionCall %bool %isUniqueIdZero\n"
13413 		"              OpSelectionMerge %end_if None\n"
13414 		"              OpBranchConditional %will_run %run_test %end_if\n"
13415 
13416 		"  %run_test = OpLabel\n"
13417 		"              OpBranch %loop\n"
13418 
13419 		"      %loop = OpLabel\n"
13420 		"     %i_cmp = OpLoad %i32 %i\n"
13421 		"        %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
13422 		"              OpLoopMerge %merge %next None\n"
13423 		"              OpBranchConditional %lt %write %merge\n"
13424 
13425 		"     %write = OpLabel\n"
13426 		"       %ndx = OpLoad %i32 %i\n"
13427 
13428 		"${op_sw_fun_call}"
13429 
13430 		"    %dst_st = OpFunctionCall %void %${st_call} %val_dst %${st_ndx}\n"
13431 		"              OpBranch %next\n"
13432 
13433 		"      %next = OpLabel\n"
13434 		"     %i_cur = OpLoad %i32 %i\n"
13435 		"     %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
13436 		"              OpStore %i %i_new\n"
13437 		"              OpBranch %loop\n"
13438 
13439 		"     %merge = OpLabel\n"
13440 		"              OpBranch %end_if\n"
13441 		"    %end_if = OpLabel\n"
13442 		"              OpReturnValue %param\n"
13443 		"              OpFunctionEnd\n"
13444 
13445 		"${op_sw_fun_header}"
13446 		" %sw_param = OpFunctionParameter %st_test\n"
13447 		"%sw_paramn = OpFunctionParameter %i32\n"
13448 		" %sw_entry = OpLabel\n"
13449 		"             OpSelectionMerge %switch_e None\n"
13450 		"             OpSwitch %sw_paramn %default ${case_list}\n"
13451 
13452 		"${case_bodies}"
13453 
13454 		"%default   = OpLabel\n"
13455 		"             OpReturnValue ${op_case_default_value}\n"
13456 		"%switch_e  = OpLabel\n"
13457 		"             OpUnreachable\n" // Unreachable merge block for switch statement
13458 		"             OpFunctionEnd\n"
13459 	);
13460 
13461 	const StringTemplate testCaseBody
13462 	(
13463 		"%case_${case_ndx}    = OpLabel\n"
13464 		"%val_ret_${case_ndx} = ${op_name} ${op_args_part} ${access_path}\n"
13465 		"             OpReturnValue %val_ret_${case_ndx}\n"
13466 	);
13467 
13468 	const string loadF16
13469 	(
13470 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13471 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13472 		"  %ld_${var}_entry = OpLabel\n"
13473 		"   %ld_${var}_call = OpFunctionCall %f16 %ld_arg_${var} %ld_${var}_param\n"
13474 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13475 		"                     OpReturnValue %ld_${var}_st_test\n"
13476 		"                     OpFunctionEnd\n" +
13477 		loadScalarF16FromUint
13478 	);
13479 
13480 	const string loadV2F16
13481 	(
13482 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13483 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13484 		"  %ld_${var}_entry = OpLabel\n"
13485 		"   %ld_${var}_call = OpFunctionCall %v2f16 %ld_arg_${var} %ld_${var}_param\n"
13486 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_call\n"
13487 		"                     OpReturnValue %ld_${var}_st_test\n"
13488 		"                     OpFunctionEnd\n" +
13489 		loadV2F16FromUint
13490 	);
13491 
13492 	const string loadV3F16
13493 	(
13494 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13495 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13496 		"  %ld_${var}_entry = OpLabel\n"
13497 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13498 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13499 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13500 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13501 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13502 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13503 		"    %ld_${var}_vec = OpVectorShuffle %v3f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2\n"
13504 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13505 		"                     OpReturnValue %ld_${var}_st_test\n"
13506 		"                     OpFunctionEnd\n"
13507 	);
13508 
13509 	const string loadV4F16
13510 	(
13511 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13512 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13513 		"  %ld_${var}_entry = OpLabel\n"
13514 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13515 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13516 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13517 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13518 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13519 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13520 		"    %ld_${var}_vec = OpVectorShuffle %v4f16 %ld_${var}_bc_0 %ld_${var}_bc_1 0 1 2 3\n"
13521 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_vec\n"
13522 		"                     OpReturnValue %ld_${var}_st_test\n"
13523 		"                     OpFunctionEnd\n"
13524 	);
13525 
13526 	const string loadF16Arr3
13527 	(
13528 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13529 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13530 		"  %ld_${var}_entry = OpLabel\n"
13531 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_0\n"
13532 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_u32_0 %c_u32_1\n"
13533 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13534 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13535 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13536 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13537 		"   %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0 0\n"
13538 		"   %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_0 1\n"
13539 		"   %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_1 0\n"
13540 		"   %ld_${var}_cons = OpCompositeConstruct %f16arr3 %ld_${var}_ex_0 %ld_${var}_ex_1 %ld_${var}_ex_2\n"
13541 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13542 		"                     OpReturnValue %ld_${var}_st_test\n"
13543 		"                     OpFunctionEnd\n"
13544 	);
13545 
13546 	const string loadV2F16Arr5
13547 	(
13548 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13549 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13550 		"  %ld_${var}_label = OpLabel\n"
13551 		"  %ld_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13552 		"  %ld_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13553 		"  %ld_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13554 		"  %ld_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13555 		"  %ld_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13556 		"   %ld_${var}_ld_0 = OpLoad %u32 %ld_${var}_gep_0\n"
13557 		"   %ld_${var}_ld_1 = OpLoad %u32 %ld_${var}_gep_1\n"
13558 		"   %ld_${var}_ld_2 = OpLoad %u32 %ld_${var}_gep_2\n"
13559 		"   %ld_${var}_ld_3 = OpLoad %u32 %ld_${var}_gep_3\n"
13560 		"   %ld_${var}_ld_4 = OpLoad %u32 %ld_${var}_gep_4\n"
13561 		"   %ld_${var}_bc_0 = OpBitcast %v2f16 %ld_${var}_ld_0\n"
13562 		"   %ld_${var}_bc_1 = OpBitcast %v2f16 %ld_${var}_ld_1\n"
13563 		"   %ld_${var}_bc_2 = OpBitcast %v2f16 %ld_${var}_ld_2\n"
13564 		"   %ld_${var}_bc_3 = OpBitcast %v2f16 %ld_${var}_ld_3\n"
13565 		"   %ld_${var}_bc_4 = OpBitcast %v2f16 %ld_${var}_ld_4\n"
13566 		"   %ld_${var}_cons = OpCompositeConstruct %v2f16arr5 %ld_${var}_bc_0 %ld_${var}_bc_1 %ld_${var}_bc_2 %ld_${var}_bc_3 %ld_${var}_bc_4\n"
13567 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13568 		"                     OpReturnValue %ld_${var}_st_test\n"
13569 		"                     OpFunctionEnd\n"
13570 	);
13571 
13572 	const string loadV3F16Arr5
13573 	(
      		// SPIR-V assembly template (placeholder ${var} names the source variable) defining
      		// function %ld_${var}: it reads a five-element array of %v3f16 from member 0 of
      		// %${var} and returns it wrapped in %st_test.
      		// The data is addressed as u32 words 0..9. Every array element takes two consecutive
      		// words; both are bitcast to %v2f16 and OpVectorShuffle keeps components 0 1 2, so the
      		// fourth 16-bit component of each word pair is dropped.
      		// The %i32 parameter is declared but not referenced in the function body.
13574 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13575 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13576 		"  %ld_${var}_entry = OpLabel\n"
13577 		"%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13578 		"%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13579 		"%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13580 		"%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13581 		"%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13582 		"%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13583 		"%ld_${var}_gep_3_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13584 		"%ld_${var}_gep_3_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13585 		"%ld_${var}_gep_4_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13586 		"%ld_${var}_gep_4_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13587 		" %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13588 		" %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13589 		" %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13590 		" %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13591 		" %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13592 		" %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13593 		" %ld_${var}_ld_3_0 = OpLoad %u32 %ld_${var}_gep_3_0\n"
13594 		" %ld_${var}_ld_3_1 = OpLoad %u32 %ld_${var}_gep_3_1\n"
13595 		" %ld_${var}_ld_4_0 = OpLoad %u32 %ld_${var}_gep_4_0\n"
13596 		" %ld_${var}_ld_4_1 = OpLoad %u32 %ld_${var}_gep_4_1\n"
13597 		" %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13598 		" %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13599 		" %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13600 		" %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13601 		" %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13602 		" %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13603 		" %ld_${var}_bc_3_0 = OpBitcast %v2f16 %ld_${var}_ld_3_0\n"
13604 		" %ld_${var}_bc_3_1 = OpBitcast %v2f16 %ld_${var}_ld_3_1\n"
13605 		" %ld_${var}_bc_4_0 = OpBitcast %v2f16 %ld_${var}_ld_4_0\n"
13606 		" %ld_${var}_bc_4_1 = OpBitcast %v2f16 %ld_${var}_ld_4_1\n"
13607 		"  %ld_${var}_vec_0 = OpVectorShuffle %v3f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2\n"
13608 		"  %ld_${var}_vec_1 = OpVectorShuffle %v3f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2\n"
13609 		"  %ld_${var}_vec_2 = OpVectorShuffle %v3f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2\n"
13610 		"  %ld_${var}_vec_3 = OpVectorShuffle %v3f16 %ld_${var}_bc_3_0 %ld_${var}_bc_3_1 0 1 2\n"
13611 		"  %ld_${var}_vec_4 = OpVectorShuffle %v3f16 %ld_${var}_bc_4_0 %ld_${var}_bc_4_1 0 1 2\n"
13612 		"   %ld_${var}_cons = OpCompositeConstruct %v3f16arr5 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2 %ld_${var}_vec_3 %ld_${var}_vec_4\n"
13613 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13614 		"                     OpReturnValue %ld_${var}_st_test\n"
13615 		"                     OpFunctionEnd\n"
13616 	);
13617 
13618 	const string loadV4F16Arr3
13619 	(
      		// SPIR-V assembly template defining function %ld_${var}: it reads a three-element
      		// array of %v4f16 from member 0 of %${var} and returns it wrapped in %st_test.
      		// The data is addressed as u32 words 0..5. Every array element takes two consecutive
      		// words; each is bitcast to %v2f16 and the pair is merged by OpVectorShuffle 0 1 2 3.
      		// The %i32 parameter is declared but not referenced in the function body.
13620 		"        %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13621 		"  %ld_${var}_param = OpFunctionParameter %i32\n"
13622 		"  %ld_${var}_entry = OpLabel\n"
13623 		"%ld_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13624 		"%ld_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13625 		"%ld_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13626 		"%ld_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13627 		"%ld_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13628 		"%ld_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13629 		" %ld_${var}_ld_0_0 = OpLoad %u32 %ld_${var}_gep_0_0\n"
13630 		" %ld_${var}_ld_0_1 = OpLoad %u32 %ld_${var}_gep_0_1\n"
13631 		" %ld_${var}_ld_1_0 = OpLoad %u32 %ld_${var}_gep_1_0\n"
13632 		" %ld_${var}_ld_1_1 = OpLoad %u32 %ld_${var}_gep_1_1\n"
13633 		" %ld_${var}_ld_2_0 = OpLoad %u32 %ld_${var}_gep_2_0\n"
13634 		" %ld_${var}_ld_2_1 = OpLoad %u32 %ld_${var}_gep_2_1\n"
13635 		" %ld_${var}_bc_0_0 = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13636 		" %ld_${var}_bc_0_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1\n"
13637 		" %ld_${var}_bc_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13638 		" %ld_${var}_bc_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1\n"
13639 		" %ld_${var}_bc_2_0 = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13640 		" %ld_${var}_bc_2_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1\n"
13641 		"  %ld_${var}_vec_0 = OpVectorShuffle %v4f16 %ld_${var}_bc_0_0 %ld_${var}_bc_0_1 0 1 2 3\n"
13642 		"  %ld_${var}_vec_1 = OpVectorShuffle %v4f16 %ld_${var}_bc_1_0 %ld_${var}_bc_1_1 0 1 2 3\n"
13643 		"  %ld_${var}_vec_2 = OpVectorShuffle %v4f16 %ld_${var}_bc_2_0 %ld_${var}_bc_2_1 0 1 2 3\n"
13644 		"   %ld_${var}_cons = OpCompositeConstruct %v4f16arr3 %ld_${var}_vec_0 %ld_${var}_vec_1 %ld_${var}_vec_2\n"
13645 		"%ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13646 		"                     OpReturnValue %ld_${var}_st_test\n"
13647 		"                     OpFunctionEnd\n"
13648 	);
13649 
13650 	const string loadStruct16Arr3
13651 	(
      		// SPIR-V assembly template defining function %ld_${var}: it reads a three-element
      		// array of %struct16 from member 0 of %${var} and returns it wrapped in %st_test.
      		// The data is addressed as u32 words 0..11, four words per struct:
      		//   word 4*i      - bitcast to %v2f16, component 0 becomes the struct's %f16 member
      		//                   (component 1 of that word is ignored);
      		//   words 4*i+1.. - three %v2f16 values forming the struct's %v2f16arr3 member.
      		// The %i32 parameter is declared but not referenced in the function body.
13652 		"          %ld_${var} = OpFunction %st_test None %st_test_i32_fn\n"
13653 		"    %ld_${var}_param = OpFunctionParameter %i32\n"
13654 		"    %ld_${var}_entry = OpLabel\n"
13655 		"%ld_${var}_gep_0_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13656 		"%ld_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13657 		"%ld_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13658 		"%ld_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13659 		"%ld_${var}_gep_1_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13660 		"%ld_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13661 		"%ld_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13662 		"%ld_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13663 		"%ld_${var}_gep_2_0   = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13664 		"%ld_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13665 		"%ld_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13666 		"%ld_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13667 		" %ld_${var}_ld_0_0   = OpLoad %u32 %ld_${var}_gep_0_0\n"
13668 		" %ld_${var}_ld_0_1_0 = OpLoad %u32 %ld_${var}_gep_0_1_0\n"
13669 		" %ld_${var}_ld_0_1_1 = OpLoad %u32 %ld_${var}_gep_0_1_1\n"
13670 		" %ld_${var}_ld_0_1_2 = OpLoad %u32 %ld_${var}_gep_0_1_2\n"
13671 		" %ld_${var}_ld_1_0   = OpLoad %u32 %ld_${var}_gep_1_0\n"
13672 		" %ld_${var}_ld_1_1_0 = OpLoad %u32 %ld_${var}_gep_1_1_0\n"
13673 		" %ld_${var}_ld_1_1_1 = OpLoad %u32 %ld_${var}_gep_1_1_1\n"
13674 		" %ld_${var}_ld_1_1_2 = OpLoad %u32 %ld_${var}_gep_1_1_2\n"
13675 		" %ld_${var}_ld_2_0   = OpLoad %u32 %ld_${var}_gep_2_0\n"
13676 		" %ld_${var}_ld_2_1_0 = OpLoad %u32 %ld_${var}_gep_2_1_0\n"
13677 		" %ld_${var}_ld_2_1_1 = OpLoad %u32 %ld_${var}_gep_2_1_1\n"
13678 		" %ld_${var}_ld_2_1_2 = OpLoad %u32 %ld_${var}_gep_2_1_2\n"
13679 		" %ld_${var}_bc_0_0   = OpBitcast %v2f16 %ld_${var}_ld_0_0\n"
13680 		" %ld_${var}_bc_0_1_0 = OpBitcast %v2f16 %ld_${var}_ld_0_1_0\n"
13681 		" %ld_${var}_bc_0_1_1 = OpBitcast %v2f16 %ld_${var}_ld_0_1_1\n"
13682 		" %ld_${var}_bc_0_1_2 = OpBitcast %v2f16 %ld_${var}_ld_0_1_2\n"
13683 		" %ld_${var}_bc_1_0   = OpBitcast %v2f16 %ld_${var}_ld_1_0\n"
13684 		" %ld_${var}_bc_1_1_0 = OpBitcast %v2f16 %ld_${var}_ld_1_1_0\n"
13685 		" %ld_${var}_bc_1_1_1 = OpBitcast %v2f16 %ld_${var}_ld_1_1_1\n"
13686 		" %ld_${var}_bc_1_1_2 = OpBitcast %v2f16 %ld_${var}_ld_1_1_2\n"
13687 		" %ld_${var}_bc_2_0   = OpBitcast %v2f16 %ld_${var}_ld_2_0\n"
13688 		" %ld_${var}_bc_2_1_0 = OpBitcast %v2f16 %ld_${var}_ld_2_1_0\n"
13689 		" %ld_${var}_bc_2_1_1 = OpBitcast %v2f16 %ld_${var}_ld_2_1_1\n"
13690 		" %ld_${var}_bc_2_1_2 = OpBitcast %v2f16 %ld_${var}_ld_2_1_2\n"
13691 		"    %ld_${var}_arr_0 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_0_1_0 %ld_${var}_bc_0_1_1 %ld_${var}_bc_0_1_2\n"
13692 		"    %ld_${var}_arr_1 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_1_1_0 %ld_${var}_bc_1_1_1 %ld_${var}_bc_1_1_2\n"
13693 		"    %ld_${var}_arr_2 = OpCompositeConstruct %v2f16arr3 %ld_${var}_bc_2_1_0 %ld_${var}_bc_2_1_1 %ld_${var}_bc_2_1_2\n"
13694 		"     %ld_${var}_ex_0 = OpCompositeExtract %f16 %ld_${var}_bc_0_0 0\n"
13695 		"     %ld_${var}_ex_1 = OpCompositeExtract %f16 %ld_${var}_bc_1_0 0\n"
13696 		"     %ld_${var}_ex_2 = OpCompositeExtract %f16 %ld_${var}_bc_2_0 0\n"
13697 		"     %ld_${var}_st_0 = OpCompositeConstruct %struct16 %ld_${var}_ex_0 %ld_${var}_arr_0\n"
13698 		"     %ld_${var}_st_1 = OpCompositeConstruct %struct16 %ld_${var}_ex_1 %ld_${var}_arr_1\n"
13699 		"     %ld_${var}_st_2 = OpCompositeConstruct %struct16 %ld_${var}_ex_2 %ld_${var}_arr_2\n"
13700 		"     %ld_${var}_cons = OpCompositeConstruct %struct16arr3 %ld_${var}_st_0 %ld_${var}_st_1 %ld_${var}_st_2\n"
13701 		"  %ld_${var}_st_test = OpCompositeConstruct %st_test %ld_${var}_cons\n"
13702 		"                       OpReturnValue %ld_${var}_st_test\n"
13703 		"                      OpFunctionEnd\n"
13704 	);
13705 
13706 	const string storeF16
13707 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it extracts the %f16 member 0 of the %st_test argument and forwards it, together
      		// with the index, to helper %st_fn_${var}.
      		// The text of storeScalarF16AsUint (declared outside this excerpt) is appended;
      		// NOTE(review): presumably that template provides %st_fn_${var} - confirm.
13708 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13709 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13710 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13711 		" %st_${var}_entry = OpLabel\n"
13712 		"    %st_${var}_ex = OpCompositeExtract %f16 %st_${var}_param1 0\n"
13713 		"  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13714 		"                    OpReturn\n"
13715 		"                    OpFunctionEnd\n" +
13716 		storeScalarF16AsUint
13717 	);
13718 
13719 	const string storeV2F16
13720 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it extracts the %v2f16 member 0 of the %st_test argument and forwards it, together
      		// with the index, to helper %st_fn_${var}.
      		// The text of storeV2F16AsUint (declared outside this excerpt) is appended;
      		// NOTE(review): presumably that template provides %st_fn_${var} - confirm.
13721 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13722 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13723 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13724 		" %st_${var}_entry = OpLabel\n"
13725 		"    %st_${var}_ex = OpCompositeExtract %v2f16 %st_${var}_param1 0\n"
13726 		"  %st_${var}_call = OpFunctionCall %void %st_fn_${var} %st_${var}_ex %st_${var}_param2\n"
13727 		"                    OpReturn\n"
13728 		"                    OpFunctionEnd\n" +
13729 		storeV2F16AsUint
13730 	);
13731 
13732 	const string storeV3F16
13733 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the %v3f16 member 0 of the %st_test argument into u32 words 0 and 1 of
      		// member 0 of %${var}.
      		// The vector is split into two %v2f16 halves by shuffling against %c_v2f16_n1:
      		// selector "0 1" takes components 0 and 1, selector "2 3" takes component 2 plus the
      		// first component of %c_v2f16_n1 as filler for the unused fourth slot.
      		// The %i32 parameter is declared but not referenced in the function body.
13734 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13735 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13736 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13737 		" %st_${var}_entry = OpLabel\n"
13738 		"    %st_${var}_ex = OpCompositeExtract %v3f16 %st_${var}_param1 0\n"
13739 		" %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13740 		" %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13741 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13742 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13743 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13744 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13745 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13746 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13747 		"                    OpReturn\n"
13748 		"                    OpFunctionEnd\n"
13749 	);
13750 
13751 	const string storeV4F16
13752 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the %v4f16 member 0 of the %st_test argument into u32 words 0 and 1 of
      		// member 0 of %${var}.
      		// The vector is split into two %v2f16 halves with selectors "0 1" and "2 3"; with a
      		// four-component first operand both selectors pick only from the extracted vector,
      		// so %c_v2f16_n1 is present as second shuffle operand but never selected.
      		// The %i32 parameter is declared but not referenced in the function body.
13753 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13754 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13755 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13756 		" %st_${var}_entry = OpLabel\n"
13757 		"    %st_${var}_ex = OpCompositeExtract %v4f16 %st_${var}_param1 0\n"
13758 		" %st_${var}_vec_0 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 0 1\n"
13759 		" %st_${var}_vec_1 = OpVectorShuffle %v2f16 %st_${var}_ex %c_v2f16_n1 2 3\n"
13760 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13761 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13762 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13763 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13764 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13765 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13766 		"                    OpReturn\n"
13767 		"                    OpFunctionEnd\n"
13768 	);
13769 
13770 	const string storeF16Arr3
13771 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the three %f16 elements of member 0 of the %st_test argument into u32
      		// words 0 and 1 of member 0 of %${var}.
      		// Elements 0 and 1 are packed into the first word; element 2 is paired with the
      		// constant %c_f16_na as filler and packed into the second word.
      		// The %i32 parameter is declared but not referenced in the function body.
13772 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13773 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13774 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13775 		" %st_${var}_entry = OpLabel\n"
13776 		"  %st_${var}_ex_0 = OpCompositeExtract %f16 %st_${var}_param1 0 0\n"
13777 		"  %st_${var}_ex_1 = OpCompositeExtract %f16 %st_${var}_param1 0 1\n"
13778 		"  %st_${var}_ex_2 = OpCompositeExtract %f16 %st_${var}_param1 0 2\n"
13779 		" %st_${var}_vec_0 = OpCompositeConstruct %v2f16 %st_${var}_ex_0 %st_${var}_ex_1\n"
13780 		" %st_${var}_vec_1 = OpCompositeConstruct %v2f16 %st_${var}_ex_2 %c_f16_na\n"
13781 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_vec_0\n"
13782 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_vec_1\n"
13783 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13784 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13785 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13786 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13787 		"                    OpReturn\n"
13788 		"                    OpFunctionEnd\n"
13789 	);
13790 
13791 	const string storeV2F16Arr5
13792 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the five %v2f16 elements of member 0 of the %st_test argument into u32
      		// words 0..4 of member 0 of %${var}, one bitcast element per word.
      		// The %i32 parameter is declared but not referenced in the function body.
13793 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13794 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13795 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13796 		" %st_${var}_entry = OpLabel\n"
13797 		"  %st_${var}_ex_0 = OpCompositeExtract %v2f16 %st_${var}_param1 0 0\n"
13798 		"  %st_${var}_ex_1 = OpCompositeExtract %v2f16 %st_${var}_param1 0 1\n"
13799 		"  %st_${var}_ex_2 = OpCompositeExtract %v2f16 %st_${var}_param1 0 2\n"
13800 		"  %st_${var}_ex_3 = OpCompositeExtract %v2f16 %st_${var}_param1 0 3\n"
13801 		"  %st_${var}_ex_4 = OpCompositeExtract %v2f16 %st_${var}_param1 0 4\n"
13802 		"  %st_${var}_bc_0 = OpBitcast %u32 %st_${var}_ex_0\n"
13803 		"  %st_${var}_bc_1 = OpBitcast %u32 %st_${var}_ex_1\n"
13804 		"  %st_${var}_bc_2 = OpBitcast %u32 %st_${var}_ex_2\n"
13805 		"  %st_${var}_bc_3 = OpBitcast %u32 %st_${var}_ex_3\n"
13806 		"  %st_${var}_bc_4 = OpBitcast %u32 %st_${var}_ex_4\n"
13807 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13808 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13809 		" %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13810 		" %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13811 		" %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13812 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0\n"
13813 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_1\n"
13814 		"                    OpStore %st_${var}_gep_2 %st_${var}_bc_2\n"
13815 		"                    OpStore %st_${var}_gep_3 %st_${var}_bc_3\n"
13816 		"                    OpStore %st_${var}_gep_4 %st_${var}_bc_4\n"
13817 		"                    OpReturn\n"
13818 		"                    OpFunctionEnd\n"
13819 	);
13820 
13821 	const string storeV3F16Arr5
13822 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the five %v3f16 elements of member 0 of the %st_test argument into u32
      		// words 0..9 of member 0 of %${var}, two words per element.
      		// Each element is split into two %v2f16 halves by shuffling against %c_v2f16_n1:
      		// selector "0 1" takes components 0 and 1, selector "2 3" takes component 2 plus the
      		// first component of %c_v2f16_n1 as filler for the unused fourth slot.
      		// The %i32 parameter is declared but not referenced in the function body.
13823 		"       %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13824 		"%st_${var}_param1 = OpFunctionParameter %st_test\n"
13825 		"%st_${var}_param2 = OpFunctionParameter %i32\n"
13826 		" %st_${var}_entry = OpLabel\n"
13827 		"  %st_${var}_ex_0 = OpCompositeExtract %v3f16 %st_${var}_param1 0 0\n"
13828 		"  %st_${var}_ex_1 = OpCompositeExtract %v3f16 %st_${var}_param1 0 1\n"
13829 		"  %st_${var}_ex_2 = OpCompositeExtract %v3f16 %st_${var}_param1 0 2\n"
13830 		"  %st_${var}_ex_3 = OpCompositeExtract %v3f16 %st_${var}_param1 0 3\n"
13831 		"  %st_${var}_ex_4 = OpCompositeExtract %v3f16 %st_${var}_param1 0 4\n"
13832 		"%st_${var}_v2_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 0 1\n"
13833 		"%st_${var}_v2_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %c_v2f16_n1 2 3\n"
13834 		"%st_${var}_v2_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 0 1\n"
13835 		"%st_${var}_v2_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %c_v2f16_n1 2 3\n"
13836 		"%st_${var}_v2_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 0 1\n"
13837 		"%st_${var}_v2_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %c_v2f16_n1 2 3\n"
13838 		"%st_${var}_v2_3_0 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 0 1\n"
13839 		"%st_${var}_v2_3_1 = OpVectorShuffle %v2f16 %st_${var}_ex_3 %c_v2f16_n1 2 3\n"
13840 		"%st_${var}_v2_4_0 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 0 1\n"
13841 		"%st_${var}_v2_4_1 = OpVectorShuffle %v2f16 %st_${var}_ex_4 %c_v2f16_n1 2 3\n"
13842 		"%st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13843 		"%st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13844 		"%st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13845 		"%st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13846 		"%st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13847 		"%st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13848 		"%st_${var}_bc_3_0 = OpBitcast %u32 %st_${var}_v2_3_0\n"
13849 		"%st_${var}_bc_3_1 = OpBitcast %u32 %st_${var}_v2_3_1\n"
13850 		"%st_${var}_bc_4_0 = OpBitcast %u32 %st_${var}_v2_4_0\n"
13851 		"%st_${var}_bc_4_1 = OpBitcast %u32 %st_${var}_v2_4_1\n"
13852 		" %st_${var}_gep_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13853 		" %st_${var}_gep_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13854 		" %st_${var}_gep_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13855 		" %st_${var}_gep_3 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13856 		" %st_${var}_gep_4 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13857 		" %st_${var}_gep_5 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13858 		" %st_${var}_gep_6 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13859 		" %st_${var}_gep_7 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13860 		" %st_${var}_gep_8 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13861 		" %st_${var}_gep_9 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13862 		"                    OpStore %st_${var}_gep_0 %st_${var}_bc_0_0\n"
13863 		"                    OpStore %st_${var}_gep_1 %st_${var}_bc_0_1\n"
13864 		"                    OpStore %st_${var}_gep_2 %st_${var}_bc_1_0\n"
13865 		"                    OpStore %st_${var}_gep_3 %st_${var}_bc_1_1\n"
13866 		"                    OpStore %st_${var}_gep_4 %st_${var}_bc_2_0\n"
13867 		"                    OpStore %st_${var}_gep_5 %st_${var}_bc_2_1\n"
13868 		"                    OpStore %st_${var}_gep_6 %st_${var}_bc_3_0\n"
13869 		"                    OpStore %st_${var}_gep_7 %st_${var}_bc_3_1\n"
13870 		"                    OpStore %st_${var}_gep_8 %st_${var}_bc_4_0\n"
13871 		"                    OpStore %st_${var}_gep_9 %st_${var}_bc_4_1\n"
13872 		"                    OpReturn\n"
13873 		"                    OpFunctionEnd\n"
13874 	);
13875 
13876 	const string storeV4F16Arr3
13877 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the three %v4f16 elements of member 0 of the %st_test argument into u32
      		// words 0..5 of member 0 of %${var}, two words per element.
      		// Each element is shuffled with itself to produce its lower ("0 1") and upper ("2 3")
      		// %v2f16 halves, which are then bitcast to %u32.
      		// The %i32 parameter is declared but not referenced in the function body.
13878 		"        %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13879 		" %st_${var}_param1 = OpFunctionParameter %st_test\n"
13880 		" %st_${var}_param2 = OpFunctionParameter %i32\n"
13881 		"  %st_${var}_entry = OpLabel\n"
13882 		"   %st_${var}_ex_0 = OpCompositeExtract %v4f16 %st_${var}_param1 0 0\n"
13883 		"   %st_${var}_ex_1 = OpCompositeExtract %v4f16 %st_${var}_param1 0 1\n"
13884 		"   %st_${var}_ex_2 = OpCompositeExtract %v4f16 %st_${var}_param1 0 2\n"
13885 		"%st_${var}_vec_0_0 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 0 1\n"
13886 		"%st_${var}_vec_0_1 = OpVectorShuffle %v2f16 %st_${var}_ex_0 %st_${var}_ex_0 2 3\n"
13887 		"%st_${var}_vec_1_0 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 0 1\n"
13888 		"%st_${var}_vec_1_1 = OpVectorShuffle %v2f16 %st_${var}_ex_1 %st_${var}_ex_1 2 3\n"
13889 		"%st_${var}_vec_2_0 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 0 1\n"
13890 		"%st_${var}_vec_2_1 = OpVectorShuffle %v2f16 %st_${var}_ex_2 %st_${var}_ex_2 2 3\n"
13891 		" %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_vec_0_0\n"
13892 		" %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_vec_0_1\n"
13893 		" %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_vec_1_0\n"
13894 		" %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_vec_1_1\n"
13895 		" %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_vec_2_0\n"
13896 		" %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_vec_2_1\n"
13897 		"%st_${var}_gep_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13898 		"%st_${var}_gep_0_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13899 		"%st_${var}_gep_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13900 		"%st_${var}_gep_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13901 		"%st_${var}_gep_2_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13902 		"%st_${var}_gep_2_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13903 		"                     OpStore %st_${var}_gep_0_0 %st_${var}_bc_0_0\n"
13904 		"                     OpStore %st_${var}_gep_0_1 %st_${var}_bc_0_1\n"
13905 		"                     OpStore %st_${var}_gep_1_0 %st_${var}_bc_1_0\n"
13906 		"                     OpStore %st_${var}_gep_1_1 %st_${var}_bc_1_1\n"
13907 		"                     OpStore %st_${var}_gep_2_0 %st_${var}_bc_2_0\n"
13908 		"                     OpStore %st_${var}_gep_2_1 %st_${var}_bc_2_1\n"
13909 		"                     OpReturn\n"
13910 		"                     OpFunctionEnd\n"
13911 	);
13912 
13913 	const string storeStruct16Arr3
13914 	(
      		// SPIR-V assembly template defining function %st_${var} (%st_test value, %i32 index):
      		// it writes the three %struct16 elements of member 0 of the %st_test argument into
      		// u32 words 0..11 of member 0 of %${var}, four words per struct:
      		//   word 4*i      - the struct's %f16 member paired with filler %c_f16_na;
      		//   words 4*i+1.. - the three %v2f16 elements of the struct's array member.
      		// The %i32 parameter is declared but not referenced in the function body.
13915 		"          %st_${var} = OpFunction %void None %void_st_test_i32_fn\n"
13916 		"   %st_${var}_param1 = OpFunctionParameter %st_test\n"
13917 		"   %st_${var}_param2 = OpFunctionParameter %i32\n"
13918 		"    %st_${var}_entry = OpLabel\n"
13919 		"     %st_${var}_st_0 = OpCompositeExtract %struct16 %st_${var}_param1 0 0\n"
13920 		"     %st_${var}_st_1 = OpCompositeExtract %struct16 %st_${var}_param1 0 1\n"
13921 		"     %st_${var}_st_2 = OpCompositeExtract %struct16 %st_${var}_param1 0 2\n"
13922 		"   %st_${var}_el_0   = OpCompositeExtract   %f16 %st_${var}_st_0 0\n"
13923 		"   %st_${var}_v2_0_0 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 0\n"
13924 		"   %st_${var}_v2_0_1 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 1\n"
13925 		"   %st_${var}_v2_0_2 = OpCompositeExtract %v2f16 %st_${var}_st_0 1 2\n"
13926 		"   %st_${var}_el_1   = OpCompositeExtract   %f16 %st_${var}_st_1 0\n"
13927 		"   %st_${var}_v2_1_0 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 0\n"
13928 		"   %st_${var}_v2_1_1 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 1\n"
13929 		"   %st_${var}_v2_1_2 = OpCompositeExtract %v2f16 %st_${var}_st_1 1 2\n"
13930 		"   %st_${var}_el_2   = OpCompositeExtract   %f16 %st_${var}_st_2 0\n"
13931 		"   %st_${var}_v2_2_0 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 0\n"
13932 		"   %st_${var}_v2_2_1 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 1\n"
13933 		"   %st_${var}_v2_2_2 = OpCompositeExtract %v2f16 %st_${var}_st_2 1 2\n"
13934 		"     %st_${var}_v2_0 = OpCompositeConstruct %v2f16 %st_${var}_el_0 %c_f16_na\n"
13935 		"     %st_${var}_v2_1 = OpCompositeConstruct %v2f16 %st_${var}_el_1 %c_f16_na\n"
13936 		"     %st_${var}_v2_2 = OpCompositeConstruct %v2f16 %st_${var}_el_2 %c_f16_na\n"
13937 		"   %st_${var}_bc_0   = OpBitcast %u32 %st_${var}_v2_0\n"
13938 		"   %st_${var}_bc_0_0 = OpBitcast %u32 %st_${var}_v2_0_0\n"
13939 		"   %st_${var}_bc_0_1 = OpBitcast %u32 %st_${var}_v2_0_1\n"
13940 		"   %st_${var}_bc_0_2 = OpBitcast %u32 %st_${var}_v2_0_2\n"
13941 		"   %st_${var}_bc_1   = OpBitcast %u32 %st_${var}_v2_1\n"
13942 		"   %st_${var}_bc_1_0 = OpBitcast %u32 %st_${var}_v2_1_0\n"
13943 		"   %st_${var}_bc_1_1 = OpBitcast %u32 %st_${var}_v2_1_1\n"
13944 		"   %st_${var}_bc_1_2 = OpBitcast %u32 %st_${var}_v2_1_2\n"
13945 		"   %st_${var}_bc_2   = OpBitcast %u32 %st_${var}_v2_2\n"
13946 		"   %st_${var}_bc_2_0 = OpBitcast %u32 %st_${var}_v2_2_0\n"
13947 		"   %st_${var}_bc_2_1 = OpBitcast %u32 %st_${var}_v2_2_1\n"
13948 		"   %st_${var}_bc_2_2 = OpBitcast %u32 %st_${var}_v2_2_2\n"
13949 		"%st_${var}_gep_0_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_0\n"
13950 		"%st_${var}_gep_0_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_1\n"
13951 		"%st_${var}_gep_0_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_2\n"
13952 		"%st_${var}_gep_0_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_3\n"
13953 		"%st_${var}_gep_1_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_4\n"
13954 		"%st_${var}_gep_1_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_5\n"
13955 		"%st_${var}_gep_1_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_6\n"
13956 		"%st_${var}_gep_1_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_7\n"
13957 		"%st_${var}_gep_2_0_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_8\n"
13958 		"%st_${var}_gep_2_1_0 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_9\n"
13959 		"%st_${var}_gep_2_1_1 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_10\n"
13960 		"%st_${var}_gep_2_1_2 = OpAccessChain %up_u32 %${var} %c_i32_0 %c_i32_11\n"
13961 		"                       OpStore %st_${var}_gep_0_0_0 %st_${var}_bc_0\n"
13962 		"                       OpStore %st_${var}_gep_0_1_0 %st_${var}_bc_0_0\n"
13963 		"                       OpStore %st_${var}_gep_0_1_1 %st_${var}_bc_0_1\n"
13964 		"                       OpStore %st_${var}_gep_0_1_2 %st_${var}_bc_0_2\n"
13965 		"                       OpStore %st_${var}_gep_1_0_0 %st_${var}_bc_1\n"
13966 		"                       OpStore %st_${var}_gep_1_1_0 %st_${var}_bc_1_0\n"
13967 		"                       OpStore %st_${var}_gep_1_1_1 %st_${var}_bc_1_1\n"
13968 		"                       OpStore %st_${var}_gep_1_1_2 %st_${var}_bc_1_2\n"
13969 		"                       OpStore %st_${var}_gep_2_0_0 %st_${var}_bc_2\n"
13970 		"                       OpStore %st_${var}_gep_2_1_0 %st_${var}_bc_2_0\n"
13971 		"                       OpStore %st_${var}_gep_2_1_1 %st_${var}_bc_2_1\n"
13972 		"                       OpStore %st_${var}_gep_2_1_2 %st_${var}_bc_2_2\n"
13973 		"                       OpReturn\n"
13974 		"                       OpFunctionEnd\n"
13975 	);
13976 
13977 	struct OpParts	// SPIR-V text fragments that differ between the tested opcodes.
13978 	{
13979 		const char*	premainDecls;		// Type declarations; becomes template key "op_premain_decls".
13980 		const char*	swFunCall;			// Call sequence for %sw_fun; becomes template key "op_sw_fun_call".
13981 		const char*	swFunHeader;		// Opening lines of %sw_fun; becomes template key "op_sw_fun_header".
13982 		const char*	caseDefaultValue;	// Result id used for the default case; becomes "op_case_default_value".
13983 		const char*	argsPartial;		// Leading operands of the opcode; becomes per-case key "op_args_part".
13984 	};
13985 
13986 	OpParts								opPartsArray[]			=
13987 	{
      		// One entry per tested opcode, selected by opIndex; field order follows struct OpParts.
      		// Entry 0: %sw_fun takes (%f16 new value, %st_test old composite, %i32 index) and
      		// returns %st_test; the source SSBO is declared with %ra_f16 and the destination with %ra_st.
13988 		// OpCompositeInsert
13989 		{
13990 			"       %fun_t = OpTypeFunction %st_test %f16 %st_test %i32\n"
13991 			"    %SSBO_src = OpTypeStruct %ra_f16\n"
13992 			"    %SSBO_dst = OpTypeStruct %ra_st\n",
13993 
13994 			"   %val_new = OpFunctionCall %f16 %ld_arg_ssbo_src %ndx\n"
13995 			"   %val_old = OpFunctionCall %st_test %ld_ssbo_dst %c_i32_0\n"
13996 			"   %val_dst = OpFunctionCall %st_test %sw_fun %val_new %val_old %ndx\n",
13997 
13998 			"   %sw_fun = OpFunction %st_test None %fun_t\n"
13999 			"%sw_paramv = OpFunctionParameter %f16\n",
14000 
14001 			"%sw_param",
14002 
14003 			"%st_test %sw_paramv %sw_param",
14004 		},
      		// Entry 1: %sw_fun takes (%st_test composite, %i32 index) and returns %f16; the
      		// source SSBO is declared with %ra_st and the destination with %ra_f16.
14005 		// OpCompositeExtract
14006 		{
14007 			"       %fun_t = OpTypeFunction %f16 %st_test %i32\n"
14008 			"    %SSBO_src = OpTypeStruct %ra_st\n"
14009 			"    %SSBO_dst = OpTypeStruct %ra_f16\n",
14010 
14011 			"   %val_src = OpFunctionCall %st_test %ld_ssbo_src %c_i32_0\n"
14012 			"   %val_dst = OpFunctionCall %f16 %sw_fun %val_src %ndx\n",
14013 
14014 			"   %sw_fun = OpFunction %f16 None %fun_t\n",
14015 
14016 			"%c_f16_na",
14017 
14018 			"%f16 %sw_param",
14019 		},
14020 	};
14021 
      	// Reject an opIndex that does not name one of the entries above.
14022 	DE_ASSERT(opIndex < DE_LENGTH_OF_ARRAY(opPartsArray));
14023 
14024 	const char*	accessPathF16[] =
14025 	{
      		// Composite index path per 16-bit slot of the test structure. A DE_NULL slot has no
      		// path: the loop below fills it with exceptionValue and emits no switch case for it.
14026 		"0",			// %f16
14027 		DE_NULL,
14028 	};
14029 	const char*	accessPathV2F16[] =
14030 	{
      		// Composite index path per 16-bit slot: member 0, then the vector component.
14031 		"0 0",			// %v2f16
14032 		"0 1",
14033 	};
14034 	const char*	accessPathV3F16[] =
14035 	{
      		// Composite index path per 16-bit slot: member 0, then the vector component.
      		// The trailing DE_NULL slot has no path: the loop below fills it with exceptionValue
      		// and emits no switch case for it.
14036 		"0 0",			// %v3f16
14037 		"0 1",
14038 		"0 2",
14039 		DE_NULL,
14040 	};
14041 	const char*	accessPathV4F16[] =
14042 	{
      		// Composite index path per 16-bit slot: member 0, then the vector component.
14043 		"0 0",			// %v4f16
14044 		"0 1",
14045 		"0 2",
14046 		"0 3",
14047 	};
14048 	const char*	accessPathF16Arr3[] =
14049 	{
      		// Composite index path per 16-bit slot: member 0, then the array element.
      		// The trailing DE_NULL slot has no path: the loop below fills it with exceptionValue
      		// and emits no switch case for it.
14050 		"0 0",			// %f16arr3
14051 		"0 1",
14052 		"0 2",
14053 		DE_NULL,
14054 	};
14055 	const char*	accessPathStruct16Arr3[] =
14056 	{
      		// Composite index path per 16-bit slot, eight slots per array element i:
      		//   "0 i 0"     - the struct's scalar member,
      		//   DE_NULL     - slot without a path (filled with exceptionValue by the loop below,
      		//                 no switch case emitted),
      		//   "0 i 1 j k" - component k of element j of the struct's array-of-vectors member.
14057 		"0 0 0",		// %struct16arr3
14058 		DE_NULL,
14059 		"0 0 1 0 0",
14060 		"0 0 1 0 1",
14061 		"0 0 1 1 0",
14062 		"0 0 1 1 1",
14063 		"0 0 1 2 0",
14064 		"0 0 1 2 1",
14065 		"0 1 0",
14066 		DE_NULL,
14067 		"0 1 1 0 0",
14068 		"0 1 1 0 1",
14069 		"0 1 1 1 0",
14070 		"0 1 1 1 1",
14071 		"0 1 1 2 0",
14072 		"0 1 1 2 1",
14073 		"0 2 0",
14074 		DE_NULL,
14075 		"0 2 1 0 0",
14076 		"0 2 1 0 1",
14077 		"0 2 1 1 0",
14078 		"0 2 1 1 1",
14079 		"0 2 1 2 0",
14080 		"0 2 1 2 1",
14081 	};
14082 	const char*	accessPathV2F16Arr5[] =
14083 	{
      		// Composite index path per 16-bit slot: member 0, array element, vector component.
14084 		"0 0 0",		// %v2f16arr5
14085 		"0 0 1",
14086 		"0 1 0",
14087 		"0 1 1",
14088 		"0 2 0",
14089 		"0 2 1",
14090 		"0 3 0",
14091 		"0 3 1",
14092 		"0 4 0",
14093 		"0 4 1",
14094 	};
14095 	const char*	accessPathV3F16Arr5[] =
14096 	{
      		// Composite index path per 16-bit slot: member 0, array element, vector component.
      		// Every element is followed by one DE_NULL slot, which has no path: the loop below
      		// fills it with exceptionValue and emits no switch case for it.
14097 		"0 0 0",		// %v3f16arr5
14098 		"0 0 1",
14099 		"0 0 2",
14100 		DE_NULL,
14101 		"0 1 0",
14102 		"0 1 1",
14103 		"0 1 2",
14104 		DE_NULL,
14105 		"0 2 0",
14106 		"0 2 1",
14107 		"0 2 2",
14108 		DE_NULL,
14109 		"0 3 0",
14110 		"0 3 1",
14111 		"0 3 2",
14112 		DE_NULL,
14113 		"0 4 0",
14114 		"0 4 1",
14115 		"0 4 2",
14116 		DE_NULL,
14117 	};
14118 	const char*	accessPathV4F16Arr3[] =
14119 	{
      		// Composite index path per 16-bit slot: member 0, array element, vector component.
      		// The four trailing DE_NULL slots have no path (filled with exceptionValue by the loop
      		// below, no switch case emitted) and bring the table to 16 slots.
      		// NOTE(review): presumably this padding matches the size the buffer layout expects -
      		// confirm against the decoration template.
14120 		"0 0 0",		// %v4f16arr3
14121 		"0 0 1",
14122 		"0 0 2",
14123 		"0 0 3",
14124 		"0 1 0",
14125 		"0 1 1",
14126 		"0 1 2",
14127 		"0 1 3",
14128 		"0 2 0",
14129 		"0 2 1",
14130 		"0 2 2",
14131 		"0 2 3",
14132 		DE_NULL,
14133 		DE_NULL,
14134 		DE_NULL,
14135 		DE_NULL,
14136 	};
14137 
14138 	struct TypeTestParameters	// Everything that varies between the tested composite types.
14139 	{
14140 		const char*		name;				// Test name; also substituted as template key "field_type".
14141 		size_t			accessPathLength;	// Number of entries in accessPath (16-bit slots, DE_NULL ones included).
14142 		const char**	accessPath;			// Per-slot composite index paths; DE_NULL for slots without one.
14143 		const string	loadFunction;		// SPIR-V template (placeholder ${var}) that loads the type.
14144 		const string	storeFunction;		// SPIR-V template (placeholder ${var}) that stores the type.
14145 	};
14146 
14147 	const TypeTestParameters typeTestParameters[] =
14148 	{
      		// One row per tested type: name, slot count, access-path table, load template, store template.
      		// The loop below iterates over all rows.
14149 		{	"f16",			DE_LENGTH_OF_ARRAY(accessPathF16),			accessPathF16,			loadF16,			storeF16		 },
14150 		{	"v2f16",		DE_LENGTH_OF_ARRAY(accessPathV2F16),		accessPathV2F16,		loadV2F16,			storeV2F16		 },
14151 		{	"v3f16",		DE_LENGTH_OF_ARRAY(accessPathV3F16),		accessPathV3F16,		loadV3F16,			storeV3F16		 },
14152 		{	"v4f16",		DE_LENGTH_OF_ARRAY(accessPathV4F16),		accessPathV4F16,		loadV4F16,			storeV4F16		  },
14153 		{	"f16arr3",		DE_LENGTH_OF_ARRAY(accessPathF16Arr3),		accessPathF16Arr3,		loadF16Arr3,		storeF16Arr3	  },
14154 		{	"v2f16arr5",	DE_LENGTH_OF_ARRAY(accessPathV2F16Arr5),	accessPathV2F16Arr5,	loadV2F16Arr5,		storeV2F16Arr5	  },
14155 		{	"v3f16arr5",	DE_LENGTH_OF_ARRAY(accessPathV3F16Arr5),	accessPathV3F16Arr5,	loadV3F16Arr5,		storeV3F16Arr5	  },
14156 		{	"v4f16arr3",	DE_LENGTH_OF_ARRAY(accessPathV4F16Arr3),	accessPathV4F16Arr3,	loadV4F16Arr3,		storeV4F16Arr3	  },
14157 		{	"struct16arr3",	DE_LENGTH_OF_ARRAY(accessPathStruct16Arr3),	accessPathStruct16Arr3,	loadStruct16Arr3,	storeStruct16Arr3},
14158 	};
14159 
14160 	for (size_t typeTestNdx = 0; typeTestNdx < DE_LENGTH_OF_ARRAY(typeTestParameters); ++typeTestNdx)
14161 	{
14162 		const OpParts		opParts				= opPartsArray[opIndex];
14163 		const string		testName			= typeTestParameters[typeTestNdx].name;
14164 		const size_t		structItemsCount	= typeTestParameters[typeTestNdx].accessPathLength;
14165 		const char**		accessPath			= typeTestParameters[typeTestNdx].accessPath;
14166 		SpecResource		specResource;
14167 		map<string, string>	specs;
14168 		VulkanFeatures		features;
14169 		map<string, string>	fragments;
14170 		vector<string>		extensions;
14171 		vector<deFloat16>	inputFP16;
14172 		vector<deFloat16>	dummyFP16Output;
14173 
14174 		// Generate values for input
14175 		inputFP16.reserve(structItemsCount);
14176 		for (deUint32 structItemNdx = 0; structItemNdx < structItemsCount; ++structItemNdx)
14177 			inputFP16.push_back((accessPath[structItemNdx] == DE_NULL) ? exceptionValue : tcu::Float16(float(structItemNdx)).bits());
14178 
14179 		dummyFP16Output.resize(structItemsCount);
14180 
14181 		// Generate cases for OpSwitch
14182 		{
14183 			string	caseBodies;
14184 			string	caseList;
14185 
14186 			for (deUint32 caseNdx = 0; caseNdx < structItemsCount; ++caseNdx)
14187 				if (accessPath[caseNdx] != DE_NULL)
14188 				{
14189 					map<string, string>	specCase;
14190 
14191 					specCase["case_ndx"]		= de::toString(caseNdx);
14192 					specCase["access_path"]		= accessPath[caseNdx];
14193 					specCase["op_args_part"]	= opParts.argsPartial;
14194 					specCase["op_name"]			= opName;
14195 
14196 					caseBodies	+= testCaseBody.specialize(specCase);
14197 					caseList	+= de::toString(caseNdx) + " %case_" + de::toString(caseNdx) + " ";
14198 				}
14199 
14200 			specs["case_bodies"]	= caseBodies;
14201 			specs["case_list"]		= caseList;
14202 		}
14203 
14204 		specs["num_elements"]			= de::toString(structItemsCount);
14205 		specs["field_type"]				= typeTestParameters[typeTestNdx].name;
14206 		specs["struct_item_size"]		= de::toString(structItemsCount * sizeof(deFloat16));
14207 		specs["struct_u32s"]			= de::toString(structItemsCount / 2);
14208 		specs["op_premain_decls"]		= opParts.premainDecls;
14209 		specs["op_sw_fun_call"]			= opParts.swFunCall;
14210 		specs["op_sw_fun_header"]		= opParts.swFunHeader;
14211 		specs["op_case_default_value"]	= opParts.caseDefaultValue;
14212 		if (opIndex == 0) {
14213 			specs["st_call"]			= "st_ssbo_dst";
14214 			specs["st_ndx"]				= "c_i32_0";
14215 		} else {
14216 			specs["st_call"]			= "st_fn_ssbo_dst";
14217 			specs["st_ndx"]				= "ndx";
14218 		}
14219 
14220 		fragments["capability"]		= "OpCapability Float16\n";
14221 		fragments["decoration"]		= decoration.specialize(specs);
14222 		fragments["pre_main"]		= preMain.specialize(specs);
14223 		fragments["testfun"]		= testFun.specialize(specs);
14224 		if (opIndex == 0) {
14225 			fragments["testfun"]		+= StringTemplate(loadScalarF16FromUint).specialize({{"var", "ssbo_src"}});
14226 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_dst"}});
14227 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].storeFunction).specialize({{"var", "ssbo_dst"}});
14228 		} else {
14229 			fragments["testfun"]		+= StringTemplate(typeTestParameters[typeTestNdx].loadFunction).specialize({{"var", "ssbo_src"}});
14230 			fragments["testfun"]		+= StringTemplate(storeScalarF16AsUint).specialize({{"var", "ssbo_dst"}});
14231 		}
14232 
14233 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputFP16)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14234 		specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(dummyFP16Output)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
14235 		specResource.verifyIO = compareFP16CompositeFunc;
14236 
14237 		extensions.push_back("VK_KHR_shader_float16_int8");
14238 
14239 		features.extFloat16Int8.shaderFloat16 = true;
14240 
14241 		finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
14242 	}
14243 
14244 	return testGroup.release();
14245 }
14246 
14247 struct fp16PerComponent
14248 {
fp16PerComponentvkt::SpirVAssembly::fp16PerComponent14249 	fp16PerComponent()
14250 		: flavor(0)
14251 		, floatFormat16	(-14, 15, 10, true)
14252 		, outCompCount(0)
14253 		, argCompCount(3, 0)
14254 	{
14255 	}
14256 
callOncePerComponentvkt::SpirVAssembly::fp16PerComponent14257 	bool			callOncePerComponent	()									{ return true; }
getComponentValidityvkt::SpirVAssembly::fp16PerComponent14258 	deUint32		getComponentValidity	()									{ return static_cast<deUint32>(-1); }
14259 
getULPsvkt::SpirVAssembly::fp16PerComponent14260 	virtual double	getULPs					(vector<const deFloat16*>&)			{ return 1.0; }
getMinvkt::SpirVAssembly::fp16PerComponent14261 	virtual double	getMin					(double value, double ulps)			{ return value - floatFormat16.ulp(deAbs(value), ulps); }
getMaxvkt::SpirVAssembly::fp16PerComponent14262 	virtual double	getMax					(double value, double ulps)			{ return value + floatFormat16.ulp(deAbs(value), ulps); }
14263 
getFlavorCountvkt::SpirVAssembly::fp16PerComponent14264 	virtual size_t	getFlavorCount			()									{ return flavorNames.empty() ? 1 : flavorNames.size(); }
setFlavorvkt::SpirVAssembly::fp16PerComponent14265 	virtual void	setFlavor				(size_t flavorNo)					{ DE_ASSERT(flavorNo < getFlavorCount()); flavor = flavorNo; }
getFlavorvkt::SpirVAssembly::fp16PerComponent14266 	virtual size_t	getFlavor				()									{ return flavor; }
getCurrentFlavorNamevkt::SpirVAssembly::fp16PerComponent14267 	virtual string	getCurrentFlavorName	()									{ return flavorNames.empty() ? string("") : flavorNames[getFlavor()]; }
14268 
setOutCompCountvkt::SpirVAssembly::fp16PerComponent14269 	virtual void	setOutCompCount			(size_t compCount)					{ outCompCount = compCount; }
getOutCompCountvkt::SpirVAssembly::fp16PerComponent14270 	virtual size_t	getOutCompCount			()									{ return outCompCount; }
14271 
setArgCompCountvkt::SpirVAssembly::fp16PerComponent14272 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)	{ argCompCount[argNo] = compCount; }
getArgCompCountvkt::SpirVAssembly::fp16PerComponent14273 	virtual size_t	getArgCompCount			(size_t argNo)						{ return argCompCount[argNo]; }
14274 
14275 protected:
14276 	size_t				flavor;
14277 	tcu::FloatFormat	floatFormat16;
14278 	size_t				outCompCount;
14279 	vector<size_t>		argCompCount;
14280 	vector<string>		flavorNames;
14281 };
14282 
14283 struct fp16OpFNegate : public fp16PerComponent
14284 {
14285 	template <class fp16type>
calcvkt::SpirVAssembly::fp16OpFNegate14286 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14287 	{
14288 		const fp16type	x		(*in[0]);
14289 		const double	d		(x.asDouble());
14290 		const double	result	(0.0 - d);
14291 
14292 		out[0] = fp16type(result).bits();
14293 		min[0] = getMin(result, getULPs(in));
14294 		max[0] = getMax(result, getULPs(in));
14295 
14296 		return true;
14297 	}
14298 };
14299 
14300 struct fp16Round : public fp16PerComponent
14301 {
fp16Roundvkt::SpirVAssembly::fp16Round14302 	fp16Round() : fp16PerComponent()
14303 	{
14304 		flavorNames.push_back("Floor(x+0.5)");
14305 		flavorNames.push_back("Floor(x-0.5)");
14306 		flavorNames.push_back("RoundEven");
14307 	}
14308 
14309 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Round14310 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14311 	{
14312 		const fp16type	x		(*in[0]);
14313 		const double	d		(x.asDouble());
14314 		double			result	(0.0);
14315 
14316 		switch (flavor)
14317 		{
14318 			case 0:		result = deRound(d);		break;
14319 			case 1:		result = deFloor(d - 0.5);	break;
14320 			case 2:		result = deRoundEven(d);	break;
14321 			default:	TCU_THROW(InternalError, "Invalid flavor specified");
14322 		}
14323 
14324 		out[0] = fp16type(result).bits();
14325 		min[0] = getMin(result, getULPs(in));
14326 		max[0] = getMax(result, getULPs(in));
14327 
14328 		return true;
14329 	}
14330 };
14331 
14332 struct fp16RoundEven : public fp16PerComponent
14333 {
14334 	template<class fp16type>
calcvkt::SpirVAssembly::fp16RoundEven14335 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14336 	{
14337 		const fp16type	x		(*in[0]);
14338 		const double	d		(x.asDouble());
14339 		const double	result	(deRoundEven(d));
14340 
14341 		out[0] = fp16type(result).bits();
14342 		min[0] = getMin(result, getULPs(in));
14343 		max[0] = getMax(result, getULPs(in));
14344 
14345 		return true;
14346 	}
14347 };
14348 
14349 struct fp16Trunc : public fp16PerComponent
14350 {
14351 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Trunc14352 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14353 	{
14354 		const fp16type	x		(*in[0]);
14355 		const double	d		(x.asDouble());
14356 		const double	result	(deTrunc(d));
14357 
14358 		out[0] = fp16type(result).bits();
14359 		min[0] = getMin(result, getULPs(in));
14360 		max[0] = getMax(result, getULPs(in));
14361 
14362 		return true;
14363 	}
14364 };
14365 
14366 struct fp16FAbs : public fp16PerComponent
14367 {
14368 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FAbs14369 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14370 	{
14371 		const fp16type	x		(*in[0]);
14372 		const double	d		(x.asDouble());
14373 		const double	result	(deAbs(d));
14374 
14375 		out[0] = fp16type(result).bits();
14376 		min[0] = getMin(result, getULPs(in));
14377 		max[0] = getMax(result, getULPs(in));
14378 
14379 		return true;
14380 	}
14381 };
14382 
14383 struct fp16FSign : public fp16PerComponent
14384 {
14385 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FSign14386 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14387 	{
14388 		const fp16type	x		(*in[0]);
14389 		const double	d		(x.asDouble());
14390 		const double	result	(deSign(d));
14391 
14392 		if (x.isNaN())
14393 			return false;
14394 
14395 		out[0] = fp16type(result).bits();
14396 		min[0] = getMin(result, getULPs(in));
14397 		max[0] = getMax(result, getULPs(in));
14398 
14399 		return true;
14400 	}
14401 };
14402 
14403 struct fp16Floor : public fp16PerComponent
14404 {
14405 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Floor14406 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14407 	{
14408 		const fp16type	x		(*in[0]);
14409 		const double	d		(x.asDouble());
14410 		const double	result	(deFloor(d));
14411 
14412 		out[0] = fp16type(result).bits();
14413 		min[0] = getMin(result, getULPs(in));
14414 		max[0] = getMax(result, getULPs(in));
14415 
14416 		return true;
14417 	}
14418 };
14419 
14420 struct fp16Ceil : public fp16PerComponent
14421 {
14422 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Ceil14423 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14424 	{
14425 		const fp16type	x		(*in[0]);
14426 		const double	d		(x.asDouble());
14427 		const double	result	(deCeil(d));
14428 
14429 		out[0] = fp16type(result).bits();
14430 		min[0] = getMin(result, getULPs(in));
14431 		max[0] = getMax(result, getULPs(in));
14432 
14433 		return true;
14434 	}
14435 };
14436 
14437 struct fp16Fract : public fp16PerComponent
14438 {
14439 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Fract14440 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14441 	{
14442 		const fp16type	x		(*in[0]);
14443 		const double	d		(x.asDouble());
14444 		const double	result	(deFrac(d));
14445 
14446 		out[0] = fp16type(result).bits();
14447 		min[0] = getMin(result, getULPs(in));
14448 		max[0] = getMax(result, getULPs(in));
14449 
14450 		return true;
14451 	}
14452 };
14453 
14454 struct fp16Radians : public fp16PerComponent
14455 {
getULPsvkt::SpirVAssembly::fp16Radians14456 	virtual double getULPs (vector<const deFloat16*>& in)
14457 	{
14458 		DE_UNREF(in);
14459 
14460 		return 2.5;
14461 	}
14462 
14463 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Radians14464 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14465 	{
14466 		const fp16type	x		(*in[0]);
14467 		const float		d		(x.asFloat());
14468 		const float		result	(deFloatRadians(d));
14469 
14470 		out[0] = fp16type(result).bits();
14471 		min[0] = getMin(result, getULPs(in));
14472 		max[0] = getMax(result, getULPs(in));
14473 
14474 		return true;
14475 	}
14476 };
14477 
14478 struct fp16Degrees : public fp16PerComponent
14479 {
getULPsvkt::SpirVAssembly::fp16Degrees14480 	virtual double getULPs (vector<const deFloat16*>& in)
14481 	{
14482 		DE_UNREF(in);
14483 
14484 		return 2.5;
14485 	}
14486 
14487 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Degrees14488 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14489 	{
14490 		const fp16type	x		(*in[0]);
14491 		const float		d		(x.asFloat());
14492 		const float		result	(deFloatDegrees(d));
14493 
14494 		out[0] = fp16type(result).bits();
14495 		min[0] = getMin(result, getULPs(in));
14496 		max[0] = getMax(result, getULPs(in));
14497 
14498 		return true;
14499 	}
14500 };
14501 
14502 struct fp16Sin : public fp16PerComponent
14503 {
14504 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sin14505 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14506 	{
14507 		const fp16type	x			(*in[0]);
14508 		const double	d			(x.asDouble());
14509 		const double	result		(deSin(d));
14510 		const double	unspecUlp	(16.0);
14511 		const double	err			(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14512 
14513 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14514 			return false;
14515 
14516 		out[0] = fp16type(result).bits();
14517 		min[0] = result - err;
14518 		max[0] = result + err;
14519 
14520 		return true;
14521 	}
14522 };
14523 
14524 struct fp16Cos : public fp16PerComponent
14525 {
14526 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cos14527 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14528 	{
14529 		const fp16type	x			(*in[0]);
14530 		const double	d			(x.asDouble());
14531 		const double	result		(deCos(d));
14532 		const double	unspecUlp	(16.0);
14533 		const double	err			(de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE) ? deLdExp(1.0, -7) : floatFormat16.ulp(deAbs(result), unspecUlp));
14534 
14535 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14536 			return false;
14537 
14538 		out[0] = fp16type(result).bits();
14539 		min[0] = result - err;
14540 		max[0] = result + err;
14541 
14542 		return true;
14543 	}
14544 };
14545 
14546 struct fp16Tan : public fp16PerComponent
14547 {
14548 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Tan14549 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14550 	{
14551 		const fp16type	x		(*in[0]);
14552 		const double	d		(x.asDouble());
14553 		const double	result	(deTan(d));
14554 
14555 		if (!de::inRange(d, -DE_PI_DOUBLE, DE_PI_DOUBLE))
14556 			return false;
14557 
14558 		out[0] = fp16type(result).bits();
14559 		{
14560 			const double	err			= deLdExp(1.0, -7);
14561 			const double	s1			= deSin(d) + err;
14562 			const double	s2			= deSin(d) - err;
14563 			const double	c1			= deCos(d) + err;
14564 			const double	c2			= deCos(d) - err;
14565 			const double	edgeVals[]	= {s1/c1, s1/c2, s2/c1, s2/c2};
14566 			double			edgeLeft	= out[0];
14567 			double			edgeRight	= out[0];
14568 
14569 			if (deSign(c1 * c2) < 0.0)
14570 			{
14571 				edgeLeft	= -std::numeric_limits<double>::infinity();
14572 				edgeRight	= +std::numeric_limits<double>::infinity();
14573 			}
14574 			else
14575 			{
14576 				edgeLeft	= *std::min_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14577 				edgeRight	= *std::max_element(&edgeVals[0], &edgeVals[DE_LENGTH_OF_ARRAY(edgeVals)]);
14578 			}
14579 
14580 			min[0] = edgeLeft;
14581 			max[0] = edgeRight;
14582 		}
14583 
14584 		return true;
14585 	}
14586 };
14587 
14588 struct fp16Asin : public fp16PerComponent
14589 {
14590 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Asin14591 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14592 	{
14593 		const fp16type	x		(*in[0]);
14594 		const double	d		(x.asDouble());
14595 		const double	result	(deAsin(d));
14596 		const double	error	(deAtan2(d, sqrt(1.0 - d * d)));
14597 
14598 		if (!x.isNaN() && deAbs(d) > 1.0)
14599 			return false;
14600 
14601 		out[0] = fp16type(result).bits();
14602 		min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14603 		max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14604 
14605 		return true;
14606 	}
14607 };
14608 
14609 struct fp16Acos : public fp16PerComponent
14610 {
14611 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Acos14612 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14613 	{
14614 		const fp16type	x		(*in[0]);
14615 		const double	d		(x.asDouble());
14616 		const double	result	(deAcos(d));
14617 		const double	error	(deAtan2(sqrt(1.0 - d * d), d));
14618 
14619 		if (!x.isNaN() && deAbs(d) > 1.0)
14620 			return false;
14621 
14622 		out[0] = fp16type(result).bits();
14623 		min[0] = result - floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14624 		max[0] = result + floatFormat16.ulp(deAbs(error), 2 * 5.0); // This is not a precision test. Value is not from spec
14625 
14626 		return true;
14627 	}
14628 };
14629 
14630 struct fp16Atan : public fp16PerComponent
14631 {
getULPsvkt::SpirVAssembly::fp16Atan14632 	virtual double getULPs(vector<const deFloat16*>& in)
14633 	{
14634 		DE_UNREF(in);
14635 
14636 		return 2 * 5.0; // This is not a precision test. Value is not from spec
14637 	}
14638 
14639 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan14640 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14641 	{
14642 		const fp16type	x		(*in[0]);
14643 		const double	d		(x.asDouble());
14644 		const double	result	(deAtanOver(d));
14645 
14646 		out[0] = fp16type(result).bits();
14647 		min[0] = getMin(result, getULPs(in));
14648 		max[0] = getMax(result, getULPs(in));
14649 
14650 		return true;
14651 	}
14652 };
14653 
14654 struct fp16Sinh : public fp16PerComponent
14655 {
fp16Sinhvkt::SpirVAssembly::fp16Sinh14656 	fp16Sinh() : fp16PerComponent()
14657 	{
14658 		flavorNames.push_back("Double");
14659 		flavorNames.push_back("ExpFP16");
14660 	}
14661 
14662 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sinh14663 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14664 	{
14665 		const fp16type	x		(*in[0]);
14666 		const double	d		(x.asDouble());
14667 		const double	ulps	(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14668 		double			result	(0.0);
14669 		double			error	(0.0);
14670 
14671 		if (getFlavor() == 0)
14672 		{
14673 			result	= deSinh(d);
14674 			error	= floatFormat16.ulp(deAbs(result), ulps);
14675 		}
14676 		else if (getFlavor() == 1)
14677 		{
14678 			const fp16type	epx	(deExp(d));
14679 			const fp16type	enx	(deExp(-d));
14680 			const fp16type	esx	(epx.asDouble() - enx.asDouble());
14681 			const fp16type	sx2	(esx.asDouble() / 2.0);
14682 
14683 			result	= sx2.asDouble();
14684 			error	= deAbs(floatFormat16.ulp(epx.asDouble(), ulps)) + deAbs(floatFormat16.ulp(enx.asDouble(), ulps));
14685 		}
14686 		else
14687 		{
14688 			TCU_THROW(InternalError, "Unknown flavor");
14689 		}
14690 
14691 		out[0] = fp16type(result).bits();
14692 		min[0] = result - error;
14693 		max[0] = result + error;
14694 
14695 		return true;
14696 	}
14697 };
14698 
14699 struct fp16Cosh : public fp16PerComponent
14700 {
fp16Coshvkt::SpirVAssembly::fp16Cosh14701 	fp16Cosh() : fp16PerComponent()
14702 	{
14703 		flavorNames.push_back("Double");
14704 		flavorNames.push_back("ExpFP16");
14705 	}
14706 
14707 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cosh14708 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14709 	{
14710 		const fp16type	x		(*in[0]);
14711 		const double	d		(x.asDouble());
14712 		const double	ulps	(64 * (1.0 + 2 * deAbs(d))); // This is not a precision test. Value is not from spec
14713 		double			result	(0.0);
14714 
14715 		if (getFlavor() == 0)
14716 		{
14717 			result = deCosh(d);
14718 		}
14719 		else if (getFlavor() == 1)
14720 		{
14721 			const fp16type	epx	(deExp(d));
14722 			const fp16type	enx	(deExp(-d));
14723 			const fp16type	esx	(epx.asDouble() + enx.asDouble());
14724 			const fp16type	sx2	(esx.asDouble() / 2.0);
14725 
14726 			result = sx2.asDouble();
14727 		}
14728 		else
14729 		{
14730 			TCU_THROW(InternalError, "Unknown flavor");
14731 		}
14732 
14733 		out[0] = fp16type(result).bits();
14734 		min[0] = result - floatFormat16.ulp(deAbs(result), ulps);
14735 		max[0] = result + floatFormat16.ulp(deAbs(result), ulps);
14736 
14737 		return true;
14738 	}
14739 };
14740 
14741 struct fp16Tanh : public fp16PerComponent
14742 {
fp16Tanhvkt::SpirVAssembly::fp16Tanh14743 	fp16Tanh() : fp16PerComponent()
14744 	{
14745 		flavorNames.push_back("Tanh");
14746 		flavorNames.push_back("SinhCosh");
14747 		flavorNames.push_back("SinhCoshFP16");
14748 		flavorNames.push_back("PolyFP16");
14749 	}
14750 
getULPsvkt::SpirVAssembly::fp16Tanh14751 	virtual double getULPs (vector<const deFloat16*>& in)
14752 	{
14753 		const tcu::Float16	x	(*in[0]);
14754 		const double		d	(x.asDouble());
14755 
14756 		return 2 * (1.0 + 2 * deAbs(d)); // This is not a precision test. Value is not from spec
14757 	}
14758 
14759 	template<class fp16type>
calcPolyvkt::SpirVAssembly::fp16Tanh14760 	inline double calcPoly (const fp16type& espx, const fp16type& esnx, const fp16type& ecpx, const fp16type& ecnx)
14761 	{
14762 		const fp16type	esx	(espx.asDouble() - esnx.asDouble());
14763 		const fp16type	sx2	(esx.asDouble() / 2.0);
14764 		const fp16type	ecx	(ecpx.asDouble() + ecnx.asDouble());
14765 		const fp16type	cx2	(ecx.asDouble() / 2.0);
14766 		const fp16type	tg	(sx2.asDouble() / cx2.asDouble());
14767 		const double	rez	(tg.asDouble());
14768 
14769 		return rez;
14770 	}
14771 
14772 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Tanh14773 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14774 	{
14775 		const fp16type	x		(*in[0]);
14776 		const double	d		(x.asDouble());
14777 		double			result	(0.0);
14778 
14779 		if (getFlavor() == 0)
14780 		{
14781 			result	= deTanh(d);
14782 			min[0]	= getMin(result, getULPs(in));
14783 			max[0]	= getMax(result, getULPs(in));
14784 		}
14785 		else if (getFlavor() == 1)
14786 		{
14787 			result	= deSinh(d) / deCosh(d);
14788 			min[0]	= getMin(result, getULPs(in));
14789 			max[0]	= getMax(result, getULPs(in));
14790 		}
14791 		else if (getFlavor() == 2)
14792 		{
14793 			const fp16type	s	(deSinh(d));
14794 			const fp16type	c	(deCosh(d));
14795 
14796 			result	= s.asDouble() / c.asDouble();
14797 			min[0]	= getMin(result, getULPs(in));
14798 			max[0]	= getMax(result, getULPs(in));
14799 		}
14800 		else if (getFlavor() == 3)
14801 		{
14802 			const double	ulps	(getULPs(in));
14803 			const double	epxm	(deExp( d));
14804 			const double	enxm	(deExp(-d));
14805 			const double	epxmerr	= floatFormat16.ulp(epxm, ulps);
14806 			const double	enxmerr	= floatFormat16.ulp(enxm, ulps);
14807 			const fp16type	epx[]	= { fp16type(epxm - epxmerr), fp16type(epxm + epxmerr) };
14808 			const fp16type	enx[]	= { fp16type(enxm - enxmerr), fp16type(enxm + enxmerr) };
14809 			const fp16type	epxm16	(epxm);
14810 			const fp16type	enxm16	(enxm);
14811 			vector<double>	tgs;
14812 
14813 			for (size_t spNdx = 0; spNdx < DE_LENGTH_OF_ARRAY(epx); ++spNdx)
14814 			for (size_t snNdx = 0; snNdx < DE_LENGTH_OF_ARRAY(enx); ++snNdx)
14815 			for (size_t cpNdx = 0; cpNdx < DE_LENGTH_OF_ARRAY(epx); ++cpNdx)
14816 			for (size_t cnNdx = 0; cnNdx < DE_LENGTH_OF_ARRAY(enx); ++cnNdx)
14817 			{
14818 				const double tgh = calcPoly(epx[spNdx], enx[snNdx], epx[cpNdx], enx[cnNdx]);
14819 
14820 				tgs.push_back(tgh);
14821 			}
14822 
14823 			result = calcPoly(epxm16, enxm16, epxm16, enxm16);
14824 			min[0] = *std::min_element(tgs.begin(), tgs.end());
14825 			max[0] = *std::max_element(tgs.begin(), tgs.end());
14826 		}
14827 		else
14828 		{
14829 			TCU_THROW(InternalError, "Unknown flavor");
14830 		}
14831 
14832 		out[0] = fp16type(result).bits();
14833 
14834 		return true;
14835 	}
14836 };
14837 
14838 struct fp16Asinh : public fp16PerComponent
14839 {
fp16Asinhvkt::SpirVAssembly::fp16Asinh14840 	fp16Asinh() : fp16PerComponent()
14841 	{
14842 		flavorNames.push_back("Double");
14843 		flavorNames.push_back("PolyFP16Wiki");
14844 		flavorNames.push_back("PolyFP16Abs");
14845 	}
14846 
getULPsvkt::SpirVAssembly::fp16Asinh14847 	virtual double getULPs (vector<const deFloat16*>& in)
14848 	{
14849 		DE_UNREF(in);
14850 
14851 		return 256.0; // This is not a precision test. Value is not from spec
14852 	}
14853 
14854 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Asinh14855 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14856 	{
14857 		const fp16type	x		(*in[0]);
14858 		const double	d		(x.asDouble());
14859 		double			result	(0.0);
14860 
14861 		if (getFlavor() == 0)
14862 		{
14863 			result = deAsinh(d);
14864 		}
14865 		else if (getFlavor() == 1)
14866 		{
14867 			const fp16type	x2		(d * d);
14868 			const fp16type	x2p1	(x2.asDouble() + 1.0);
14869 			const fp16type	sq		(deSqrt(x2p1.asDouble()));
14870 			const fp16type	sxsq	(d + sq.asDouble());
14871 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14872 
14873 			if (lsxsq.isInf())
14874 				return false;
14875 
14876 			result = lsxsq.asDouble();
14877 		}
14878 		else if (getFlavor() == 2)
14879 		{
14880 			const fp16type	x2		(d * d);
14881 			const fp16type	x2p1	(x2.asDouble() + 1.0);
14882 			const fp16type	sq		(deSqrt(x2p1.asDouble()));
14883 			const fp16type	sxsq	(deAbs(d) + sq.asDouble());
14884 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14885 
14886 			result = deSign(d) * lsxsq.asDouble();
14887 		}
14888 		else
14889 		{
14890 			TCU_THROW(InternalError, "Unknown flavor");
14891 		}
14892 
14893 		out[0] = fp16type(result).bits();
14894 		min[0] = getMin(result, getULPs(in));
14895 		max[0] = getMax(result, getULPs(in));
14896 
14897 		return true;
14898 	}
14899 };
14900 
14901 struct fp16Acosh : public fp16PerComponent
14902 {
fp16Acoshvkt::SpirVAssembly::fp16Acosh14903 	fp16Acosh() : fp16PerComponent()
14904 	{
14905 		flavorNames.push_back("Double");
14906 		flavorNames.push_back("PolyFP16");
14907 	}
14908 
getULPsvkt::SpirVAssembly::fp16Acosh14909 	virtual double getULPs (vector<const deFloat16*>& in)
14910 	{
14911 		DE_UNREF(in);
14912 
14913 		return 16.0; // This is not a precision test. Value is not from spec
14914 	}
14915 
14916 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Acosh14917 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14918 	{
14919 		const fp16type	x		(*in[0]);
14920 		const double	d		(x.asDouble());
14921 		double			result	(0.0);
14922 
14923 		if (!x.isNaN() && d < 1.0)
14924 			return false;
14925 
14926 		if (getFlavor() == 0)
14927 		{
14928 			result = deAcosh(d);
14929 		}
14930 		else if (getFlavor() == 1)
14931 		{
14932 			const fp16type	x2		(d * d);
14933 			const fp16type	x2m1	(x2.asDouble() - 1.0);
14934 			const fp16type	sq		(deSqrt(x2m1.asDouble()));
14935 			const fp16type	sxsq	(d + sq.asDouble());
14936 			const fp16type	lsxsq	(deLog(sxsq.asDouble()));
14937 
14938 			result = lsxsq.asDouble();
14939 		}
14940 		else
14941 		{
14942 			TCU_THROW(InternalError, "Unknown flavor");
14943 		}
14944 
14945 		out[0] = fp16type(result).bits();
14946 		min[0] = getMin(result, getULPs(in));
14947 		max[0] = getMax(result, getULPs(in));
14948 
14949 		return true;
14950 	}
14951 };
14952 
14953 struct fp16Atanh : public fp16PerComponent
14954 {
fp16Atanhvkt::SpirVAssembly::fp16Atanh14955 	fp16Atanh() : fp16PerComponent()
14956 	{
14957 		flavorNames.push_back("Double");
14958 		flavorNames.push_back("PolyFP16");
14959 	}
14960 
14961 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atanh14962 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
14963 	{
14964 		const fp16type	x		(*in[0]);
14965 		const double	d		(x.asDouble());
14966 		double			result	(0.0);
14967 
14968 		if (deAbs(d) >= 1.0)
14969 			return false;
14970 
14971 		if (getFlavor() == 0)
14972 		{
14973 			const double	ulps	(16.0);	// This is not a precision test. Value is not from spec
14974 
14975 			result = deAtanh(d);
14976 			min[0] = getMin(result, ulps);
14977 			max[0] = getMax(result, ulps);
14978 		}
14979 		else if (getFlavor() == 1)
14980 		{
14981 			const fp16type	x1a		(1.0 + d);
14982 			const fp16type	x1b		(1.0 - d);
14983 			const fp16type	x1d		(x1a.asDouble() / x1b.asDouble());
14984 			const fp16type	lx1d	(deLog(x1d.asDouble()));
14985 			const fp16type	lx1d2	(0.5 * lx1d.asDouble());
14986 			const double	error	(2 * (de::inRange(deAbs(x1d.asDouble()), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(x1d.asDouble()), 3.0)));
14987 
14988 			result = lx1d2.asDouble();
14989 			min[0] = result - error;
14990 			max[0] = result + error;
14991 		}
14992 		else
14993 		{
14994 			TCU_THROW(InternalError, "Unknown flavor");
14995 		}
14996 
14997 		out[0] = fp16type(result).bits();
14998 
14999 		return true;
15000 	}
15001 };
15002 
15003 struct fp16Exp : public fp16PerComponent
15004 {
15005 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp15006 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15007 	{
15008 		const fp16type	x		(*in[0]);
15009 		const double	d		(x.asDouble());
15010 		const double	ulps	(10.0 * (1.0 + 2.0 * deAbs(d)));
15011 		const double	result	(deExp(d));
15012 
15013 		out[0] = fp16type(result).bits();
15014 		min[0] = getMin(result, ulps);
15015 		max[0] = getMax(result, ulps);
15016 
15017 		return true;
15018 	}
15019 };
15020 
15021 struct fp16Log : public fp16PerComponent
15022 {
15023 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Log15024 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15025 	{
15026 		const fp16type	x		(*in[0]);
15027 		const double	d		(x.asDouble());
15028 		const double	result	(deLog(d));
15029 		const double	error	(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15030 
15031 		if (d <= 0.0)
15032 			return false;
15033 
15034 		out[0] = fp16type(result).bits();
15035 		min[0] = result - error;
15036 		max[0] = result + error;
15037 
15038 		return true;
15039 	}
15040 };
15041 
15042 struct fp16Exp2 : public fp16PerComponent
15043 {
15044 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Exp215045 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15046 	{
15047 		const fp16type	x		(*in[0]);
15048 		const double	d		(x.asDouble());
15049 		const double	result	(deExp2(d));
15050 		const double	ulps	(1.0 + 2.0 * deAbs(fp16type(in[0][0]).asDouble()));
15051 
15052 		out[0] = fp16type(result).bits();
15053 		min[0] = getMin(result, ulps);
15054 		max[0] = getMax(result, ulps);
15055 
15056 		return true;
15057 	}
15058 };
15059 
15060 struct fp16Log2 : public fp16PerComponent
15061 {
15062 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Log215063 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15064 	{
15065 		const fp16type	x		(*in[0]);
15066 		const double	d		(x.asDouble());
15067 		const double	result	(deLog2(d));
15068 		const double	error	(de::inRange(deAbs(d), 0.5, 2.0) ? deLdExp(2.0, -7) : floatFormat16.ulp(deAbs(result), 3.0));
15069 
15070 		if (d <= 0.0)
15071 			return false;
15072 
15073 		out[0] = fp16type(result).bits();
15074 		min[0] = result - error;
15075 		max[0] = result + error;
15076 
15077 		return true;
15078 	}
15079 };
15080 
15081 struct fp16Sqrt : public fp16PerComponent
15082 {
getULPsvkt::SpirVAssembly::fp16Sqrt15083 	virtual double getULPs (vector<const deFloat16*>& in)
15084 	{
15085 		DE_UNREF(in);
15086 
15087 		return 6.0;
15088 	}
15089 
15090 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Sqrt15091 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15092 	{
15093 		const fp16type	x		(*in[0]);
15094 		const double	d		(x.asDouble());
15095 		const double	result	(deSqrt(d));
15096 
15097 		if (!x.isNaN() && d < 0.0)
15098 			return false;
15099 
15100 		out[0] = fp16type(result).bits();
15101 		min[0] = getMin(result, getULPs(in));
15102 		max[0] = getMax(result, getULPs(in));
15103 
15104 		return true;
15105 	}
15106 };
15107 
15108 struct fp16InverseSqrt : public fp16PerComponent
15109 {
getULPsvkt::SpirVAssembly::fp16InverseSqrt15110 	virtual double getULPs (vector<const deFloat16*>& in)
15111 	{
15112 		DE_UNREF(in);
15113 
15114 		return 2.0;
15115 	}
15116 
15117 	template<class fp16type>
calcvkt::SpirVAssembly::fp16InverseSqrt15118 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15119 	{
15120 		const fp16type	x		(*in[0]);
15121 		const double	d		(x.asDouble());
15122 		const double	result	(1.0/deSqrt(d));
15123 
15124 		if (!x.isNaN() && d <= 0.0)
15125 			return false;
15126 
15127 		out[0] = fp16type(result).bits();
15128 		min[0] = getMin(result, getULPs(in));
15129 		max[0] = getMax(result, getULPs(in));
15130 
15131 		return true;
15132 	}
15133 };
15134 
15135 struct fp16ModfFrac : public fp16PerComponent
15136 {
15137 	template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfFrac15138 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15139 	{
15140 		const fp16type	x		(*in[0]);
15141 		const double	d		(x.asDouble());
15142 		double			i		(0.0);
15143 		const double	result	(deModf(d, &i));
15144 
15145 		if (x.isInf() || x.isNaN())
15146 			return false;
15147 
15148 		out[0] = fp16type(result).bits();
15149 		min[0] = getMin(result, getULPs(in));
15150 		max[0] = getMax(result, getULPs(in));
15151 
15152 		return true;
15153 	}
15154 };
15155 
15156 struct fp16ModfInt : public fp16PerComponent
15157 {
15158 	template<class fp16type>
calcvkt::SpirVAssembly::fp16ModfInt15159 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15160 	{
15161 		const fp16type	x		(*in[0]);
15162 		const double	d		(x.asDouble());
15163 		double			i		(0.0);
15164 		const double	dummy	(deModf(d, &i));
15165 		const double	result	(i);
15166 
15167 		DE_UNREF(dummy);
15168 
15169 		if (x.isInf() || x.isNaN())
15170 			return false;
15171 
15172 		out[0] = fp16type(result).bits();
15173 		min[0] = getMin(result, getULPs(in));
15174 		max[0] = getMax(result, getULPs(in));
15175 
15176 		return true;
15177 	}
15178 };
15179 
15180 struct fp16FrexpS : public fp16PerComponent
15181 {
15182 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpS15183 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15184 	{
15185 		const fp16type	x		(*in[0]);
15186 		const double	d		(x.asDouble());
15187 		int				e		(0);
15188 		const double	result	(deFrExp(d, &e));
15189 
15190 		if (x.isNaN() || x.isInf())
15191 			return false;
15192 
15193 		out[0] = fp16type(result).bits();
15194 		min[0] = getMin(result, getULPs(in));
15195 		max[0] = getMax(result, getULPs(in));
15196 
15197 		return true;
15198 	}
15199 };
15200 
15201 struct fp16FrexpE : public fp16PerComponent
15202 {
15203 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FrexpE15204 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15205 	{
15206 		const fp16type	x		(*in[0]);
15207 		const double	d		(x.asDouble());
15208 		int				e		(0);
15209 		const double	dummy	(deFrExp(d, &e));
15210 		const double	result	(static_cast<double>(e));
15211 
15212 		DE_UNREF(dummy);
15213 
15214 		if (x.isNaN() || x.isInf())
15215 			return false;
15216 
15217 		out[0] = fp16type(result).bits();
15218 		min[0] = getMin(result, getULPs(in));
15219 		max[0] = getMax(result, getULPs(in));
15220 
15221 		return true;
15222 	}
15223 };
15224 
15225 struct fp16OpFAdd : public fp16PerComponent
15226 {
15227 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFAdd15228 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15229 	{
15230 		const fp16type	x		(*in[0]);
15231 		const fp16type	y		(*in[1]);
15232 		const double	xd		(x.asDouble());
15233 		const double	yd		(y.asDouble());
15234 		const double	result	(xd + yd);
15235 
15236 		out[0] = fp16type(result).bits();
15237 		min[0] = getMin(result, getULPs(in));
15238 		max[0] = getMax(result, getULPs(in));
15239 
15240 		return true;
15241 	}
15242 };
15243 
15244 struct fp16OpFSub : public fp16PerComponent
15245 {
15246 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFSub15247 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15248 	{
15249 		const fp16type	x		(*in[0]);
15250 		const fp16type	y		(*in[1]);
15251 		const double	xd		(x.asDouble());
15252 		const double	yd		(y.asDouble());
15253 		const double	result	(xd - yd);
15254 
15255 		out[0] = fp16type(result).bits();
15256 		min[0] = getMin(result, getULPs(in));
15257 		max[0] = getMax(result, getULPs(in));
15258 
15259 		return true;
15260 	}
15261 };
15262 
15263 struct fp16OpFMul : public fp16PerComponent
15264 {
15265 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFMul15266 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15267 	{
15268 		const fp16type	x		(*in[0]);
15269 		const fp16type	y		(*in[1]);
15270 		const double	xd		(x.asDouble());
15271 		const double	yd		(y.asDouble());
15272 		const double	result	(xd * yd);
15273 
15274 		out[0] = fp16type(result).bits();
15275 		min[0] = getMin(result, getULPs(in));
15276 		max[0] = getMax(result, getULPs(in));
15277 
15278 		return true;
15279 	}
15280 };
15281 
15282 struct fp16OpFDiv : public fp16PerComponent
15283 {
fp16OpFDivvkt::SpirVAssembly::fp16OpFDiv15284 	fp16OpFDiv() : fp16PerComponent()
15285 	{
15286 		flavorNames.push_back("DirectDiv");
15287 		flavorNames.push_back("InverseDiv");
15288 	}
15289 
15290 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OpFDiv15291 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15292 	{
15293 		const fp16type	x			(*in[0]);
15294 		const fp16type	y			(*in[1]);
15295 		const double	xd			(x.asDouble());
15296 		const double	yd			(y.asDouble());
15297 		const double	unspecUlp	(16.0);
15298 		const double	ulpCnt		(de::inRange(deAbs(yd), deLdExp(1, -14), deLdExp(1, 14)) ? 2.5 : unspecUlp);
15299 		double			result		(0.0);
15300 
15301 		if (y.isZero())
15302 			return false;
15303 
15304 		if (getFlavor() == 0)
15305 		{
15306 			result = (xd / yd);
15307 		}
15308 		else if (getFlavor() == 1)
15309 		{
15310 			const double	invyd	(1.0 / yd);
15311 			const fp16type	invy	(invyd);
15312 
15313 			result = (xd * invy.asDouble());
15314 		}
15315 		else
15316 		{
15317 			TCU_THROW(InternalError, "Unknown flavor");
15318 		}
15319 
15320 		out[0] = fp16type(result).bits();
15321 		min[0] = getMin(result, ulpCnt);
15322 		max[0] = getMax(result, ulpCnt);
15323 
15324 		return true;
15325 	}
15326 };
15327 
15328 struct fp16Atan2 : public fp16PerComponent
15329 {
fp16Atan2vkt::SpirVAssembly::fp16Atan215330 	fp16Atan2() : fp16PerComponent()
15331 	{
15332 		flavorNames.push_back("DoubleCalc");
15333 		flavorNames.push_back("DoubleCalc_PI");
15334 	}
15335 
getULPsvkt::SpirVAssembly::fp16Atan215336 	virtual double getULPs(vector<const deFloat16*>& in)
15337 	{
15338 		DE_UNREF(in);
15339 
15340 		return 2 * 5.0; // This is not a precision test. Value is not from spec
15341 	}
15342 
15343 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Atan215344 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15345 	{
15346 		const fp16type	x		(*in[0]);
15347 		const fp16type	y		(*in[1]);
15348 		const double	xd		(x.asDouble());
15349 		const double	yd		(y.asDouble());
15350 		double			result	(0.0);
15351 
15352 		if ((x.isZero() && y.isZero())||(x.isInf() && y.isInf()))
15353 			return false;
15354 
15355 		if (getFlavor() == 0)
15356 		{
15357 			result	= deAtan2(xd, yd);
15358 		}
15359 		else if (getFlavor() == 1)
15360 		{
15361 			const double	ulps	(2.0 * 5.0); // This is not a precision test. Value is not from spec
15362 			const double	eps		(floatFormat16.ulp(DE_PI_DOUBLE, ulps));
15363 
15364 			result	= deAtan2(xd, yd);
15365 
15366 			if (de::inRange(deAbs(result), DE_PI_DOUBLE - eps, DE_PI_DOUBLE + eps))
15367 				result	= -result;
15368 		}
15369 		else
15370 		{
15371 			TCU_THROW(InternalError, "Unknown flavor");
15372 		}
15373 
15374 		out[0] = fp16type(result).bits();
15375 		min[0] = getMin(result, getULPs(in));
15376 		max[0] = getMax(result, getULPs(in));
15377 
15378 		return true;
15379 	}
15380 };
15381 
15382 struct fp16Pow : public fp16PerComponent
15383 {
fp16Powvkt::SpirVAssembly::fp16Pow15384 	fp16Pow() : fp16PerComponent()
15385 	{
15386 		flavorNames.push_back("Pow");
15387 		flavorNames.push_back("PowLog2");
15388 		flavorNames.push_back("PowLog2FP16");
15389 	}
15390 
15391 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Pow15392 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15393 	{
15394 		const fp16type	x		(*in[0]);
15395 		const fp16type	y		(*in[1]);
15396 		const double	xd		(x.asDouble());
15397 		const double	yd		(y.asDouble());
15398 		const double	logxeps	(de::inRange(deAbs(xd), 0.5, 2.0) ? deLdExp(1.0, -7) : floatFormat16.ulp(deLog2(xd), 3.0));
15399 		const double	ulps1	(1.0 + 4.0 * deAbs(yd * (deLog2(xd) - logxeps)));
15400 		const double	ulps2	(1.0 + 4.0 * deAbs(yd * (deLog2(xd) + logxeps)));
15401 		const double	ulps	(deMax(deAbs(ulps1), deAbs(ulps2)));
15402 		double			result	(0.0);
15403 
15404 		if (xd < 0.0)
15405 			return false;
15406 
15407 		if (x.isZero() && yd <= 0.0)
15408 			return false;
15409 
15410 		if (getFlavor() == 0)
15411 		{
15412 			result = dePow(xd, yd);
15413 		}
15414 		else if (getFlavor() == 1)
15415 		{
15416 			const double	l2d	(deLog2(xd));
15417 			const double	e2d	(deExp2(yd * l2d));
15418 
15419 			result = e2d;
15420 		}
15421 		else if (getFlavor() == 2)
15422 		{
15423 			const double	l2d	(deLog2(xd));
15424 			const fp16type	l2	(l2d);
15425 			const double	e2d	(deExp2(yd * l2.asDouble()));
15426 			const fp16type	e2	(e2d);
15427 
15428 			result = e2.asDouble();
15429 		}
15430 		else
15431 		{
15432 			TCU_THROW(InternalError, "Unknown flavor");
15433 		}
15434 
15435 		out[0] = fp16type(result).bits();
15436 		min[0] = getMin(result, ulps);
15437 		max[0] = getMax(result, ulps);
15438 
15439 		return true;
15440 	}
15441 };
15442 
15443 struct fp16FMin : public fp16PerComponent
15444 {
15445 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMin15446 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15447 	{
15448 		const fp16type	x		(*in[0]);
15449 		const fp16type	y		(*in[1]);
15450 		const double	xd		(x.asDouble());
15451 		const double	yd		(y.asDouble());
15452 		const double	result	(deMin(xd, yd));
15453 
15454 		if (x.isNaN() || y.isNaN())
15455 			return false;
15456 
15457 		out[0] = fp16type(result).bits();
15458 		min[0] = getMin(result, getULPs(in));
15459 		max[0] = getMax(result, getULPs(in));
15460 
15461 		return true;
15462 	}
15463 };
15464 
15465 struct fp16FMax : public fp16PerComponent
15466 {
15467 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMax15468 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15469 	{
15470 		const fp16type	x		(*in[0]);
15471 		const fp16type	y		(*in[1]);
15472 		const double	xd		(x.asDouble());
15473 		const double	yd		(y.asDouble());
15474 		const double	result	(deMax(xd, yd));
15475 
15476 		if (x.isNaN() || y.isNaN())
15477 			return false;
15478 
15479 		out[0] = fp16type(result).bits();
15480 		min[0] = getMin(result, getULPs(in));
15481 		max[0] = getMax(result, getULPs(in));
15482 
15483 		return true;
15484 	}
15485 };
15486 
15487 struct fp16Step : public fp16PerComponent
15488 {
15489 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Step15490 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15491 	{
15492 		const fp16type	edge	(*in[0]);
15493 		const fp16type	x		(*in[1]);
15494 		const double	edged	(edge.asDouble());
15495 		const double	xd		(x.asDouble());
15496 		const double	result	(deStep(edged, xd));
15497 
15498 		out[0] = fp16type(result).bits();
15499 		min[0] = getMin(result, getULPs(in));
15500 		max[0] = getMax(result, getULPs(in));
15501 
15502 		return true;
15503 	}
15504 };
15505 
15506 struct fp16Ldexp : public fp16PerComponent
15507 {
15508 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Ldexp15509 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15510 	{
15511 		const fp16type	x		(*in[0]);
15512 		const fp16type	y		(*in[1]);
15513 		const double	xd		(x.asDouble());
15514 		const int		yd		(static_cast<int>(deTrunc(y.asDouble())));
15515 		const double	result	(deLdExp(xd, yd));
15516 
15517 		if (y.isNaN() || y.isInf() || y.isDenorm() || yd < -14 || yd > 15)
15518 			return false;
15519 
15520 		// Spec: "If this product is too large to be represented in the floating-point type, the result is undefined."
15521 		if (fp16type(result).isInf())
15522 			return false;
15523 
15524 		out[0] = fp16type(result).bits();
15525 		min[0] = getMin(result, getULPs(in));
15526 		max[0] = getMax(result, getULPs(in));
15527 
15528 		return true;
15529 	}
15530 };
15531 
15532 struct fp16FClamp : public fp16PerComponent
15533 {
15534 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FClamp15535 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15536 	{
15537 		const fp16type	x		(*in[0]);
15538 		const fp16type	minVal	(*in[1]);
15539 		const fp16type	maxVal	(*in[2]);
15540 		const double	xd		(x.asDouble());
15541 		const double	minVald	(minVal.asDouble());
15542 		const double	maxVald	(maxVal.asDouble());
15543 		const double	result	(deClamp(xd, minVald, maxVald));
15544 
15545 		if (minVal.isNaN() || maxVal.isNaN() || minVald > maxVald)
15546 			return false;
15547 
15548 		out[0] = fp16type(result).bits();
15549 		min[0] = getMin(result, getULPs(in));
15550 		max[0] = getMax(result, getULPs(in));
15551 
15552 		return true;
15553 	}
15554 };
15555 
15556 struct fp16FMix : public fp16PerComponent
15557 {
fp16FMixvkt::SpirVAssembly::fp16FMix15558 	fp16FMix() : fp16PerComponent()
15559 	{
15560 		flavorNames.push_back("DoubleCalc");
15561 		flavorNames.push_back("EmulatingFP16");
15562 		flavorNames.push_back("EmulatingFP16YminusX");
15563 	}
15564 
15565 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FMix15566 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15567 	{
15568 		const fp16type	x		(*in[0]);
15569 		const fp16type	y		(*in[1]);
15570 		const fp16type	a		(*in[2]);
15571 		const double	ulps	(8.0); // This is not a precision test. Value is not from spec
15572 		double			result	(0.0);
15573 
15574 		if (getFlavor() == 0)
15575 		{
15576 			const double	xd		(x.asDouble());
15577 			const double	yd		(y.asDouble());
15578 			const double	ad		(a.asDouble());
15579 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15580 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15581 			const double	eps		(xeps + yeps);
15582 
15583 			result = deMix(xd, yd, ad);
15584 			min[0] = result - eps;
15585 			max[0] = result + eps;
15586 		}
15587 		else if (getFlavor() == 1)
15588 		{
15589 			const double	xd		(x.asDouble());
15590 			const double	yd		(y.asDouble());
15591 			const double	ad		(a.asDouble());
15592 			const fp16type	am		(1.0 - ad);
15593 			const double	amd		(am.asDouble());
15594 			const fp16type	xam		(xd * amd);
15595 			const double	xamd	(xam.asDouble());
15596 			const fp16type	ya		(yd * ad);
15597 			const double	yad		(ya.asDouble());
15598 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15599 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15600 			const double	eps		(xeps + yeps);
15601 
15602 			result = xamd + yad;
15603 			min[0] = result - eps;
15604 			max[0] = result + eps;
15605 		}
15606 		else if (getFlavor() == 2)
15607 		{
15608 			const double	xd		(x.asDouble());
15609 			const double	yd		(y.asDouble());
15610 			const double	ad		(a.asDouble());
15611 			const fp16type	ymx		(yd - xd);
15612 			const double	ymxd	(ymx.asDouble());
15613 			const fp16type	ymxa	(ymxd * ad);
15614 			const double	ymxad	(ymxa.asDouble());
15615 			const double	xeps	(floatFormat16.ulp(deAbs(xd * (1.0 - ad)), ulps));
15616 			const double	yeps	(floatFormat16.ulp(deAbs(yd * ad), ulps));
15617 			const double	eps		(xeps + yeps);
15618 
15619 			result = xd + ymxad;
15620 			min[0] = result - eps;
15621 			max[0] = result + eps;
15622 		}
15623 		else
15624 		{
15625 			TCU_THROW(InternalError, "Unknown flavor");
15626 		}
15627 
15628 		out[0] = fp16type(result).bits();
15629 
15630 		return true;
15631 	}
15632 };
15633 
15634 struct fp16SmoothStep : public fp16PerComponent
15635 {
fp16SmoothStepvkt::SpirVAssembly::fp16SmoothStep15636 	fp16SmoothStep() : fp16PerComponent()
15637 	{
15638 		flavorNames.push_back("FloatCalc");
15639 		flavorNames.push_back("EmulatingFP16");
15640 		flavorNames.push_back("EmulatingFP16WClamp");
15641 	}
15642 
getULPsvkt::SpirVAssembly::fp16SmoothStep15643 	virtual double getULPs(vector<const deFloat16*>& in)
15644 	{
15645 		DE_UNREF(in);
15646 
15647 		return 4.0; // This is not a precision test. Value is not from spec
15648 	}
15649 
15650 	template<class fp16type>
calcvkt::SpirVAssembly::fp16SmoothStep15651 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15652 	{
15653 		const fp16type	edge0	(*in[0]);
15654 		const fp16type	edge1	(*in[1]);
15655 		const fp16type	x		(*in[2]);
15656 		double			result	(0.0);
15657 
15658 		if (edge0.isNaN() || edge1.isNaN() || x.isNaN() || edge0.asDouble() >= edge1.asDouble())
15659 			return false;
15660 
15661 		if (edge0.isInf() || edge1.isInf() || x.isInf())
15662 			return false;
15663 
15664 		if (getFlavor() == 0)
15665 		{
15666 			const float	edge0d	(edge0.asFloat());
15667 			const float	edge1d	(edge1.asFloat());
15668 			const float	xd		(x.asFloat());
15669 			const float	sstep	(deFloatSmoothStep(edge0d, edge1d, xd));
15670 
15671 			result = sstep;
15672 		}
15673 		else if (getFlavor() == 1)
15674 		{
15675 			const double	edge0d	(edge0.asDouble());
15676 			const double	edge1d	(edge1.asDouble());
15677 			const double	xd		(x.asDouble());
15678 
15679 			if (xd <= edge0d)
15680 				result = 0.0;
15681 			else if (xd >= edge1d)
15682 				result = 1.0;
15683 			else
15684 			{
15685 				const fp16type	a	(xd - edge0d);
15686 				const fp16type	b	(edge1d - edge0d);
15687 				const fp16type	t	(a.asDouble() / b.asDouble());
15688 				const fp16type	t2	(2.0 * t.asDouble());
15689 				const fp16type	t3	(3.0 - t2.asDouble());
15690 				const fp16type	t4	(t.asDouble() * t3.asDouble());
15691 				const fp16type	t5	(t.asDouble() * t4.asDouble());
15692 
15693 				result = t5.asDouble();
15694 			}
15695 		}
15696 		else if (getFlavor() == 2)
15697 		{
15698 			const double	edge0d	(edge0.asDouble());
15699 			const double	edge1d	(edge1.asDouble());
15700 			const double	xd		(x.asDouble());
15701 			const fp16type	a	(xd - edge0d);
15702 			const fp16type	b	(edge1d - edge0d);
15703 			const fp16type	bi	(1.0 / b.asDouble());
15704 			const fp16type	t0	(a.asDouble() * bi.asDouble());
15705 			const double	tc	(deClamp(t0.asDouble(), 0.0, 1.0));
15706 			const fp16type	t	(tc);
15707 			const fp16type	t2	(2.0 * t.asDouble());
15708 			const fp16type	t3	(3.0 - t2.asDouble());
15709 			const fp16type	t4	(t.asDouble() * t3.asDouble());
15710 			const fp16type	t5	(t.asDouble() * t4.asDouble());
15711 
15712 			result = t5.asDouble();
15713 		}
15714 		else
15715 		{
15716 			TCU_THROW(InternalError, "Unknown flavor");
15717 		}
15718 
15719 		out[0] = fp16type(result).bits();
15720 		min[0] = getMin(result, getULPs(in));
15721 		max[0] = getMax(result, getULPs(in));
15722 
15723 		return true;
15724 	}
15725 };
15726 
15727 struct fp16Fma : public fp16PerComponent
15728 {
fp16Fmavkt::SpirVAssembly::fp16Fma15729 	fp16Fma()
15730 	{
15731 		flavorNames.push_back("DoubleCalc");
15732 		flavorNames.push_back("EmulatingFP16");
15733 	}
15734 
getULPsvkt::SpirVAssembly::fp16Fma15735 	virtual double getULPs(vector<const deFloat16*>& in)
15736 	{
15737 		DE_UNREF(in);
15738 
15739 		return 16.0;
15740 	}
15741 
15742 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Fma15743 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15744 	{
15745 		DE_ASSERT(in.size() == 3);
15746 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
15747 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
15748 		DE_ASSERT(getArgCompCount(2) == getOutCompCount());
15749 		DE_ASSERT(getOutCompCount() > 0);
15750 
15751 		const fp16type	a		(*in[0]);
15752 		const fp16type	b		(*in[1]);
15753 		const fp16type	c		(*in[2]);
15754 		double			result	(0.0);
15755 
15756 		if (getFlavor() == 0)
15757 		{
15758 			const double	ad	(a.asDouble());
15759 			const double	bd	(b.asDouble());
15760 			const double	cd	(c.asDouble());
15761 
15762 			result	= deMadd(ad, bd, cd);
15763 		}
15764 		else if (getFlavor() == 1)
15765 		{
15766 			const double	ad	(a.asDouble());
15767 			const double	bd	(b.asDouble());
15768 			const double	cd	(c.asDouble());
15769 			const fp16type	ab	(ad * bd);
15770 			const fp16type	r	(ab.asDouble() + cd);
15771 
15772 			result	= r.asDouble();
15773 		}
15774 		else
15775 		{
15776 			TCU_THROW(InternalError, "Unknown flavor");
15777 		}
15778 
15779 		out[0] = fp16type(result).bits();
15780 		min[0] = getMin(result, getULPs(in));
15781 		max[0] = getMax(result, getULPs(in));
15782 
15783 		return true;
15784 	}
15785 };
15786 
15787 
15788 struct fp16AllComponents : public fp16PerComponent
15789 {
callOncePerComponentvkt::SpirVAssembly::fp16AllComponents15790 	bool		callOncePerComponent	()	{ return false; }
15791 };
15792 
15793 struct fp16Length : public fp16AllComponents
15794 {
fp16Lengthvkt::SpirVAssembly::fp16Length15795 	fp16Length() : fp16AllComponents()
15796 	{
15797 		flavorNames.push_back("EmulatingFP16");
15798 		flavorNames.push_back("DoubleCalc");
15799 	}
15800 
getULPsvkt::SpirVAssembly::fp16Length15801 	virtual double getULPs(vector<const deFloat16*>& in)
15802 	{
15803 		DE_UNREF(in);
15804 
15805 		return 4.0;
15806 	}
15807 
15808 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Length15809 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15810 	{
15811 		DE_ASSERT(getOutCompCount() == 1);
15812 		DE_ASSERT(in.size() == 1);
15813 
15814 		double	result	(0.0);
15815 
15816 		if (getFlavor() == 0)
15817 		{
15818 			fp16type	r	(0.0);
15819 
15820 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15821 			{
15822 				const fp16type	x	(in[0][componentNdx]);
15823 				const fp16type	q	(x.asDouble() * x.asDouble());
15824 
15825 				r = fp16type(r.asDouble() + q.asDouble());
15826 			}
15827 
15828 			result = deSqrt(r.asDouble());
15829 
15830 			out[0] = fp16type(result).bits();
15831 		}
15832 		else if (getFlavor() == 1)
15833 		{
15834 			double	r	(0.0);
15835 
15836 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15837 			{
15838 				const fp16type	x	(in[0][componentNdx]);
15839 				const double	q	(x.asDouble() * x.asDouble());
15840 
15841 				r += q;
15842 			}
15843 
15844 			result = deSqrt(r);
15845 
15846 			out[0] = fp16type(result).bits();
15847 		}
15848 		else
15849 		{
15850 			TCU_THROW(InternalError, "Unknown flavor");
15851 		}
15852 
15853 		min[0] = getMin(result, getULPs(in));
15854 		max[0] = getMax(result, getULPs(in));
15855 
15856 		return true;
15857 	}
15858 };
15859 
15860 struct fp16Distance : public fp16AllComponents
15861 {
fp16Distancevkt::SpirVAssembly::fp16Distance15862 	fp16Distance() : fp16AllComponents()
15863 	{
15864 		flavorNames.push_back("EmulatingFP16");
15865 		flavorNames.push_back("DoubleCalc");
15866 	}
15867 
getULPsvkt::SpirVAssembly::fp16Distance15868 	virtual double getULPs(vector<const deFloat16*>& in)
15869 	{
15870 		DE_UNREF(in);
15871 
15872 		return 4.0;
15873 	}
15874 
15875 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Distance15876 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15877 	{
15878 		DE_ASSERT(getOutCompCount() == 1);
15879 		DE_ASSERT(in.size() == 2);
15880 		DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
15881 
15882 		double	result	(0.0);
15883 
15884 		if (getFlavor() == 0)
15885 		{
15886 			fp16type	r	(0.0);
15887 
15888 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15889 			{
15890 				const fp16type	x	(in[0][componentNdx]);
15891 				const fp16type	y	(in[1][componentNdx]);
15892 				const fp16type	d	(x.asDouble() - y.asDouble());
15893 				const fp16type	q	(d.asDouble() * d.asDouble());
15894 
15895 				r = fp16type(r.asDouble() + q.asDouble());
15896 			}
15897 
15898 			result = deSqrt(r.asDouble());
15899 		}
15900 		else if (getFlavor() == 1)
15901 		{
15902 			double	r	(0.0);
15903 
15904 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
15905 			{
15906 				const fp16type	x	(in[0][componentNdx]);
15907 				const fp16type	y	(in[1][componentNdx]);
15908 				const double	d	(x.asDouble() - y.asDouble());
15909 				const double	q	(d * d);
15910 
15911 				r += q;
15912 			}
15913 
15914 			result = deSqrt(r);
15915 		}
15916 		else
15917 		{
15918 			TCU_THROW(InternalError, "Unknown flavor");
15919 		}
15920 
15921 		out[0] = fp16type(result).bits();
15922 		min[0] = getMin(result, getULPs(in));
15923 		max[0] = getMax(result, getULPs(in));
15924 
15925 		return true;
15926 	}
15927 };
15928 
15929 struct fp16Cross : public fp16AllComponents
15930 {
fp16Crossvkt::SpirVAssembly::fp16Cross15931 	fp16Cross() : fp16AllComponents()
15932 	{
15933 		flavorNames.push_back("EmulatingFP16");
15934 		flavorNames.push_back("DoubleCalc");
15935 	}
15936 
getULPsvkt::SpirVAssembly::fp16Cross15937 	virtual double getULPs(vector<const deFloat16*>& in)
15938 	{
15939 		DE_UNREF(in);
15940 
15941 		return 4.0;
15942 	}
15943 
15944 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Cross15945 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
15946 	{
15947 		DE_ASSERT(getOutCompCount() == 3);
15948 		DE_ASSERT(in.size() == 2);
15949 		DE_ASSERT(getArgCompCount(0) == 3);
15950 		DE_ASSERT(getArgCompCount(1) == 3);
15951 
15952 		if (getFlavor() == 0)
15953 		{
15954 			const fp16type	x0		(in[0][0]);
15955 			const fp16type	x1		(in[0][1]);
15956 			const fp16type	x2		(in[0][2]);
15957 			const fp16type	y0		(in[1][0]);
15958 			const fp16type	y1		(in[1][1]);
15959 			const fp16type	y2		(in[1][2]);
15960 			const fp16type	x1y2	(x1.asDouble() * y2.asDouble());
15961 			const fp16type	y1x2	(y1.asDouble() * x2.asDouble());
15962 			const fp16type	x2y0	(x2.asDouble() * y0.asDouble());
15963 			const fp16type	y2x0	(y2.asDouble() * x0.asDouble());
15964 			const fp16type	x0y1	(x0.asDouble() * y1.asDouble());
15965 			const fp16type	y0x1	(y0.asDouble() * x1.asDouble());
15966 
15967 			out[0] = fp16type(x1y2.asDouble() - y1x2.asDouble()).bits();
15968 			out[1] = fp16type(x2y0.asDouble() - y2x0.asDouble()).bits();
15969 			out[2] = fp16type(x0y1.asDouble() - y0x1.asDouble()).bits();
15970 		}
15971 		else if (getFlavor() == 1)
15972 		{
15973 			const fp16type	x0		(in[0][0]);
15974 			const fp16type	x1		(in[0][1]);
15975 			const fp16type	x2		(in[0][2]);
15976 			const fp16type	y0		(in[1][0]);
15977 			const fp16type	y1		(in[1][1]);
15978 			const fp16type	y2		(in[1][2]);
15979 			const double	x1y2	(x1.asDouble() * y2.asDouble());
15980 			const double	y1x2	(y1.asDouble() * x2.asDouble());
15981 			const double	x2y0	(x2.asDouble() * y0.asDouble());
15982 			const double	y2x0	(y2.asDouble() * x0.asDouble());
15983 			const double	x0y1	(x0.asDouble() * y1.asDouble());
15984 			const double	y0x1	(y0.asDouble() * x1.asDouble());
15985 
15986 			out[0] = fp16type(x1y2 - y1x2).bits();
15987 			out[1] = fp16type(x2y0 - y2x0).bits();
15988 			out[2] = fp16type(x0y1 - y0x1).bits();
15989 		}
15990 		else
15991 		{
15992 			TCU_THROW(InternalError, "Unknown flavor");
15993 		}
15994 
15995 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15996 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
15997 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
15998 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
15999 
16000 		return true;
16001 	}
16002 };
16003 
16004 struct fp16Normalize : public fp16AllComponents
16005 {
fp16Normalizevkt::SpirVAssembly::fp16Normalize16006 	fp16Normalize() : fp16AllComponents()
16007 	{
16008 		flavorNames.push_back("EmulatingFP16");
16009 		flavorNames.push_back("DoubleCalc");
16010 
16011 		permutationsFlavorStart = 0;
16012 		permutationsFlavorEnd = flavorNames.size();
16013 
16014 		// flavorNames will be extended later
16015 	}
16016 
setArgCompCountvkt::SpirVAssembly::fp16Normalize16017 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)
16018 	{
16019 		DE_ASSERT(argCompCount[argNo] == 0); // Once only
16020 
16021 		if (argNo == 0 && argCompCount[argNo] == 0)
16022 		{
16023 			const size_t		maxPermutationsCount	= 24u; // Equal to 4!
16024 			std::vector<int>	indices;
16025 
16026 			for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16027 				indices.push_back(static_cast<int>(componentNdx));
16028 
16029 			m_permutations.reserve(maxPermutationsCount);
16030 
16031 			permutationsFlavorStart = flavorNames.size();
16032 
16033 			do
16034 			{
16035 				tcu::UVec4	permutation;
16036 				std::string	name		= "Permutted_";
16037 
16038 				for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16039 				{
16040 					permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16041 					name += de::toString(indices[componentNdx]);
16042 				}
16043 
16044 				m_permutations.push_back(permutation);
16045 				flavorNames.push_back(name);
16046 
16047 			} while(std::next_permutation(indices.begin(), indices.end()));
16048 
16049 			permutationsFlavorEnd = flavorNames.size();
16050 		}
16051 
16052 		fp16AllComponents::setArgCompCount(argNo, compCount);
16053 	}
getULPsvkt::SpirVAssembly::fp16Normalize16054 	virtual double getULPs(vector<const deFloat16*>& in)
16055 	{
16056 		DE_UNREF(in);
16057 
16058 		return 8.0;
16059 	}
16060 
16061 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Normalize16062 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16063 	{
16064 		DE_ASSERT(in.size() == 1);
16065 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16066 
16067 		if (getFlavor() == 0)
16068 		{
16069 			fp16type	r(0.0);
16070 
16071 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16072 			{
16073 				const fp16type	x	(in[0][componentNdx]);
16074 				const fp16type	q	(x.asDouble() * x.asDouble());
16075 
16076 				r = fp16type(r.asDouble() + q.asDouble());
16077 			}
16078 
16079 			r = fp16type(deSqrt(r.asDouble()));
16080 
16081 			if (r.isZero())
16082 				return false;
16083 
16084 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16085 			{
16086 				const fp16type	x	(in[0][componentNdx]);
16087 
16088 				out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16089 			}
16090 		}
16091 		else if (getFlavor() == 1)
16092 		{
16093 			double	r(0.0);
16094 
16095 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16096 			{
16097 				const fp16type	x	(in[0][componentNdx]);
16098 				const double	q	(x.asDouble() * x.asDouble());
16099 
16100 				r += q;
16101 			}
16102 
16103 			r = deSqrt(r);
16104 
16105 			if (r == 0)
16106 				return false;
16107 
16108 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16109 			{
16110 				const fp16type	x	(in[0][componentNdx]);
16111 
16112 				out[componentNdx] = fp16type(x.asDouble() / r).bits();
16113 			}
16114 		}
16115 		else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16116 		{
16117 			const int			compCount		(static_cast<int>(getArgCompCount(0)));
16118 			const size_t		permutationNdx	(getFlavor() - permutationsFlavorStart);
16119 			const tcu::UVec4&	permutation		(m_permutations[permutationNdx]);
16120 			fp16type			r				(0.0);
16121 
16122 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16123 			{
16124 				const size_t	componentNdx	(permutation[permComponentNdx]);
16125 				const fp16type	x				(in[0][componentNdx]);
16126 				const fp16type	q				(x.asDouble() * x.asDouble());
16127 
16128 				r = fp16type(r.asDouble() + q.asDouble());
16129 			}
16130 
16131 			r = fp16type(deSqrt(r.asDouble()));
16132 
16133 			if (r.isZero())
16134 				return false;
16135 
16136 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16137 			{
16138 				const size_t	componentNdx	(permutation[permComponentNdx]);
16139 				const fp16type	x				(in[0][componentNdx]);
16140 
16141 				out[componentNdx] = fp16type(x.asDouble() / r.asDouble()).bits();
16142 			}
16143 		}
16144 		else
16145 		{
16146 			TCU_THROW(InternalError, "Unknown flavor");
16147 		}
16148 
16149 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16150 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16151 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16152 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16153 
16154 		return true;
16155 	}
16156 
16157 private:
16158 	std::vector<tcu::UVec4> m_permutations;
16159 	size_t					permutationsFlavorStart;
16160 	size_t					permutationsFlavorEnd;
16161 };
16162 
16163 struct fp16FaceForward : public fp16AllComponents
16164 {
getULPsvkt::SpirVAssembly::fp16FaceForward16165 	virtual double getULPs(vector<const deFloat16*>& in)
16166 	{
16167 		DE_UNREF(in);
16168 
16169 		return 4.0;
16170 	}
16171 
16172 	template<class fp16type>
calcvkt::SpirVAssembly::fp16FaceForward16173 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16174 	{
16175 		DE_ASSERT(in.size() == 3);
16176 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16177 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16178 		DE_ASSERT(getArgCompCount(2) == getOutCompCount());
16179 
16180 		fp16type	dp(0.0);
16181 
16182 		for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16183 		{
16184 			const fp16type	x	(in[1][componentNdx]);
16185 			const fp16type	y	(in[2][componentNdx]);
16186 			const double	xd	(x.asDouble());
16187 			const double	yd	(y.asDouble());
16188 			const fp16type	q	(xd * yd);
16189 
16190 			dp = fp16type(dp.asDouble() + q.asDouble());
16191 		}
16192 
16193 		if (dp.isNaN() || dp.isZero())
16194 			return false;
16195 
16196 		for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16197 		{
16198 			const fp16type	n	(in[0][componentNdx]);
16199 
16200 			out[componentNdx] = (dp.signBit() == 1) ? n.bits() : fp16type(-n.asDouble()).bits();
16201 		}
16202 
16203 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16204 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16205 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16206 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16207 
16208 		return true;
16209 	}
16210 };
16211 
16212 struct fp16Reflect : public fp16AllComponents
16213 {
fp16Reflectvkt::SpirVAssembly::fp16Reflect16214 	fp16Reflect() : fp16AllComponents()
16215 	{
16216 		flavorNames.push_back("EmulatingFP16");
16217 		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16218 		flavorNames.push_back("FloatCalc");
16219 		flavorNames.push_back("FloatCalc+KeepZeroSign");
16220 		flavorNames.push_back("EmulatingFP16+2Nfirst");
16221 		flavorNames.push_back("EmulatingFP16+2Ifirst");
16222 	}
16223 
getULPsvkt::SpirVAssembly::fp16Reflect16224 	virtual double getULPs(vector<const deFloat16*>& in)
16225 	{
16226 		DE_UNREF(in);
16227 
16228 		return 256.0; // This is not a precision test. Value is not from spec
16229 	}
16230 
16231 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Reflect16232 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16233 	{
16234 		DE_ASSERT(in.size() == 2);
16235 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16236 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16237 
16238 		if (getFlavor() < 4)
16239 		{
16240 			const bool	keepZeroSign	((flavor & 1) != 0 ? true : false);
16241 			const bool	floatCalc		((flavor & 2) != 0 ? true : false);
16242 
16243 			if (floatCalc)
16244 			{
16245 				float	dp(0.0f);
16246 
16247 				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16248 				{
16249 					const fp16type	i	(in[0][componentNdx]);
16250 					const fp16type	n	(in[1][componentNdx]);
16251 					const float		id	(i.asFloat());
16252 					const float		nd	(n.asFloat());
16253 					const float		qd	(id * nd);
16254 
16255 					if (keepZeroSign)
16256 						dp = (componentNdx == 0) ? qd : dp + qd;
16257 					else
16258 						dp = dp + qd;
16259 				}
16260 
16261 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16262 				{
16263 					const fp16type	i		(in[0][componentNdx]);
16264 					const fp16type	n		(in[1][componentNdx]);
16265 					const float		dpnd	(dp * n.asFloat());
16266 					const float		dpn2d	(2.0f * dpnd);
16267 					const float		idpn2d	(i.asFloat() - dpn2d);
16268 					const fp16type	result	(idpn2d);
16269 
16270 					out[componentNdx] = result.bits();
16271 				}
16272 			}
16273 			else
16274 			{
16275 				fp16type	dp(0.0);
16276 
16277 				for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16278 				{
16279 					const fp16type	i	(in[0][componentNdx]);
16280 					const fp16type	n	(in[1][componentNdx]);
16281 					const double	id	(i.asDouble());
16282 					const double	nd	(n.asDouble());
16283 					const fp16type	q	(id * nd);
16284 
16285 					if (keepZeroSign)
16286 						dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16287 					else
16288 						dp = fp16type(dp.asDouble() + q.asDouble());
16289 				}
16290 
16291 				if (dp.isNaN())
16292 					return false;
16293 
16294 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16295 				{
16296 					const fp16type	i		(in[0][componentNdx]);
16297 					const fp16type	n		(in[1][componentNdx]);
16298 					const fp16type	dpn		(dp.asDouble() * n.asDouble());
16299 					const fp16type	dpn2	(2 * dpn.asDouble());
16300 					const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16301 
16302 					out[componentNdx] = idpn2.bits();
16303 				}
16304 			}
16305 		}
16306 		else if (getFlavor() == 4)
16307 		{
16308 			fp16type	dp(0.0);
16309 
16310 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16311 			{
16312 				const fp16type	i	(in[0][componentNdx]);
16313 				const fp16type	n	(in[1][componentNdx]);
16314 				const double	id	(i.asDouble());
16315 				const double	nd	(n.asDouble());
16316 				const fp16type	q	(id * nd);
16317 
16318 				dp = fp16type(dp.asDouble() + q.asDouble());
16319 			}
16320 
16321 			if (dp.isNaN())
16322 				return false;
16323 
16324 			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16325 			{
16326 				const fp16type	i		(in[0][componentNdx]);
16327 				const fp16type	n		(in[1][componentNdx]);
16328 				const fp16type	n2		(2 * n.asDouble());
16329 				const fp16type	dpn2	(dp.asDouble() * n2.asDouble());
16330 				const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16331 
16332 				out[componentNdx] = idpn2.bits();
16333 			}
16334 		}
16335 		else if (getFlavor() == 5)
16336 		{
16337 			fp16type	dp2(0.0);
16338 
16339 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16340 			{
16341 				const fp16type	i	(in[0][componentNdx]);
16342 				const fp16type	n	(in[1][componentNdx]);
16343 				const fp16type	i2	(2.0 * i.asDouble());
16344 				const double	i2d	(i2.asDouble());
16345 				const double	nd	(n.asDouble());
16346 				const fp16type	q	(i2d * nd);
16347 
16348 				dp2 = fp16type(dp2.asDouble() + q.asDouble());
16349 			}
16350 
16351 			if (dp2.isNaN())
16352 				return false;
16353 
16354 			for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16355 			{
16356 				const fp16type	i		(in[0][componentNdx]);
16357 				const fp16type	n		(in[1][componentNdx]);
16358 				const fp16type	dpn2	(dp2.asDouble() * n.asDouble());
16359 				const fp16type	idpn2	(i.asDouble() - dpn2.asDouble());
16360 
16361 				out[componentNdx] = idpn2.bits();
16362 			}
16363 		}
16364 		else
16365 		{
16366 			TCU_THROW(InternalError, "Unknown flavor");
16367 		}
16368 
16369 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16370 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16371 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16372 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16373 
16374 		return true;
16375 	}
16376 };
16377 
16378 struct fp16Refract : public fp16AllComponents
16379 {
fp16Refractvkt::SpirVAssembly::fp16Refract16380 	fp16Refract() : fp16AllComponents()
16381 	{
16382 		flavorNames.push_back("EmulatingFP16");
16383 		flavorNames.push_back("EmulatingFP16+KeepZeroSign");
16384 		flavorNames.push_back("FloatCalc");
16385 		flavorNames.push_back("FloatCalc+KeepZeroSign");
16386 	}
16387 
getULPsvkt::SpirVAssembly::fp16Refract16388 	virtual double getULPs(vector<const deFloat16*>& in)
16389 	{
16390 		DE_UNREF(in);
16391 
16392 		return 8192.0; // This is not a precision test. Value is not from spec
16393 	}
16394 
16395 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Refract16396 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16397 	{
16398 		DE_ASSERT(in.size() == 3);
16399 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16400 		DE_ASSERT(getArgCompCount(1) == getOutCompCount());
16401 		DE_ASSERT(getArgCompCount(2) == 1);
16402 
16403 		const bool		keepZeroSign	((flavor & 1) != 0 ? true : false);
16404 		const bool		doubleCalc		((flavor & 2) != 0 ? true : false);
16405 		const fp16type	eta				(*in[2]);
16406 
16407 		if (doubleCalc)
16408 		{
16409 			double	dp	(0.0);
16410 
16411 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16412 			{
16413 				const fp16type	i	(in[0][componentNdx]);
16414 				const fp16type	n	(in[1][componentNdx]);
16415 				const double	id	(i.asDouble());
16416 				const double	nd	(n.asDouble());
16417 				const double	qd	(id * nd);
16418 
16419 				if (keepZeroSign)
16420 					dp = (componentNdx == 0) ? qd : dp + qd;
16421 				else
16422 					dp = dp + qd;
16423 			}
16424 
16425 			const double	eta2	(eta.asDouble() * eta.asDouble());
16426 			const double	dp2		(dp * dp);
16427 			const double	dp1		(1.0 - dp2);
16428 			const double	dpe		(eta2 * dp1);
16429 			const double	k		(1.0 - dpe);
16430 
16431 			if (k < 0.0)
16432 			{
16433 				const fp16type	zero	(0.0);
16434 
16435 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16436 					out[componentNdx] = zero.bits();
16437 			}
16438 			else
16439 			{
16440 				const double	sk	(deSqrt(k));
16441 
16442 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16443 				{
16444 					const fp16type	i		(in[0][componentNdx]);
16445 					const fp16type	n		(in[1][componentNdx]);
16446 					const double	etai	(i.asDouble() * eta.asDouble());
16447 					const double	etadp	(eta.asDouble() * dp);
16448 					const double	etadpk	(etadp + sk);
16449 					const double	etadpkn	(etadpk * n.asDouble());
16450 					const double	full	(etai - etadpkn);
16451 					const fp16type	result	(full);
16452 
16453 					if (result.isInf())
16454 						return false;
16455 
16456 					out[componentNdx] = result.bits();
16457 				}
16458 			}
16459 		}
16460 		else
16461 		{
16462 			fp16type	dp	(0.0);
16463 
16464 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16465 			{
16466 				const fp16type	i	(in[0][componentNdx]);
16467 				const fp16type	n	(in[1][componentNdx]);
16468 				const double	id	(i.asDouble());
16469 				const double	nd	(n.asDouble());
16470 				const fp16type	q	(id * nd);
16471 
16472 				if (keepZeroSign)
16473 					dp = (componentNdx == 0) ? q : fp16type(dp.asDouble() + q.asDouble());
16474 				else
16475 					dp = fp16type(dp.asDouble() + q.asDouble());
16476 			}
16477 
16478 			if (dp.isNaN())
16479 				return false;
16480 
16481 			const fp16type	eta2(eta.asDouble() * eta.asDouble());
16482 			const fp16type	dp2	(dp.asDouble() * dp.asDouble());
16483 			const fp16type	dp1	(1.0 - dp2.asDouble());
16484 			const fp16type	dpe	(eta2.asDouble() * dp1.asDouble());
16485 			const fp16type	k	(1.0 - dpe.asDouble());
16486 
16487 			if (k.asDouble() < 0.0)
16488 			{
16489 				const fp16type	zero	(0.0);
16490 
16491 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16492 					out[componentNdx] = zero.bits();
16493 			}
16494 			else
16495 			{
16496 				const fp16type	sk	(deSqrt(k.asDouble()));
16497 
16498 				for (size_t componentNdx = 0; componentNdx < getOutCompCount(); ++componentNdx)
16499 				{
16500 					const fp16type	i		(in[0][componentNdx]);
16501 					const fp16type	n		(in[1][componentNdx]);
16502 					const fp16type	etai	(i.asDouble() * eta.asDouble());
16503 					const fp16type	etadp	(eta.asDouble() * dp.asDouble());
16504 					const fp16type	etadpk	(etadp.asDouble() + sk.asDouble());
16505 					const fp16type	etadpkn	(etadpk.asDouble() * n.asDouble());
16506 					const fp16type	full	(etai.asDouble() - etadpkn.asDouble());
16507 
16508 					if (full.isNaN() || full.isInf())
16509 						return false;
16510 
16511 					out[componentNdx] = full.bits();
16512 				}
16513 			}
16514 		}
16515 
16516 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16517 			min[ndx] = getMin(fp16type(out[ndx]).asDouble(), getULPs(in));
16518 		for (size_t ndx = 0; ndx < getOutCompCount(); ++ndx)
16519 			max[ndx] = getMax(fp16type(out[ndx]).asDouble(), getULPs(in));
16520 
16521 		return true;
16522 	}
16523 };
16524 
16525 struct fp16Dot : public fp16AllComponents
16526 {
fp16Dotvkt::SpirVAssembly::fp16Dot16527 	fp16Dot() : fp16AllComponents()
16528 	{
16529 		flavorNames.push_back("EmulatingFP16");
16530 		flavorNames.push_back("FloatCalc");
16531 		flavorNames.push_back("DoubleCalc");
16532 
16533 		permutationsFlavorStart = 0;
16534 		permutationsFlavorEnd = flavorNames.size();
16535 
16536 		// flavorNames will be extended later
16537 	}
16538 
setArgCompCountvkt::SpirVAssembly::fp16Dot16539 	virtual void	setArgCompCount			(size_t argNo, size_t compCount)
16540 	{
16541 		DE_ASSERT(argCompCount[argNo] == 0); // Once only
16542 
16543 		if (argNo == 0 && argCompCount[argNo] == 0)
16544 		{
16545 			const size_t		maxPermutationsCount	= 24u; // Equal to 4!
16546 			std::vector<int>	indices;
16547 
16548 			for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16549 				indices.push_back(static_cast<int>(componentNdx));
16550 
16551 			m_permutations.reserve(maxPermutationsCount);
16552 
16553 			permutationsFlavorStart = flavorNames.size();
16554 
16555 			do
16556 			{
16557 				tcu::UVec4	permutation;
16558 				std::string	name		= "Permutted_";
16559 
16560 				for (size_t componentNdx = 0; componentNdx < compCount; ++componentNdx)
16561 				{
16562 					permutation[static_cast<int>(componentNdx)] = indices[componentNdx];
16563 					name += de::toString(indices[componentNdx]);
16564 				}
16565 
16566 				m_permutations.push_back(permutation);
16567 				flavorNames.push_back(name);
16568 
16569 			} while(std::next_permutation(indices.begin(), indices.end()));
16570 
16571 			permutationsFlavorEnd = flavorNames.size();
16572 		}
16573 
16574 		fp16AllComponents::setArgCompCount(argNo, compCount);
16575 	}
16576 
getULPsvkt::SpirVAssembly::fp16Dot16577 	virtual double	getULPs(vector<const deFloat16*>& in)
16578 	{
16579 		DE_UNREF(in);
16580 
16581 		return 16.0; // This is not a precision test. Value is not from spec
16582 	}
16583 
16584 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Dot16585 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16586 	{
16587 		DE_ASSERT(in.size() == 2);
16588 		DE_ASSERT(getArgCompCount(0) == getArgCompCount(1));
16589 		DE_ASSERT(getOutCompCount() == 1);
16590 
16591 		double	result	(0.0);
16592 		double	eps		(0.0);
16593 
16594 		if (getFlavor() == 0)
16595 		{
16596 			fp16type	dp	(0.0);
16597 
16598 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16599 			{
16600 				const fp16type	x	(in[0][componentNdx]);
16601 				const fp16type	y	(in[1][componentNdx]);
16602 				const fp16type	q	(x.asDouble() * y.asDouble());
16603 
16604 				dp = fp16type(dp.asDouble() + q.asDouble());
16605 				eps += floatFormat16.ulp(q.asDouble(), 2.0);
16606 			}
16607 
16608 			result = dp.asDouble();
16609 		}
16610 		else if (getFlavor() == 1)
16611 		{
16612 			float	dp	(0.0);
16613 
16614 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16615 			{
16616 				const fp16type	x	(in[0][componentNdx]);
16617 				const fp16type	y	(in[1][componentNdx]);
16618 				const float		q	(x.asFloat() * y.asFloat());
16619 
16620 				dp += q;
16621 				eps += floatFormat16.ulp(static_cast<double>(q), 2.0);
16622 			}
16623 
16624 			result = dp;
16625 		}
16626 		else if (getFlavor() == 2)
16627 		{
16628 			double	dp	(0.0);
16629 
16630 			for (size_t componentNdx = 0; componentNdx < getArgCompCount(1); ++componentNdx)
16631 			{
16632 				const fp16type	x	(in[0][componentNdx]);
16633 				const fp16type	y	(in[1][componentNdx]);
16634 				const double	q	(x.asDouble() * y.asDouble());
16635 
16636 				dp += q;
16637 				eps += floatFormat16.ulp(q, 2.0);
16638 			}
16639 
16640 			result = dp;
16641 		}
16642 		else if (de::inBounds<size_t>(getFlavor(), permutationsFlavorStart, permutationsFlavorEnd))
16643 		{
16644 			const int			compCount		(static_cast<int>(getArgCompCount(1)));
16645 			const size_t		permutationNdx	(getFlavor() - permutationsFlavorStart);
16646 			const tcu::UVec4&	permutation		(m_permutations[permutationNdx]);
16647 			fp16type			dp				(0.0);
16648 
16649 			for (int permComponentNdx = 0; permComponentNdx < compCount; ++permComponentNdx)
16650 			{
16651 				const size_t		componentNdx	(permutation[permComponentNdx]);
16652 				const fp16type		x				(in[0][componentNdx]);
16653 				const fp16type		y				(in[1][componentNdx]);
16654 				const fp16type		q				(x.asDouble() * y.asDouble());
16655 
16656 				dp = fp16type(dp.asDouble() + q.asDouble());
16657 				eps += floatFormat16.ulp(q.asDouble(), 2.0);
16658 			}
16659 
16660 			result = dp.asDouble();
16661 		}
16662 		else
16663 		{
16664 			TCU_THROW(InternalError, "Unknown flavor");
16665 		}
16666 
16667 		out[0] = fp16type(result).bits();
16668 		min[0] = result - eps;
16669 		max[0] = result + eps;
16670 
16671 		return true;
16672 	}
16673 
16674 private:
16675 	std::vector<tcu::UVec4> m_permutations;
16676 	size_t					permutationsFlavorStart;
16677 	size_t					permutationsFlavorEnd;
16678 };
16679 
16680 struct fp16VectorTimesScalar : public fp16AllComponents
16681 {
getULPsvkt::SpirVAssembly::fp16VectorTimesScalar16682 	virtual double getULPs(vector<const deFloat16*>& in)
16683 	{
16684 		DE_UNREF(in);
16685 
16686 		return 2.0;
16687 	}
16688 
16689 	template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesScalar16690 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16691 	{
16692 		DE_ASSERT(in.size() == 2);
16693 		DE_ASSERT(getArgCompCount(0) == getOutCompCount());
16694 		DE_ASSERT(getArgCompCount(1) == 1);
16695 
16696 		fp16type	s	(*in[1]);
16697 
16698 		for (size_t componentNdx = 0; componentNdx < getArgCompCount(0); ++componentNdx)
16699 		{
16700 			const fp16type	x	   (in[0][componentNdx]);
16701 			const double    result (s.asDouble() * x.asDouble());
16702 			const fp16type	m	   (result);
16703 
16704 			out[componentNdx] = m.bits();
16705 			min[componentNdx] = getMin(result, getULPs(in));
16706 			max[componentNdx] = getMax(result, getULPs(in));
16707 		}
16708 
16709 		return true;
16710 	}
16711 };
16712 
16713 struct fp16MatrixBase : public fp16AllComponents
16714 {
getComponentValidityvkt::SpirVAssembly::fp16MatrixBase16715 	deUint32		getComponentValidity			()
16716 	{
16717 		return static_cast<deUint32>(-1);
16718 	}
16719 
getNdxvkt::SpirVAssembly::fp16MatrixBase16720 	inline size_t	getNdx							(const size_t rowCount, const size_t col, const size_t row)
16721 	{
16722 		const size_t minComponentCount	= 0;
16723 		const size_t maxComponentCount	= 3;
16724 		const size_t alignedRowsCount	= (rowCount == 3) ? 4 : rowCount;
16725 
16726 		DE_ASSERT(de::inRange(rowCount, minComponentCount + 1, maxComponentCount + 1));
16727 		DE_ASSERT(de::inRange(col, minComponentCount, maxComponentCount));
16728 		DE_ASSERT(de::inBounds(row, minComponentCount, rowCount));
16729 		DE_UNREF(minComponentCount);
16730 		DE_UNREF(maxComponentCount);
16731 
16732 		return col * alignedRowsCount + row;
16733 	}
16734 
getComponentMatrixValidityMaskvkt::SpirVAssembly::fp16MatrixBase16735 	deUint32		getComponentMatrixValidityMask	(size_t cols, size_t rows)
16736 	{
16737 		deUint32	result	= 0u;
16738 
16739 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16740 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16741 			{
16742 				const size_t bitNdx = getNdx(rows, colNdx, rowNdx);
16743 
16744 				DE_ASSERT(bitNdx < sizeof(result) * 8);
16745 
16746 				result |= (1<<bitNdx);
16747 			}
16748 
16749 		return result;
16750 	}
16751 };
16752 
16753 template<size_t cols, size_t rows>
16754 struct fp16Transpose : public fp16MatrixBase
16755 {
getULPsvkt::SpirVAssembly::fp16Transpose16756 	virtual double getULPs(vector<const deFloat16*>& in)
16757 	{
16758 		DE_UNREF(in);
16759 
16760 		return 1.0;
16761 	}
16762 
getComponentValidityvkt::SpirVAssembly::fp16Transpose16763 	deUint32	getComponentValidity	()
16764 	{
16765 		return getComponentMatrixValidityMask(rows, cols);
16766 	}
16767 
16768 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Transpose16769 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16770 	{
16771 		DE_ASSERT(in.size() == 1);
16772 
16773 		const size_t		alignedCols	= (cols == 3) ? 4 : cols;
16774 		const size_t		alignedRows	= (rows == 3) ? 4 : rows;
16775 		vector<deFloat16>	output		(alignedCols * alignedRows, 0);
16776 
16777 		DE_ASSERT(output.size() == alignedCols * alignedRows);
16778 
16779 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16780 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16781 				output[rowNdx * alignedCols + colNdx] = in[0][colNdx * alignedRows + rowNdx];
16782 
16783 		deMemcpy(out, &output[0], sizeof(deFloat16) * output.size());
16784 		deMemcpy(min, &output[0], sizeof(deFloat16) * output.size());
16785 		deMemcpy(max, &output[0], sizeof(deFloat16) * output.size());
16786 
16787 		return true;
16788 	}
16789 };
16790 
16791 template<size_t cols, size_t rows>
16792 struct fp16MatrixTimesScalar : public fp16MatrixBase
16793 {
getULPsvkt::SpirVAssembly::fp16MatrixTimesScalar16794 	virtual double getULPs(vector<const deFloat16*>& in)
16795 	{
16796 		DE_UNREF(in);
16797 
16798 		return 4.0;
16799 	}
16800 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesScalar16801 	deUint32	getComponentValidity	()
16802 	{
16803 		return getComponentMatrixValidityMask(cols, rows);
16804 	}
16805 
16806 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesScalar16807 	bool calc(vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16808 	{
16809 		DE_ASSERT(in.size() == 2);
16810 		DE_ASSERT(getArgCompCount(1) == 1);
16811 
16812 		const fp16type	y			(in[1][0]);
16813 		const float		scalar		(y.asFloat());
16814 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16815 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16816 
16817 		DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16818 		DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
16819 		DE_UNREF(alignedCols);
16820 
16821 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16822 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16823 			{
16824 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
16825 				const fp16type	x	(in[0][ndx]);
16826 				const double	result	(scalar * x.asFloat());
16827 
16828 				out[ndx] = fp16type(result).bits();
16829 				min[ndx] = getMin(result, getULPs(in));
16830 				max[ndx] = getMax(result, getULPs(in));
16831 			}
16832 
16833 		return true;
16834 	}
16835 };
16836 
16837 template<size_t cols, size_t rows>
16838 struct fp16VectorTimesMatrix : public fp16MatrixBase
16839 {
fp16VectorTimesMatrixvkt::SpirVAssembly::fp16VectorTimesMatrix16840 	fp16VectorTimesMatrix() : fp16MatrixBase()
16841 	{
16842 		flavorNames.push_back("EmulatingFP16");
16843 		flavorNames.push_back("FloatCalc");
16844 	}
16845 
getULPsvkt::SpirVAssembly::fp16VectorTimesMatrix16846 	virtual double getULPs (vector<const deFloat16*>& in)
16847 	{
16848 		DE_UNREF(in);
16849 
16850 		return (8.0 * cols);
16851 	}
16852 
getComponentValidityvkt::SpirVAssembly::fp16VectorTimesMatrix16853 	deUint32 getComponentValidity ()
16854 	{
16855 		return getComponentMatrixValidityMask(cols, 1);
16856 	}
16857 
16858 	template<class fp16type>
calcvkt::SpirVAssembly::fp16VectorTimesMatrix16859 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16860 	{
16861 		DE_ASSERT(in.size() == 2);
16862 
16863 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16864 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16865 
16866 		DE_ASSERT(getOutCompCount() == cols);
16867 		DE_ASSERT(getArgCompCount(0) == rows);
16868 		DE_ASSERT(getArgCompCount(1) == alignedCols * alignedRows);
16869 		DE_UNREF(alignedCols);
16870 
16871 		if (getFlavor() == 0)
16872 		{
16873 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16874 			{
16875 				fp16type	s	(fp16type::zero(1));
16876 
16877 				for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16878 				{
16879 					const fp16type	v	(in[0][rowNdx]);
16880 					const float		vf	(v.asFloat());
16881 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16882 					const fp16type	x	(in[1][ndx]);
16883 					const float		xf	(x.asFloat());
16884 					const fp16type	m	(vf * xf);
16885 
16886 					s = fp16type(s.asFloat() + m.asFloat());
16887 				}
16888 
16889 				out[colNdx] = s.bits();
16890 				min[colNdx] = getMin(s.asDouble(), getULPs(in));
16891 				max[colNdx] = getMax(s.asDouble(), getULPs(in));
16892 			}
16893 		}
16894 		else if (getFlavor() == 1)
16895 		{
16896 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16897 			{
16898 				float	s	(0.0f);
16899 
16900 				for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16901 				{
16902 					const fp16type	v	(in[0][rowNdx]);
16903 					const float		vf	(v.asFloat());
16904 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16905 					const fp16type	x	(in[1][ndx]);
16906 					const float		xf	(x.asFloat());
16907 					const float		m	(vf * xf);
16908 
16909 					s += m;
16910 				}
16911 
16912 				out[colNdx] = fp16type(s).bits();
16913 				min[colNdx] = getMin(static_cast<double>(s), getULPs(in));
16914 				max[colNdx] = getMax(static_cast<double>(s), getULPs(in));
16915 			}
16916 		}
16917 		else
16918 		{
16919 			TCU_THROW(InternalError, "Unknown flavor");
16920 		}
16921 
16922 		return true;
16923 	}
16924 };
16925 
16926 template<size_t cols, size_t rows>
16927 struct fp16MatrixTimesVector : public fp16MatrixBase
16928 {
fp16MatrixTimesVectorvkt::SpirVAssembly::fp16MatrixTimesVector16929 	fp16MatrixTimesVector() : fp16MatrixBase()
16930 	{
16931 		flavorNames.push_back("EmulatingFP16");
16932 		flavorNames.push_back("FloatCalc");
16933 	}
16934 
getULPsvkt::SpirVAssembly::fp16MatrixTimesVector16935 	virtual double getULPs (vector<const deFloat16*>& in)
16936 	{
16937 		DE_UNREF(in);
16938 
16939 		return (8.0 * rows);
16940 	}
16941 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesVector16942 	deUint32 getComponentValidity ()
16943 	{
16944 		return getComponentMatrixValidityMask(rows, 1);
16945 	}
16946 
16947 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesVector16948 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
16949 	{
16950 		DE_ASSERT(in.size() == 2);
16951 
16952 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
16953 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
16954 
16955 		DE_ASSERT(getOutCompCount() == rows);
16956 		DE_ASSERT(getArgCompCount(0) == alignedCols * alignedRows);
16957 		DE_ASSERT(getArgCompCount(1) == cols);
16958 		DE_UNREF(alignedCols);
16959 
16960 		if (getFlavor() == 0)
16961 		{
16962 			for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16963 			{
16964 				fp16type	s	(fp16type::zero(1));
16965 
16966 				for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16967 				{
16968 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16969 					const fp16type	x	(in[0][ndx]);
16970 					const float		xf	(x.asFloat());
16971 					const fp16type	v	(in[1][colNdx]);
16972 					const float		vf	(v.asFloat());
16973 					const fp16type	m	(vf * xf);
16974 
16975 					s = fp16type(s.asFloat() + m.asFloat());
16976 				}
16977 
16978 				out[rowNdx] = s.bits();
16979 				min[rowNdx] = getMin(s.asDouble(), getULPs(in));
16980 				max[rowNdx] = getMax(s.asDouble(), getULPs(in));
16981 			}
16982 		}
16983 		else if (getFlavor() == 1)
16984 		{
16985 			for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
16986 			{
16987 				float	s	(0.0f);
16988 
16989 				for (size_t colNdx = 0; colNdx < cols; ++colNdx)
16990 				{
16991 					const size_t	ndx	(colNdx * alignedRows + rowNdx);
16992 					const fp16type	x	(in[0][ndx]);
16993 					const float		xf	(x.asFloat());
16994 					const fp16type	v	(in[1][colNdx]);
16995 					const float		vf	(v.asFloat());
16996 					const float		m	(vf * xf);
16997 
16998 					s += m;
16999 				}
17000 
17001 				out[rowNdx] = fp16type(s).bits();
17002 				min[rowNdx] = getMin(static_cast<double>(s), getULPs(in));
17003 				max[rowNdx] = getMax(static_cast<double>(s), getULPs(in));
17004 			}
17005 		}
17006 		else
17007 		{
17008 			TCU_THROW(InternalError, "Unknown flavor");
17009 		}
17010 
17011 		return true;
17012 	}
17013 };
17014 
17015 template<size_t colsL, size_t rowsL, size_t colsR, size_t rowsR>
17016 struct fp16MatrixTimesMatrix : public fp16MatrixBase
17017 {
fp16MatrixTimesMatrixvkt::SpirVAssembly::fp16MatrixTimesMatrix17018 	fp16MatrixTimesMatrix() : fp16MatrixBase()
17019 	{
17020 		flavorNames.push_back("EmulatingFP16");
17021 		flavorNames.push_back("FloatCalc");
17022 	}
17023 
getULPsvkt::SpirVAssembly::fp16MatrixTimesMatrix17024 	virtual double getULPs (vector<const deFloat16*>& in)
17025 	{
17026 		DE_UNREF(in);
17027 
17028 		return 32.0;
17029 	}
17030 
getComponentValidityvkt::SpirVAssembly::fp16MatrixTimesMatrix17031 	deUint32 getComponentValidity ()
17032 	{
17033 		return getComponentMatrixValidityMask(colsR, rowsL);
17034 	}
17035 
17036 	template<class fp16type>
calcvkt::SpirVAssembly::fp16MatrixTimesMatrix17037 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17038 	{
17039 		DE_STATIC_ASSERT(colsL == rowsR);
17040 
17041 		DE_ASSERT(in.size() == 2);
17042 
17043 		const size_t	alignedColsL	= (colsL == 3) ? 4 : colsL;
17044 		const size_t	alignedRowsL	= (rowsL == 3) ? 4 : rowsL;
17045 		const size_t	alignedColsR	= (colsR == 3) ? 4 : colsR;
17046 		const size_t	alignedRowsR	= (rowsR == 3) ? 4 : rowsR;
17047 
17048 		DE_ASSERT(getOutCompCount() == alignedColsR * alignedRowsL);
17049 		DE_ASSERT(getArgCompCount(0) == alignedColsL * alignedRowsL);
17050 		DE_ASSERT(getArgCompCount(1) == alignedColsR * alignedRowsR);
17051 		DE_UNREF(alignedColsL);
17052 		DE_UNREF(alignedColsR);
17053 
17054 		if (getFlavor() == 0)
17055 		{
17056 			for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17057 			{
17058 				for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17059 				{
17060 					const size_t	ndx	(colNdx * alignedRowsL + rowNdx);
17061 					fp16type		s	(fp16type::zero(1));
17062 
17063 					for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17064 					{
17065 						const size_t	ndxl	(commonNdx * alignedRowsL + rowNdx);
17066 						const fp16type	l		(in[0][ndxl]);
17067 						const float		lf		(l.asFloat());
17068 						const size_t	ndxr	(colNdx * alignedRowsR + commonNdx);
17069 						const fp16type	r		(in[1][ndxr]);
17070 						const float		rf		(r.asFloat());
17071 						const fp16type	m		(lf * rf);
17072 
17073 						s = fp16type(s.asFloat() + m.asFloat());
17074 					}
17075 
17076 					out[ndx] = s.bits();
17077 					min[ndx] = getMin(s.asDouble(), getULPs(in));
17078 					max[ndx] = getMax(s.asDouble(), getULPs(in));
17079 				}
17080 			}
17081 		}
17082 		else if (getFlavor() == 1)
17083 		{
17084 			for (size_t rowNdx = 0; rowNdx < rowsL; ++rowNdx)
17085 			{
17086 				for (size_t colNdx = 0; colNdx < colsR; ++colNdx)
17087 				{
17088 					const size_t	ndx	(colNdx * alignedRowsL + rowNdx);
17089 					float			s	(0.0f);
17090 
17091 					for (size_t commonNdx = 0; commonNdx < colsL; ++commonNdx)
17092 					{
17093 						const size_t	ndxl	(commonNdx * alignedRowsL + rowNdx);
17094 						const fp16type	l		(in[0][ndxl]);
17095 						const float		lf		(l.asFloat());
17096 						const size_t	ndxr	(colNdx * alignedRowsR + commonNdx);
17097 						const fp16type	r		(in[1][ndxr]);
17098 						const float		rf		(r.asFloat());
17099 						const float		m		(lf * rf);
17100 
17101 						s += m;
17102 					}
17103 
17104 					out[ndx] = fp16type(s).bits();
17105 					min[ndx] = getMin(static_cast<double>(s), getULPs(in));
17106 					max[ndx] = getMax(static_cast<double>(s), getULPs(in));
17107 				}
17108 			}
17109 		}
17110 		else
17111 		{
17112 			TCU_THROW(InternalError, "Unknown flavor");
17113 		}
17114 
17115 		return true;
17116 	}
17117 };
17118 
17119 template<size_t cols, size_t rows>
17120 struct fp16OuterProduct : public fp16MatrixBase
17121 {
getULPsvkt::SpirVAssembly::fp16OuterProduct17122 	virtual double getULPs (vector<const deFloat16*>& in)
17123 	{
17124 		DE_UNREF(in);
17125 
17126 		return 2.0;
17127 	}
17128 
getComponentValidityvkt::SpirVAssembly::fp16OuterProduct17129 	deUint32 getComponentValidity ()
17130 	{
17131 		return getComponentMatrixValidityMask(cols, rows);
17132 	}
17133 
17134 	template<class fp16type>
calcvkt::SpirVAssembly::fp16OuterProduct17135 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17136 	{
17137 		DE_ASSERT(in.size() == 2);
17138 
17139 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17140 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17141 
17142 		DE_ASSERT(getArgCompCount(0) == rows);
17143 		DE_ASSERT(getArgCompCount(1) == cols);
17144 		DE_ASSERT(getOutCompCount() == alignedCols * alignedRows);
17145 		DE_UNREF(alignedCols);
17146 
17147 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17148 		{
17149 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17150 			{
17151 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
17152 				const fp16type	x	(in[0][rowNdx]);
17153 				const float		xf	(x.asFloat());
17154 				const fp16type	y	(in[1][colNdx]);
17155 				const float		yf	(y.asFloat());
17156 				const fp16type	m	(xf * yf);
17157 
17158 				out[ndx] = m.bits();
17159 				min[ndx] = getMin(m.asDouble(), getULPs(in));
17160 				max[ndx] = getMax(m.asDouble(), getULPs(in));
17161 			}
17162 		}
17163 
17164 		return true;
17165 	}
17166 };
17167 
17168 template<size_t size>
17169 struct fp16Determinant;
17170 
17171 template<>
17172 struct fp16Determinant<2> : public fp16MatrixBase
17173 {
getULPsvkt::SpirVAssembly::fp16Determinant17174 	virtual double getULPs (vector<const deFloat16*>& in)
17175 	{
17176 		DE_UNREF(in);
17177 
17178 		return 128.0; // This is not a precision test. Value is not from spec
17179 	}
17180 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17181 	deUint32 getComponentValidity ()
17182 	{
17183 		return 1;
17184 	}
17185 
17186 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17187 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17188 	{
17189 		const size_t	cols		= 2;
17190 		const size_t	rows		= 2;
17191 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17192 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17193 
17194 		DE_ASSERT(in.size() == 1);
17195 		DE_ASSERT(getOutCompCount() == 1);
17196 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17197 		DE_UNREF(alignedCols);
17198 		DE_UNREF(alignedRows);
17199 
17200 		// [ a b ]
17201 		// [ c d ]
17202 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17203 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17204 		const float		c		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17205 		const float		d		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17206 		const float		ad		(a * d);
17207 		const fp16type	adf16	(ad);
17208 		const float		bc		(b * c);
17209 		const fp16type	bcf16	(bc);
17210 		const float		r		(adf16.asFloat() - bcf16.asFloat());
17211 		const fp16type	rf16	(r);
17212 
17213 		out[0] = rf16.bits();
17214 		min[0] = getMin(r, getULPs(in));
17215 		max[0] = getMax(r, getULPs(in));
17216 
17217 		return true;
17218 	}
17219 };
17220 
17221 template<>
17222 struct fp16Determinant<3> : public fp16MatrixBase
17223 {
getULPsvkt::SpirVAssembly::fp16Determinant17224 	virtual double getULPs (vector<const deFloat16*>& in)
17225 	{
17226 		DE_UNREF(in);
17227 
17228 		return 128.0; // This is not a precision test. Value is not from spec
17229 	}
17230 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17231 	deUint32 getComponentValidity ()
17232 	{
17233 		return 1;
17234 	}
17235 
17236 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17237 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17238 	{
17239 		const size_t	cols		= 3;
17240 		const size_t	rows		= 3;
17241 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17242 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17243 
17244 		DE_ASSERT(in.size() == 1);
17245 		DE_ASSERT(getOutCompCount() == 1);
17246 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17247 		DE_UNREF(alignedCols);
17248 		DE_UNREF(alignedRows);
17249 
17250 		// [ a b c ]
17251 		// [ d e f ]
17252 		// [ g h i ]
17253 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17254 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17255 		const float		c		(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17256 		const float		d		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17257 		const float		e		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17258 		const float		f		(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17259 		const float		g		(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17260 		const float		h		(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17261 		const float		i		(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17262 		const fp16type	aei		(a * e * i);
17263 		const fp16type	bfg		(b * f * g);
17264 		const fp16type	cdh		(c * d * h);
17265 		const fp16type	ceg		(c * e * g);
17266 		const fp16type	bdi		(b * d * i);
17267 		const fp16type	afh		(a * f * h);
17268 		const float		r		(aei.asFloat() + bfg.asFloat() + cdh.asFloat() - ceg.asFloat() - bdi.asFloat() - afh.asFloat());
17269 		const fp16type	rf16	(r);
17270 
17271 		out[0] = rf16.bits();
17272 		min[0] = getMin(r, getULPs(in));
17273 		max[0] = getMax(r, getULPs(in));
17274 
17275 		return true;
17276 	}
17277 };
17278 
17279 template<>
17280 struct fp16Determinant<4> : public fp16MatrixBase
17281 {
getULPsvkt::SpirVAssembly::fp16Determinant17282 	virtual double getULPs (vector<const deFloat16*>& in)
17283 	{
17284 		DE_UNREF(in);
17285 
17286 		return 128.0; // This is not a precision test. Value is not from spec
17287 	}
17288 
getComponentValidityvkt::SpirVAssembly::fp16Determinant17289 	deUint32 getComponentValidity ()
17290 	{
17291 		return 1;
17292 	}
17293 
17294 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Determinant17295 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17296 	{
17297 		const size_t	rows		= 4;
17298 		const size_t	cols		= 4;
17299 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17300 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17301 
17302 		DE_ASSERT(in.size() == 1);
17303 		DE_ASSERT(getOutCompCount() == 1);
17304 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17305 		DE_UNREF(alignedCols);
17306 		DE_UNREF(alignedRows);
17307 
17308 		// [ a b c d ]
17309 		// [ e f g h ]
17310 		// [ i j k l ]
17311 		// [ m n o p ]
17312 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17313 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17314 		const float		c		(fp16type(in[0][getNdx(rows, 2, 0)]).asFloat());
17315 		const float		d		(fp16type(in[0][getNdx(rows, 3, 0)]).asFloat());
17316 		const float		e		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17317 		const float		f		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17318 		const float		g		(fp16type(in[0][getNdx(rows, 2, 1)]).asFloat());
17319 		const float		h		(fp16type(in[0][getNdx(rows, 3, 1)]).asFloat());
17320 		const float		i		(fp16type(in[0][getNdx(rows, 0, 2)]).asFloat());
17321 		const float		j		(fp16type(in[0][getNdx(rows, 1, 2)]).asFloat());
17322 		const float		k		(fp16type(in[0][getNdx(rows, 2, 2)]).asFloat());
17323 		const float		l		(fp16type(in[0][getNdx(rows, 3, 2)]).asFloat());
17324 		const float		m		(fp16type(in[0][getNdx(rows, 0, 3)]).asFloat());
17325 		const float		n		(fp16type(in[0][getNdx(rows, 1, 3)]).asFloat());
17326 		const float		o		(fp16type(in[0][getNdx(rows, 2, 3)]).asFloat());
17327 		const float		p		(fp16type(in[0][getNdx(rows, 3, 3)]).asFloat());
17328 
17329 		// [ f g h ]
17330 		// [ j k l ]
17331 		// [ n o p ]
17332 		const fp16type	fkp		(f * k * p);
17333 		const fp16type	gln		(g * l * n);
17334 		const fp16type	hjo		(h * j * o);
17335 		const fp16type	hkn		(h * k * n);
17336 		const fp16type	gjp		(g * j * p);
17337 		const fp16type	flo		(f * l * o);
17338 		const fp16type	detA	(a * (fkp.asFloat() + gln.asFloat() + hjo.asFloat() - hkn.asFloat() - gjp.asFloat() - flo.asFloat()));
17339 
17340 		// [ e g h ]
17341 		// [ i k l ]
17342 		// [ m o p ]
17343 		const fp16type	ekp		(e * k * p);
17344 		const fp16type	glm		(g * l * m);
17345 		const fp16type	hio		(h * i * o);
17346 		const fp16type	hkm		(h * k * m);
17347 		const fp16type	gip		(g * i * p);
17348 		const fp16type	elo		(e * l * o);
17349 		const fp16type	detB	(b * (ekp.asFloat() + glm.asFloat() + hio.asFloat() - hkm.asFloat() - gip.asFloat() - elo.asFloat()));
17350 
17351 		// [ e f h ]
17352 		// [ i j l ]
17353 		// [ m n p ]
17354 		const fp16type	ejp		(e * j * p);
17355 		const fp16type	flm		(f * l * m);
17356 		const fp16type	hin		(h * i * n);
17357 		const fp16type	hjm		(h * j * m);
17358 		const fp16type	fip		(f * i * p);
17359 		const fp16type	eln		(e * l * n);
17360 		const fp16type	detC	(c * (ejp.asFloat() + flm.asFloat() + hin.asFloat() - hjm.asFloat() - fip.asFloat() - eln.asFloat()));
17361 
17362 		// [ e f g ]
17363 		// [ i j k ]
17364 		// [ m n o ]
17365 		const fp16type	ejo		(e * j * o);
17366 		const fp16type	fkm		(f * k * m);
17367 		const fp16type	gin		(g * i * n);
17368 		const fp16type	gjm		(g * j * m);
17369 		const fp16type	fio		(f * i * o);
17370 		const fp16type	ekn		(e * k * n);
17371 		const fp16type	detD	(d * (ejo.asFloat() + fkm.asFloat() + gin.asFloat() - gjm.asFloat() - fio.asFloat() - ekn.asFloat()));
17372 
17373 		const float		r		(detA.asFloat() - detB.asFloat() + detC.asFloat() - detD.asFloat());
17374 		const fp16type	rf16	(r);
17375 
17376 		out[0] = rf16.bits();
17377 		min[0] = getMin(r, getULPs(in));
17378 		max[0] = getMax(r, getULPs(in));
17379 
17380 		return true;
17381 	}
17382 };
17383 
17384 template<size_t size>
17385 struct fp16Inverse;
17386 
17387 template<>
17388 struct fp16Inverse<2> : public fp16MatrixBase
17389 {
getULPsvkt::SpirVAssembly::fp16Inverse17390 	virtual double getULPs (vector<const deFloat16*>& in)
17391 	{
17392 		DE_UNREF(in);
17393 
17394 		return 128.0; // This is not a precision test. Value is not from spec
17395 	}
17396 
getComponentValidityvkt::SpirVAssembly::fp16Inverse17397 	deUint32 getComponentValidity ()
17398 	{
17399 		return getComponentMatrixValidityMask(2, 2);
17400 	}
17401 
17402 	template<class fp16type>
calcvkt::SpirVAssembly::fp16Inverse17403 	bool calc (vector<const deFloat16*>& in, deFloat16* out, double* min, double* max)
17404 	{
17405 		const size_t	cols		= 2;
17406 		const size_t	rows		= 2;
17407 		const size_t	alignedCols	= (cols == 3) ? 4 : cols;
17408 		const size_t	alignedRows	= (rows == 3) ? 4 : rows;
17409 
17410 		DE_ASSERT(in.size() == 1);
17411 		DE_ASSERT(getOutCompCount() == alignedRows * alignedCols);
17412 		DE_ASSERT(getArgCompCount(0) == alignedRows * alignedCols);
17413 		DE_UNREF(alignedCols);
17414 
17415 		// [ a b ]
17416 		// [ c d ]
17417 		const float		a		(fp16type(in[0][getNdx(rows, 0, 0)]).asFloat());
17418 		const float		b		(fp16type(in[0][getNdx(rows, 1, 0)]).asFloat());
17419 		const float		c		(fp16type(in[0][getNdx(rows, 0, 1)]).asFloat());
17420 		const float		d		(fp16type(in[0][getNdx(rows, 1, 1)]).asFloat());
17421 		const float		ad		(a * d);
17422 		const fp16type	adf16	(ad);
17423 		const float		bc		(b * c);
17424 		const fp16type	bcf16	(bc);
17425 		const float		det		(adf16.asFloat() - bcf16.asFloat());
17426 		const fp16type	det16	(det);
17427 
17428 		out[0] = fp16type( d / det16.asFloat()).bits();
17429 		out[1] = fp16type(-c / det16.asFloat()).bits();
17430 		out[2] = fp16type(-b / det16.asFloat()).bits();
17431 		out[3] = fp16type( a / det16.asFloat()).bits();
17432 
17433 		for (size_t rowNdx = 0; rowNdx < rows; ++rowNdx)
17434 			for (size_t colNdx = 0; colNdx < cols; ++colNdx)
17435 			{
17436 				const size_t	ndx	(colNdx * alignedRows + rowNdx);
17437 				const fp16type	s	(out[ndx]);
17438 
17439 				min[ndx] = getMin(s.asDouble(), getULPs(in));
17440 				max[ndx] = getMax(s.asDouble(), getULPs(in));
17441 			}
17442 
17443 		return true;
17444 	}
17445 };
17446 
fp16ToString(deFloat16 val)17447 inline std::string fp16ToString(deFloat16 val)
17448 {
17449 	return tcu::toHex<4>(val).toString() + " (" + de::floatToString(tcu::Float16(val).asFloat(), 10) + ")";
17450 }
17451 
17452 template <size_t RES_COMPONENTS, size_t ARG0_COMPONENTS, size_t ARG1_COMPONENTS, size_t ARG2_COMPONENTS, class TestedArithmeticFunction>
compareFP16ArithmeticFunc(const std::vector<Resource> & inputs,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)17453 bool compareFP16ArithmeticFunc (const std::vector<Resource>& inputs, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
17454 {
17455 	if (inputs.size() < 1 || inputs.size() > 3 || outputAllocs.size() != 1 || expectedOutputs.size() != 1)
17456 		return false;
17457 
17458 	const size_t	resultStep			= (RES_COMPONENTS == 3) ? 4 : RES_COMPONENTS;
17459 	const size_t	iterationsCount		= expectedOutputs[0].getByteSize() / (sizeof(deFloat16) * resultStep);
17460 	const size_t	inputsSteps[3]		=
17461 	{
17462 		(ARG0_COMPONENTS == 3) ? 4 : ARG0_COMPONENTS,
17463 		(ARG1_COMPONENTS == 3) ? 4 : ARG1_COMPONENTS,
17464 		(ARG2_COMPONENTS == 3) ? 4 : ARG2_COMPONENTS,
17465 	};
17466 
17467 	DE_ASSERT(expectedOutputs[0].getByteSize() > 0);
17468 	DE_ASSERT(expectedOutputs[0].getByteSize() == sizeof(deFloat16) * iterationsCount * resultStep);
17469 
17470 	for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17471 	{
17472 		DE_ASSERT(inputs[inputNdx].getByteSize() > 0);
17473 		DE_ASSERT(inputs[inputNdx].getByteSize() == sizeof(deFloat16) * iterationsCount * inputsSteps[inputNdx]);
17474 	}
17475 
17476 	const deFloat16* const		outputAsFP16					= (const deFloat16*)outputAllocs[0]->getHostPtr();
17477 	TestedArithmeticFunction	func;
17478 
17479 	func.setOutCompCount(RES_COMPONENTS);
17480 	func.setArgCompCount(0, ARG0_COMPONENTS);
17481 	func.setArgCompCount(1, ARG1_COMPONENTS);
17482 	func.setArgCompCount(2, ARG2_COMPONENTS);
17483 
17484 	const bool					callOncePerComponent			= func.callOncePerComponent();
17485 	const deUint32				componentValidityMask			= func.getComponentValidity();
17486 	const size_t				denormModesCount				= 2;
17487 	const char*					denormModes[denormModesCount]	= { "keep denormal numbers", "flush to zero" };
17488 	const size_t				successfulRunsPerComponent		= denormModesCount * func.getFlavorCount();
17489 	bool						success							= true;
17490 	size_t						validatedCount					= 0;
17491 
17492 	vector<deUint8>	inputBytes[3];
17493 
17494 	for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17495 		inputs[inputNdx].getBytes(inputBytes[inputNdx]);
17496 
17497 	const deFloat16* const			inputsAsFP16[3]			=
17498 	{
17499 		inputs.size() >= 1 ? (const deFloat16*)&inputBytes[0][0] : DE_NULL,
17500 		inputs.size() >= 2 ? (const deFloat16*)&inputBytes[1][0] : DE_NULL,
17501 		inputs.size() >= 3 ? (const deFloat16*)&inputBytes[2][0] : DE_NULL,
17502 	};
17503 
17504 	for (size_t idx = 0; idx < iterationsCount; ++idx)
17505 	{
17506 		std::vector<size_t>			successfulRuns		(RES_COMPONENTS, successfulRunsPerComponent);
17507 		std::vector<std::string>	errors				(RES_COMPONENTS);
17508 		bool						iterationValidated	(true);
17509 
17510 		for (size_t denormNdx = 0; denormNdx < 2; ++denormNdx)
17511 		{
17512 			for (size_t flavorNdx = 0; flavorNdx < func.getFlavorCount(); ++flavorNdx)
17513 			{
17514 				func.setFlavor(flavorNdx);
17515 
17516 				const deFloat16*			iterationOutputFP16		= &outputAsFP16[idx * resultStep];
17517 				vector<deFloat16>			iterationCalculatedFP16	(resultStep, 0);
17518 				vector<double>				iterationEdgeMin		(resultStep, 0.0);
17519 				vector<double>				iterationEdgeMax		(resultStep, 0.0);
17520 				vector<const deFloat16*>	arguments;
17521 
17522 				for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17523 				{
17524 					std::string	error;
17525 					bool		reportError = false;
17526 
17527 					if (callOncePerComponent || componentNdx == 0)
17528 					{
17529 						bool funcCallResult;
17530 
17531 						arguments.clear();
17532 
17533 						for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17534 							arguments.push_back(&inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + componentNdx]);
17535 
17536 						if (denormNdx == 0)
17537 							funcCallResult = func.template calc<tcu::Float16>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17538 						else
17539 							funcCallResult = func.template calc<tcu::Float16Denormless>(arguments, &iterationCalculatedFP16[componentNdx], &iterationEdgeMin[componentNdx], &iterationEdgeMax[componentNdx]);
17540 
17541 						if (!funcCallResult)
17542 						{
17543 							iterationValidated = false;
17544 
17545 							if (callOncePerComponent)
17546 								continue;
17547 							else
17548 								break;
17549 						}
17550 					}
17551 
17552 					if ((componentValidityMask != 0) && (componentValidityMask & (1<<componentNdx)) == 0)
17553 						continue;
17554 
17555 					reportError = !compare16BitFloat(iterationCalculatedFP16[componentNdx], iterationOutputFP16[componentNdx], error);
17556 
17557 					if (reportError)
17558 					{
17559 						tcu::Float16 expected	(iterationCalculatedFP16[componentNdx]);
17560 						tcu::Float16 outputted	(iterationOutputFP16[componentNdx]);
17561 						tcu::Float64 edgeMin    (iterationEdgeMin[componentNdx]);
17562 						tcu::Float64 edgeMax    (iterationEdgeMax[componentNdx]);
17563 
17564 						if (reportError && expected.isNaN())
17565 							reportError = false;
17566 
17567 						if (reportError && !expected.isNaN() && !outputted.isNaN())
17568 						{
17569 							if (reportError && !expected.isInf() && !outputted.isInf())
17570 							{
17571 								// Ignore rounding
17572 								if (expected.bits() == outputted.bits() + 1 || expected.bits() + 1 == outputted.bits())
17573 									reportError = false;
17574 							}
17575 
17576 							if (reportError && expected.isInf())
17577 							{
17578 								// RTZ rounding mode returns +/-65504 instead of Inf on overflow
17579 								if (expected.sign() == 1 && outputted.bits() == 0x7bff && edgeMin.asDouble() <= std::numeric_limits<double>::max())
17580 									reportError = false;
17581 								else if (expected.sign() == -1 && outputted.bits() == 0xfbff && edgeMax.asDouble() >= -std::numeric_limits<double>::max())
17582 									reportError = false;
17583 							}
17584 
17585 							if (reportError)
17586 							{
17587 								const double	outputtedDouble	= outputted.asDouble();
17588 
17589 							    DE_ASSERT(edgeMin.isNaN() || edgeMax.isNaN() || (edgeMin.asDouble() <= edgeMax.asDouble()));
17590 
17591 								if (de::inRange(outputtedDouble, edgeMin.asDouble(), edgeMax.asDouble()))
17592 									reportError = false;
17593 							}
17594 						}
17595 
17596 						if (reportError)
17597 						{
17598 							const size_t		inputsComps[3]	=
17599 							{
17600 								ARG0_COMPONENTS,
17601 								ARG1_COMPONENTS,
17602 								ARG2_COMPONENTS,
17603 							};
17604 							string				inputsValues	("Inputs:");
17605 							string				flavorName		(func.getFlavorCount() == 1 ? "" : string(" flavor ") + de::toString(flavorNdx) + " (" + func.getCurrentFlavorName() + ")");
17606 							std::stringstream	errStream;
17607 
17608 							for (size_t inputNdx = 0; inputNdx < inputs.size(); ++inputNdx)
17609 							{
17610 								const size_t	inputCompsCount = inputsComps[inputNdx];
17611 
17612 								inputsValues += " [" + de::toString(inputNdx) + "]=(";
17613 
17614 								for (size_t compNdx = 0; compNdx < inputCompsCount; ++compNdx)
17615 								{
17616 									const deFloat16 inputComponentValue = inputsAsFP16[inputNdx][idx * inputsSteps[inputNdx] + compNdx];
17617 
17618 									inputsValues += fp16ToString(inputComponentValue) + ((compNdx + 1 == inputCompsCount) ? ")": ", ");
17619 								}
17620 							}
17621 
17622 							errStream	<< "At"
17623 										<< " iteration " << de::toString(idx)
17624 										<< " component " << de::toString(componentNdx)
17625 										<< " denormMode " << de::toString(denormNdx)
17626 										<< " (" << denormModes[denormNdx] << ")"
17627 										<< " " << flavorName
17628 										<< " " << inputsValues
17629 										<< " outputted:" + fp16ToString(iterationOutputFP16[componentNdx])
17630 										<< " expected:" + fp16ToString(iterationCalculatedFP16[componentNdx])
17631 										<< " or in range: [" << iterationEdgeMin[componentNdx] << ", " << iterationEdgeMax[componentNdx] << "]."
17632 										<< " " << error << "."
17633 										<< std::endl;
17634 
17635 							errors[componentNdx] += errStream.str();
17636 
17637 							successfulRuns[componentNdx]--;
17638 						}
17639 					}
17640 				}
17641 			}
17642 		}
17643 
17644 		for (size_t componentNdx = 0; componentNdx < RES_COMPONENTS; ++componentNdx)
17645 		{
17646 			// Check if any component has total failure
17647 			if (successfulRuns[componentNdx] == 0)
17648 			{
17649 				// Test failed in all denorm modes and all flavors for certain component: dump errors
17650 				log << TestLog::Message << errors[componentNdx] << TestLog::EndMessage;
17651 
17652 				success = false;
17653 			}
17654 		}
17655 
17656 		if (iterationValidated)
17657 			validatedCount++;
17658 	}
17659 
17660 	if (validatedCount < 16)
17661 		TCU_THROW(InternalError, "Too few samples have been validated.");
17662 
17663 	return success;
17664 }
17665 
17666 // IEEE-754 floating point numbers:
17667 // +--------+------+----------+-------------+
17668 // | binary | sign | exponent | significand |
17669 // +--------+------+----------+-------------+
17670 // | 16-bit |  1   |    5     |     10      |
17671 // +--------+------+----------+-------------+
17672 // | 32-bit |  1   |    8     |     23      |
17673 // +--------+------+----------+-------------+
17674 //
17675 // 16-bit floats:
17676 //
// 0   000 00   00 0000 0001 (0x0001: 2^-24:         minimum positive denormalized)
// 0   000 00   11 1111 1111 (0x03ff: 2^-14 - 2^-24: maximum positive denormalized)
// 0   000 01   00 0000 0000 (0x0400: 2^-14:         minimum positive normalized)
17680 // 0   111 10   11 1111 1111 (0x7bff: 65504:         maximum positive normalized)
17681 //
17682 // 0   000 00   00 0000 0000 (0x0000: +0)
17683 // 0   111 11   00 0000 0000 (0x7c00: +Inf)
17684 // 0   000 00   11 1111 0000 (0x03f0: +Denorm)
17685 // 0   000 01   00 0000 0001 (0x0401: +Norm)
17686 // 0   111 11   00 0000 1111 (0x7c0f: +SNaN)
17687 // 0   111 11   11 1111 0000 (0x7ff0: +QNaN)
17688 // Generate and return 16-bit floats and their corresponding 32-bit values.
17689 //
17690 // The first 14 number pairs are manually picked, while the rest are randomly generated.
17691 // Expected count to be at least 14 (numPicks).
getFloat16a(de::Random & rnd,deUint32 count)17692 vector<deFloat16> getFloat16a (de::Random& rnd, deUint32 count)
17693 {
17694 	vector<deFloat16>	float16;
17695 
17696 	float16.reserve(count);
17697 
17698 	// Zero
17699 	float16.push_back(deUint16(0x0000));
17700 	float16.push_back(deUint16(0x8000));
17701 	// Infinity
17702 	float16.push_back(deUint16(0x7c00));
17703 	float16.push_back(deUint16(0xfc00));
17704 	// Normalized
17705 	float16.push_back(deUint16(0x0401));
17706 	float16.push_back(deUint16(0x8401));
17707 	// Some normal number
17708 	float16.push_back(deUint16(0x14cb));
17709 	float16.push_back(deUint16(0x94cb));
17710 	// Min/max positive normal
17711 	float16.push_back(deUint16(0x0400));
17712 	float16.push_back(deUint16(0x7bff));
17713 	// Min/max negative normal
17714 	float16.push_back(deUint16(0x8400));
17715 	float16.push_back(deUint16(0xfbff));
17716 	// PI
17717 	float16.push_back(deUint16(0x4248)); // 3.140625
17718 	float16.push_back(deUint16(0xb248)); // -3.140625
17719 	// PI/2
17720 	float16.push_back(deUint16(0x3e48)); // 1.5703125
17721 	float16.push_back(deUint16(0xbe48)); // -1.5703125
17722 	float16.push_back(deUint16(0x3c00)); // 1.0
17723 	float16.push_back(deUint16(0x3800)); // 0.5
17724 	// Some useful constants
17725 	float16.push_back(tcu::Float16(-2.5f).bits());
17726 	float16.push_back(tcu::Float16(-1.0f).bits());
17727 	float16.push_back(tcu::Float16( 0.4f).bits());
17728 	float16.push_back(tcu::Float16( 2.5f).bits());
17729 
17730 	const deUint32		numPicks	= static_cast<deUint32>(float16.size());
17731 
17732 	DE_ASSERT(count >= numPicks);
17733 	count -= numPicks;
17734 
17735 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17736 	{
17737 		int			sign		= (rnd.getUint16() % 2 == 0) ? +1 : -1;
17738 		int			exponent	= (rnd.getUint16() % 29) - 14 + 1;
17739 		deUint16	mantissa	= static_cast<deUint16>(2 * (rnd.getUint16() % 512));
17740 
17741 		// Exclude power of -14 to avoid denorms
17742 		DE_ASSERT(de::inRange(exponent, -13, 15));
17743 
17744 		float16.push_back(tcu::Float16::constructBits(sign, exponent, mantissa).bits());
17745 	}
17746 
17747 	return float16;
17748 }
17749 
getInputData1(deUint32 seed,size_t count,size_t argNo)17750 static inline vector<deFloat16> getInputData1 (deUint32 seed, size_t count, size_t argNo)
17751 {
17752 	DE_UNREF(argNo);
17753 
17754 	de::Random	rnd(seed);
17755 
17756 	return getFloat16a(rnd, static_cast<deUint32>(count));
17757 }
17758 
getInputData2(deUint32 seed,size_t count,size_t argNo)17759 static inline vector<deFloat16> getInputData2 (deUint32 seed, size_t count, size_t argNo)
17760 {
17761 	de::Random	rnd		(seed);
17762 	size_t		newCount = static_cast<size_t>(deSqrt(double(count)));
17763 
17764 	DE_ASSERT(newCount * newCount == count);
17765 
17766 	vector<deFloat16>	float16 = getFloat16a(rnd, static_cast<deUint32>(newCount));
17767 
17768 	return squarize(float16, static_cast<deUint32>(argNo));
17769 }
17770 
getInputData3(deUint32 seed,size_t count,size_t argNo)17771 static inline vector<deFloat16> getInputData3 (deUint32 seed, size_t count, size_t argNo)
17772 {
17773 	if (argNo == 0 || argNo == 1)
17774 		return getInputData2(seed, count, argNo);
17775 	else
17776 		return getInputData1(seed<<argNo, count, argNo);
17777 }
17778 
getInputData(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17779 vector<deFloat16> getInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17780 {
17781 	DE_UNREF(stride);
17782 
17783 	vector<deFloat16>	result;
17784 
17785 	switch (argCount)
17786 	{
17787 		case 1:result = getInputData1(seed, count, argNo); break;
17788 		case 2:result = getInputData2(seed, count, argNo); break;
17789 		case 3:result = getInputData3(seed, count, argNo); break;
17790 		default: TCU_THROW(InternalError, "Invalid argument count specified");
17791 	}
17792 
17793 	if (compCount == 3)
17794 	{
17795 		const size_t		newCount = (3 * count) / 4;
17796 		vector<deFloat16>	newResult;
17797 
17798 		newResult.reserve(result.size());
17799 
17800 		for (size_t ndx = 0; ndx < newCount; ++ndx)
17801 		{
17802 			newResult.push_back(result[ndx]);
17803 
17804 			if (ndx % 3 == 2)
17805 				newResult.push_back(0);
17806 		}
17807 
17808 		result = newResult;
17809 	}
17810 
17811 	DE_ASSERT(result.size() == count);
17812 
17813 	return result;
17814 }
17815 
17816 // Generator for functions requiring data in range [1, inf]
getInputDataAC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17817 vector<deFloat16> getInputDataAC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17818 {
17819 	vector<deFloat16>	result;
17820 
17821 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17822 
17823 	// Filter out values below 1.0 from upper half of numbers
17824 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17825 	{
17826 		const float f = tcu::Float16(result[idx]).asFloat();
17827 
17828 		if (f < 1.0f)
17829 			result[idx] = tcu::Float16(1.0f - f).bits();
17830 	}
17831 
17832 	return result;
17833 }
17834 
17835 // Generator for functions requiring data in range [-1, 1]
getInputDataA(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17836 vector<deFloat16> getInputDataA (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17837 {
17838 	vector<deFloat16>	result;
17839 
17840 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17841 
17842 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17843 	{
17844 		const float f = tcu::Float16(result[idx]).asFloat();
17845 
17846 		if (!de::inRange(f, -1.0f, 1.0f))
17847 			result[idx] = tcu::Float16(deFloatFrac(f)).bits();
17848 	}
17849 
17850 	return result;
17851 }
17852 
17853 // Generator for functions requiring data in range [-pi, pi]
getInputDataPI(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17854 vector<deFloat16> getInputDataPI (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17855 {
17856 	vector<deFloat16>	result;
17857 
17858 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17859 
17860 	for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17861 	{
17862 		const float f = tcu::Float16(result[idx]).asFloat();
17863 
17864 		if (!de::inRange(f, -DE_PI, DE_PI))
17865 			result[idx] = tcu::Float16(fmodf(f, DE_PI)).bits();
17866 	}
17867 
17868 	return result;
17869 }
17870 
17871 // Generator for functions requiring data in range [0, inf]
getInputDataP(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17872 vector<deFloat16> getInputDataP (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17873 {
17874 	vector<deFloat16>	result;
17875 
17876 	result = getInputData(seed, count, compCount, stride, argCount, argNo);
17877 
17878 	if (argNo == 0)
17879 	{
17880 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17881 			result[idx] &= static_cast<deFloat16>(~0x8000);
17882 	}
17883 
17884 	return result;
17885 }
17886 
getInputDataV(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17887 vector<deFloat16> getInputDataV (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17888 {
17889 	DE_UNREF(stride);
17890 	DE_UNREF(argCount);
17891 
17892 	vector<deFloat16>	result;
17893 
17894 	if (argNo == 0)
17895 		result = getInputData2(seed, count, argNo);
17896 	else
17897 	{
17898 		const size_t		alignedCount	= (compCount == 3) ? 4 : compCount;
17899 		const size_t		newCountX		= static_cast<size_t>(deSqrt(double(count * alignedCount)));
17900 		const size_t		newCountY		= count / newCountX;
17901 		de::Random			rnd				(seed);
17902 		vector<deFloat16>	float16			= getFloat16a(rnd, static_cast<deUint32>(newCountX));
17903 
17904 		DE_ASSERT(newCountX * newCountX == alignedCount * count);
17905 
17906 		for (size_t numIdx = 0; numIdx < newCountX; ++numIdx)
17907 		{
17908 			const vector<deFloat16>	tmp(newCountY, float16[numIdx]);
17909 
17910 			result.insert(result.end(), tmp.begin(), tmp.end());
17911 		}
17912 	}
17913 
17914 	DE_ASSERT(result.size() == count);
17915 
17916 	return result;
17917 }
17918 
getInputDataM(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17919 vector<deFloat16> getInputDataM (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17920 {
17921 	DE_UNREF(compCount);
17922 	DE_UNREF(stride);
17923 	DE_UNREF(argCount);
17924 
17925 	de::Random			rnd		(seed << argNo);
17926 	vector<deFloat16>	result;
17927 
17928 	result = getFloat16a(rnd, static_cast<deUint32>(count));
17929 
17930 	DE_ASSERT(result.size() == count);
17931 
17932 	return result;
17933 }
17934 
getInputDataD(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17935 vector<deFloat16> getInputDataD (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17936 {
17937 	DE_UNREF(compCount);
17938 	DE_UNREF(argCount);
17939 
17940 	de::Random			rnd		(seed << argNo);
17941 	vector<deFloat16>	result;
17942 
17943 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
17944 	{
17945 		int num	= (rnd.getUint16() % 16) - 8;
17946 
17947 		result.push_back(tcu::Float16(float(num)).bits());
17948 	}
17949 
17950 	result[0 * stride] = deUint16(0x7c00); // +Inf
17951 	result[1 * stride] = deUint16(0xfc00); // -Inf
17952 
17953 	DE_ASSERT(result.size() == count);
17954 
17955 	return result;
17956 }
17957 
17958 // Generator for smoothstep function
getInputDataSS(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17959 vector<deFloat16> getInputDataSS (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17960 {
17961 	vector<deFloat16>	result;
17962 
17963 	result = getInputDataD(seed, count, compCount, stride, argCount, argNo);
17964 
17965 	if (argNo == 0)
17966 	{
17967 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17968 		{
17969 			const float f = tcu::Float16(result[idx]).asFloat();
17970 
17971 			if (f > 4.0f)
17972 				result[idx] = tcu::Float16(-f).bits();
17973 		}
17974 	}
17975 
17976 	if (argNo == 1)
17977 	{
17978 		for (size_t idx = result.size() / 2; idx < result.size(); ++idx)
17979 		{
17980 			const float f = tcu::Float16(result[idx]).asFloat();
17981 
17982 			if (f < 4.0f)
17983 				result[idx] = tcu::Float16(-f).bits();
17984 		}
17985 	}
17986 
17987 	return result;
17988 }
17989 
17990 // Generates normalized vectors for arguments 0 and 1
getInputDataN(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)17991 vector<deFloat16> getInputDataN (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
17992 {
17993 	DE_UNREF(compCount);
17994 	DE_UNREF(argCount);
17995 
17996 	de::Random			rnd		(seed << argNo);
17997 	vector<deFloat16>	result;
17998 
17999 	if (argNo == 0 || argNo == 1)
18000 	{
18001 		// The input parameters for the incident vector I and the surface normal N must already be normalized
18002 		for (size_t numIdx = 0; numIdx < count; numIdx += stride)
18003 		{
18004 			vector <float>	unnormolized;
18005 			float			sum				= 0;
18006 
18007 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18008 				unnormolized.push_back(float((rnd.getUint16() % 16) - 8));
18009 
18010 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18011 				sum += unnormolized[compIdx] * unnormolized[compIdx];
18012 
18013 			sum = deFloatSqrt(sum);
18014 			if (sum == 0.0f)
18015 				unnormolized[0] = sum = 1.0f;
18016 
18017 			for (size_t compIdx = 0; compIdx < compCount; ++compIdx)
18018 				result.push_back(tcu::Float16(unnormolized[compIdx] / sum).bits());
18019 
18020 			for (size_t compIdx = compCount; compIdx < stride; ++compIdx)
18021 				result.push_back(0);
18022 		}
18023 	}
18024 	else
18025 	{
18026 		// Input parameter eta
18027 		for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18028 		{
18029 			int num	= (rnd.getUint16() % 16) - 8;
18030 
18031 			result.push_back(tcu::Float16(float(num)).bits());
18032 		}
18033 	}
18034 
18035 	DE_ASSERT(result.size() == count);
18036 
18037 	return result;
18038 }
18039 
18040 // Data generator for complex matrix functions like determinant and inverse
getInputDataC(deUint32 seed,size_t count,size_t compCount,size_t stride,size_t argCount,size_t argNo)18041 vector<deFloat16> getInputDataC (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo)
18042 {
18043 	DE_UNREF(compCount);
18044 	DE_UNREF(stride);
18045 	DE_UNREF(argCount);
18046 
18047 	de::Random			rnd		(seed << argNo);
18048 	vector<deFloat16>	result;
18049 
18050 	for (deUint32 numIdx = 0; numIdx < count; ++numIdx)
18051 	{
18052 		int num	= (rnd.getUint16() % 16) - 8;
18053 
18054 		result.push_back(tcu::Float16(float(num)).bits());
18055 	}
18056 
18057 	DE_ASSERT(result.size() == count);
18058 
18059 	return result;
18060 }
18061 
// Describes one fp16 data type (scalar, vector or matrix) that the float16
// arithmetic function tests can use as an argument or as a result.
struct Math16TestType
{
	const char*		typePrefix;			//!< SPIR-V type name prefix ("", "v2", "m3x4", ...); the component type name is appended to it
	const size_t	typeComponents;		//!< Component count of scalar and vector types; the type table uses 0 for matrices
	const size_t	typeArrayStride;	//!< Size in bytes of one item in the input/output arrays
	const size_t	typeStructStride;	//!< Byte offset given to member 1 of the temporary result structs (${struct_stride})
	const char*		storage_type;		//!< Suffix selecting the SSBO type holding the data (%up_SSBO_<storage_type>)
};
18070 
// Indices into the per-type tables of the float16 arithmetic function tests.
// For scalars and vectors the enumerator value equals the component count.
enum Math16DataTypes
{
	NONE				= 0,
	SCALAR				= 1,
	VEC2				= 2,
	VEC3				= 3,
	VEC4				= 4,
	MAT2X2				= 5,
	MAT2X3				= 6,
	MAT2X4				= 7,
	MAT3X2				= 8,
	MAT3X3				= 9,
	MAT3X4				= 10,
	MAT4X2				= 11,
	MAT4X3				= 12,
	MAT4X4				= 13,
	MATH16_TYPE_LAST	= 14	//!< Number of entries, used to size the tables
};
18089 
// SPIR-V assembly templates describing how one tested function is invoked.
// All members are StringTemplate sources; an empty string means that nothing
// has to be added in that place.
struct Math16ArgFragments
{
	const char*	bodies;			//!< Instructions executed per data point: load the arguments, call the function, store the result
	const char*	variables;		//!< Additional module-level type/constant declarations
	const char*	decorations;	//!< Additional decorations
	const char*	funcVariables;	//!< Additional function-scope OpVariable declarations
};
18097 
18098 typedef vector<deFloat16> Math16GetInputData (deUint32 seed, size_t count, size_t compCount, size_t stride, size_t argCount, size_t argNo);
18099 
18100 struct Math16TestFunc
18101 {
18102 	const char*					funcName;
18103 	const char*					funcSuffix;
18104 	size_t						funcArgsCount;
18105 	size_t						typeResult;
18106 	size_t						typeArg0;
18107 	size_t						typeArg1;
18108 	size_t						typeArg2;
18109 	Math16GetInputData*			getInputDataFunc;
18110 	VerifyIOFunc				verifyFunc;
18111 };
18112 
18113 template<class SpecResource>
createFloat16ArithmeticFuncTest(tcu::TestContext & testCtx,tcu::TestCaseGroup & testGroup,const size_t testTypeIdx,const Math16TestFunc & testFunc)18114 void createFloat16ArithmeticFuncTest (tcu::TestContext& testCtx, tcu::TestCaseGroup& testGroup, const size_t testTypeIdx, const Math16TestFunc& testFunc)
18115 {
18116 	const int					testSpecificSeed			= deStringHash(testGroup.getName());
18117 	const int					seed						= testCtx.getCommandLine().getBaseSeed() ^ testSpecificSeed;
18118 	const size_t				numDataPointsByAxis			= 32;
18119 	const size_t				numDataPoints				= numDataPointsByAxis * numDataPointsByAxis;
18120 	const char*					componentType				= "f16";
18121 	const Math16TestType		testTypes[MATH16_TYPE_LAST]	=
18122 	{
18123 		{ "",		0,	 0,						 0,						"" },
18124 		{ "",		1,	 1 * sizeof(deFloat16),	 2 * sizeof(deFloat16),	"u32_half_ndp" },
18125 		{ "v2",		2,	 2 * sizeof(deFloat16),	 2 * sizeof(deFloat16),	"u32_ndp" },
18126 		{ "v3",		3,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18127 		{ "v4",		4,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18128 		{ "m2x2",	0,	 4 * sizeof(deFloat16),	 4 * sizeof(deFloat16),	"u32_ndp_2" },
18129 		{ "m2x3",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18130 		{ "m2x4",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18131 		{ "m3x2",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_3" },
18132 		{ "m3x3",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_6" },
18133 		{ "m3x4",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_6" },
18134 		{ "m4x2",	0,	 8 * sizeof(deFloat16),	 8 * sizeof(deFloat16),	"u32_ndp_4" },
18135 		{ "m4x3",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_8" },
18136 		{ "m4x4",	0,	16 * sizeof(deFloat16),	16 * sizeof(deFloat16),	"u32_ndp_8" },
18137 	};
18138 
18139 	DE_ASSERT(testTypeIdx == testTypes[testTypeIdx].typeComponents);
18140 
18141 
18142 	const StringTemplate preMain
18143 	(
18144 		"     %c_i32_ndp  = OpConstant %i32 ${num_data_points}\n"
18145 
18146 		"        %f16     = OpTypeFloat 16\n"
18147 		"        %v2f16   = OpTypeVector %f16 2\n"
18148 		"        %v3f16   = OpTypeVector %f16 3\n"
18149 		"        %v4f16   = OpTypeVector %f16 4\n"
18150 		"        %m2x2f16 = OpTypeMatrix %v2f16 2\n"
18151 		"        %m2x3f16 = OpTypeMatrix %v3f16 2\n"
18152 		"        %m2x4f16 = OpTypeMatrix %v4f16 2\n"
18153 		"        %m3x2f16 = OpTypeMatrix %v2f16 3\n"
18154 		"        %m3x3f16 = OpTypeMatrix %v3f16 3\n"
18155 		"        %m3x4f16 = OpTypeMatrix %v4f16 3\n"
18156 		"        %m4x2f16 = OpTypeMatrix %v2f16 4\n"
18157 		"        %m4x3f16 = OpTypeMatrix %v3f16 4\n"
18158 		"        %m4x4f16 = OpTypeMatrix %v4f16 4\n"
18159 
18160 		"       %fp_v2i32 = OpTypePointer Function %v2i32\n"
18161 		"       %fp_v3i32 = OpTypePointer Function %v3i32\n"
18162 		"       %fp_v4i32 = OpTypePointer Function %v4i32\n"
18163 
18164 		"      %c_u32_ndp = OpConstant %u32 ${num_data_points}\n"
18165 		" %c_u32_half_ndp = OpSpecConstantOp %u32 UDiv %c_i32_ndp %c_u32_2\n"
18166 		"        %c_u32_5 = OpConstant %u32 5\n"
18167 		"        %c_u32_6 = OpConstant %u32 6\n"
18168 		"        %c_u32_7 = OpConstant %u32 7\n"
18169 		"        %c_u32_8 = OpConstant %u32 8\n"
18170 		"        %c_f16_0 = OpConstant %f16 0\n"
18171 		"        %c_f16_1 = OpConstant %f16 1\n"
18172 		"      %c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
18173 		"         %up_u32 = OpTypePointer Uniform %u32\n"
18174 		"%c_u32_high_ones = OpConstant %u32 0xffff0000\n"
18175 		" %c_u32_low_ones = OpConstant %u32 0x0000ffff\n"
18176 
18177 		"    %ra_u32_half_ndp = OpTypeArray %u32 %c_u32_half_ndp\n"
18178 		"  %SSBO_u32_half_ndp = OpTypeStruct %ra_u32_half_ndp\n"
18179 		"%up_SSBO_u32_half_ndp = OpTypePointer Uniform %SSBO_u32_half_ndp\n"
18180 		"         %ra_u32_ndp = OpTypeArray %u32 %c_u32_ndp\n"
18181 		"       %SSBO_u32_ndp = OpTypeStruct %ra_u32_ndp\n"
18182 		"    %up_SSBO_u32_ndp = OpTypePointer Uniform %SSBO_u32_ndp\n"
18183 		"           %ra_u32_2 = OpTypeArray %u32 %c_u32_2\n"
18184 		"        %up_ra_u32_2 = OpTypePointer Uniform %ra_u32_2\n"
18185 		"      %ra_ra_u32_ndp = OpTypeArray %ra_u32_2 %c_u32_ndp\n"
18186 		"     %SSBO_u32_ndp_2 = OpTypeStruct %ra_ra_u32_ndp\n"
18187 		"  %up_SSBO_u32_ndp_2 = OpTypePointer Uniform %SSBO_u32_ndp_2\n"
18188 		"           %ra_u32_4 = OpTypeArray %u32 %c_u32_4\n"
18189 		"        %up_ra_u32_4 = OpTypePointer Uniform %ra_u32_4\n"
18190 		"        %ra_ra_u32_4 = OpTypeArray %ra_u32_4 %c_u32_ndp\n"
18191 		"     %SSBO_u32_ndp_4 = OpTypeStruct %ra_ra_u32_4\n"
18192 		"  %up_SSBO_u32_ndp_4 = OpTypePointer Uniform %SSBO_u32_ndp_4\n"
18193 		"           %ra_u32_3 = OpTypeArray %u32 %c_u32_3\n"
18194 		"        %up_ra_u32_3 = OpTypePointer Uniform %ra_u32_3\n"
18195 		"        %ra_ra_u32_3 = OpTypeArray %ra_u32_3 %c_u32_ndp\n"
18196 		"     %SSBO_u32_ndp_3 = OpTypeStruct %ra_ra_u32_3\n"
18197 		"  %up_SSBO_u32_ndp_3 = OpTypePointer Uniform %SSBO_u32_ndp_3\n"
18198 		"           %ra_u32_6 = OpTypeArray %u32 %c_u32_6\n"
18199 		"        %up_ra_u32_6 = OpTypePointer Uniform %ra_u32_6\n"
18200 		"        %ra_ra_u32_6 = OpTypeArray %ra_u32_6 %c_u32_ndp\n"
18201 		"     %SSBO_u32_ndp_6 = OpTypeStruct %ra_ra_u32_6\n"
18202 		"  %up_SSBO_u32_ndp_6 = OpTypePointer Uniform %SSBO_u32_ndp_6\n"
18203 		"           %ra_u32_8 = OpTypeArray %u32 %c_u32_8\n"
18204 		"        %up_ra_u32_8 = OpTypePointer Uniform %ra_u32_8\n"
18205 		"        %ra_ra_u32_8 = OpTypeArray %ra_u32_8 %c_u32_ndp\n"
18206 		"     %SSBO_u32_ndp_8 = OpTypeStruct %ra_ra_u32_8\n"
18207 		"  %up_SSBO_u32_ndp_8 = OpTypePointer Uniform %SSBO_u32_ndp_8\n"
18208 
18209 		"         %f16_i32_fn = OpTypeFunction %f16 %i32\n"
18210 		"       %v2f16_i32_fn = OpTypeFunction %v2f16 %i32\n"
18211 		"       %v3f16_i32_fn = OpTypeFunction %v3f16 %i32\n"
18212 		"       %v4f16_i32_fn = OpTypeFunction %v4f16 %i32\n"
18213 		"     %m2x2f16_i32_fn = OpTypeFunction %m2x2f16 %i32\n"
18214 		"     %m2x3f16_i32_fn = OpTypeFunction %m2x3f16 %i32\n"
18215 		"     %m2x4f16_i32_fn = OpTypeFunction %m2x4f16 %i32\n"
18216 		"     %m3x2f16_i32_fn = OpTypeFunction %m3x2f16 %i32\n"
18217 		"     %m3x3f16_i32_fn = OpTypeFunction %m3x3f16 %i32\n"
18218 		"     %m3x4f16_i32_fn = OpTypeFunction %m3x4f16 %i32\n"
18219 		"     %m4x2f16_i32_fn = OpTypeFunction %m4x2f16 %i32\n"
18220 		"     %m4x3f16_i32_fn = OpTypeFunction %m4x3f16 %i32\n"
18221 		"     %m4x4f16_i32_fn = OpTypeFunction %m4x4f16 %i32\n"
18222 		"    %void_f16_i32_fn = OpTypeFunction %void %f16 %i32\n"
18223 		"  %void_v2f16_i32_fn = OpTypeFunction %void %v2f16 %i32\n"
18224 		"  %void_v3f16_i32_fn = OpTypeFunction %void %v3f16 %i32\n"
18225 		"  %void_v4f16_i32_fn = OpTypeFunction %void %v4f16 %i32\n"
18226 		"%void_m2x2f16_i32_fn = OpTypeFunction %void %m2x2f16 %i32\n"
18227 		"%void_m2x3f16_i32_fn = OpTypeFunction %void %m2x3f16 %i32\n"
18228 		"%void_m2x4f16_i32_fn = OpTypeFunction %void %m2x4f16 %i32\n"
18229 		"%void_m3x2f16_i32_fn = OpTypeFunction %void %m3x2f16 %i32\n"
18230 		"%void_m3x3f16_i32_fn = OpTypeFunction %void %m3x3f16 %i32\n"
18231 		"%void_m3x4f16_i32_fn = OpTypeFunction %void %m3x4f16 %i32\n"
18232 		"%void_m4x2f16_i32_fn = OpTypeFunction %void %m4x2f16 %i32\n"
18233 		"%void_m4x3f16_i32_fn = OpTypeFunction %void %m4x3f16 %i32\n"
18234 		"%void_m4x4f16_i32_fn = OpTypeFunction %void %m4x4f16 %i32\n"
18235 		"${arg_vars}"
18236 	);
18237 
18238 	const StringTemplate decoration
18239 	(
18240 		"OpDecorate %ra_u32_half_ndp ArrayStride 4\n"
18241 		"OpMemberDecorate %SSBO_u32_half_ndp 0 Offset 0\n"
18242 		"OpDecorate %SSBO_u32_half_ndp BufferBlock\n"
18243 
18244 		"OpDecorate %ra_u32_ndp ArrayStride 4\n"
18245 		"OpMemberDecorate %SSBO_u32_ndp 0 Offset 0\n"
18246 		"OpDecorate %SSBO_u32_ndp BufferBlock\n"
18247 
18248 		"OpDecorate %ra_u32_2 ArrayStride 4\n"
18249 		"OpDecorate %ra_ra_u32_ndp ArrayStride 8\n"
18250 		"OpMemberDecorate %SSBO_u32_ndp_2 0 Offset 0\n"
18251 		"OpDecorate %SSBO_u32_ndp_2 BufferBlock\n"
18252 
18253 		"OpDecorate %ra_u32_4 ArrayStride 4\n"
18254 		"OpDecorate %ra_ra_u32_4 ArrayStride 16\n"
18255 		"OpMemberDecorate %SSBO_u32_ndp_4 0 Offset 0\n"
18256 		"OpDecorate %SSBO_u32_ndp_4 BufferBlock\n"
18257 
18258 		"OpDecorate %ra_u32_3 ArrayStride 4\n"
18259 		"OpDecorate %ra_ra_u32_3 ArrayStride 16\n"
18260 		"OpMemberDecorate %SSBO_u32_ndp_3 0 Offset 0\n"
18261 		"OpDecorate %SSBO_u32_ndp_3 BufferBlock\n"
18262 
18263 		"OpDecorate %ra_u32_6 ArrayStride 4\n"
18264 		"OpDecorate %ra_ra_u32_6 ArrayStride 32\n"
18265 		"OpMemberDecorate %SSBO_u32_ndp_6 0 Offset 0\n"
18266 		"OpDecorate %SSBO_u32_ndp_6 BufferBlock\n"
18267 
18268 		"OpDecorate %ra_u32_8 ArrayStride 4\n"
18269 		"OpDecorate %ra_ra_u32_8 ArrayStride 32\n"
18270 		"OpMemberDecorate %SSBO_u32_ndp_8 0 Offset 0\n"
18271 		"OpDecorate %SSBO_u32_ndp_8 BufferBlock\n"
18272 
18273 		"${arg_decorations}"
18274 	);
18275 
18276 	const StringTemplate testFun
18277 	(
18278 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
18279 		"    %param = OpFunctionParameter %v4f32\n"
18280 		"    %entry = OpLabel\n"
18281 
18282 		"        %i = OpVariable %fp_i32 Function\n"
18283 		"${arg_infunc_vars}"
18284 		"             OpStore %i %c_i32_0\n"
18285 		"             OpBranch %loop\n"
18286 
18287 		"     %loop = OpLabel\n"
18288 		"    %i_cmp = OpLoad %i32 %i\n"
18289 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
18290 		"             OpLoopMerge %merge %next None\n"
18291 		"             OpBranchConditional %lt %write %merge\n"
18292 
18293 		"    %write = OpLabel\n"
18294 		"      %ndx = OpLoad %i32 %i\n"
18295 
18296 		"${arg_func_call}"
18297 
18298 		"             OpBranch %next\n"
18299 
18300 		"     %next = OpLabel\n"
18301 		"    %i_cur = OpLoad %i32 %i\n"
18302 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
18303 		"             OpStore %i %i_new\n"
18304 		"             OpBranch %loop\n"
18305 
18306 		"    %merge = OpLabel\n"
18307 		"             OpReturnValue %param\n"
18308 		"             OpFunctionEnd\n"
18309 	);
18310 
18311 	const Math16ArgFragments	argFragment1	=
18312 	{
18313 		"     %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18314 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0\n"
18315 		"     %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18316 		"",
18317 		"",
18318 		"",
18319 	};
18320 
18321 	const Math16ArgFragments	argFragment2	=
18322 	{
18323 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18324 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18325 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1\n"
18326 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18327 		"",
18328 		"",
18329 		"",
18330 	};
18331 
18332 	const Math16ArgFragments	argFragment3	=
18333 	{
18334 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18335 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18336 		" %val_src2 = OpFunctionCall %${t2} %ld_arg_ssbo_src2 %ndx\n"
18337 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1 %val_src2\n"
18338 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18339 		"",
18340 		"",
18341 		"",
18342 	};
18343 
18344 	const Math16ArgFragments	argFragmentLdExp	=
18345 	{
18346 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18347 		" %val_src1 = OpFunctionCall %${t1} %ld_arg_ssbo_src1 %ndx\n"
18348 		"%val_src1i = OpConvertFToS %${dr}i32 %val_src1\n"
18349 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %val_src1i\n"
18350 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18351 
18352 		"",
18353 
18354 		"",
18355 
18356 		"",
18357 	};
18358 
18359 	const Math16ArgFragments	argFragmentModfFrac	=
18360 	{
18361 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18362 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18363 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18364 
18365 		"   %fp_tmp = OpTypePointer Function %${tr}\n",
18366 
18367 		"",
18368 
18369 		"      %tmp = OpVariable %fp_tmp Function\n",
18370 	};
18371 
18372 	const Math16ArgFragments	argFragmentModfInt	=
18373 	{
18374 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18375 		"%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %tmp\n"
18376 		"     %tmp0 = OpAccessChain %fp_tmp %tmp\n"
18377 		"  %val_dst = OpLoad %${tr} %tmp0\n"
18378 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18379 
18380 		"   %fp_tmp = OpTypePointer Function %${tr}\n",
18381 
18382 		"",
18383 
18384 		"      %tmp = OpVariable %fp_tmp Function\n",
18385 	};
18386 
18387 	const Math16ArgFragments	argFragmentModfStruct	=
18388 	{
18389 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18390 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18391 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18392 		"             OpStore %tmp_ptr_s %val_tmp\n"
18393 		"%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_${struct_member}\n"
18394 		"  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18395 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18396 
18397 		"  %fp_${tr} = OpTypePointer Function %${tr}\n"
18398 		"   %st_tmp = OpTypeStruct %${tr} %${tr}\n"
18399 		"   %fp_tmp = OpTypePointer Function %st_tmp\n"
18400 		"   %c_frac = OpConstant %i32 0\n"
18401 		"    %c_int = OpConstant %i32 1\n",
18402 
18403 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18404 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18405 
18406 		"      %tmp = OpVariable %fp_tmp Function\n",
18407 	};
18408 
18409 	const Math16ArgFragments	argFragmentFrexpStructS	=
18410 	{
18411 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18412 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18413 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18414 		"             OpStore %tmp_ptr_s %val_tmp\n"
18415 		"%tmp_ptr_l = OpAccessChain %fp_${tr} %tmp %c_i32_0\n"
18416 		"  %val_dst = OpLoad %${tr} %tmp_ptr_l\n"
18417 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18418 
18419 		"  %fp_${tr} = OpTypePointer Function %${tr}\n"
18420 		"   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18421 		"   %fp_tmp = OpTypePointer Function %st_tmp\n",
18422 
18423 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18424 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18425 
18426 		"      %tmp = OpVariable %fp_tmp Function\n",
18427 	};
18428 
18429 	const Math16ArgFragments	argFragmentFrexpStructE	=
18430 	{
18431 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18432 		"  %val_tmp = ${op} %st_tmp ${ext_inst} %val_src0\n"
18433 		"%tmp_ptr_s = OpAccessChain %fp_tmp %tmp\n"
18434 		"             OpStore %tmp_ptr_s %val_tmp\n"
18435 		"%tmp_ptr_l = OpAccessChain %fp_${dr}i32 %tmp %c_i32_1\n"
18436 		"%val_dst_i = OpLoad %${dr}i32 %tmp_ptr_l\n"
18437 		"  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18438 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18439 
18440 		"   %st_tmp = OpTypeStruct %${tr} %${dr}i32\n"
18441 		"   %fp_tmp = OpTypePointer Function %st_tmp\n",
18442 
18443 		"OpMemberDecorate %st_tmp 0 Offset 0\n"
18444 		"OpMemberDecorate %st_tmp 1 Offset ${struct_stride}\n",
18445 
18446 		"      %tmp = OpVariable %fp_tmp Function\n",
18447 	};
18448 
18449 	const Math16ArgFragments	argFragmentFrexpS		=
18450 	{
18451 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18452 		"  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18453 		"  %val_dst = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18454 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18455 
18456 		"",
18457 
18458 		"",
18459 
18460 		"      %tmp = OpVariable %fp_${dr}i32 Function\n",
18461 	};
18462 
18463 	const Math16ArgFragments	argFragmentFrexpE		=
18464 	{
18465 		" %val_src0 = OpFunctionCall %${t0} %ld_arg_ssbo_src0 %ndx\n"
18466 		"  %out_exp = OpAccessChain %fp_${dr}i32 %tmp\n"
18467 		"%val_dummy = ${op} %${tr} ${ext_inst} %val_src0 %out_exp\n"
18468 		"%val_dst_i = OpLoad %${dr}i32 %out_exp\n"
18469 		"  %val_dst = OpConvertSToF %${tr} %val_dst_i\n"
18470 		"      %dst = OpFunctionCall %void %st_fn_ssbo_dst %val_dst %ndx\n",
18471 
18472 		"",
18473 
18474 		"",
18475 
18476 		"      %tmp = OpVariable %fp_${dr}i32 Function\n",
18477 	};
18478 
18479 	string load_funcs[MATH16_TYPE_LAST];
18480 	load_funcs[SCALAR] = loadScalarF16FromUint;
18481 	load_funcs[VEC2]   = loadV2F16FromUint;
18482 	load_funcs[VEC3]   = loadV3F16FromUints;
18483 	load_funcs[VEC4]   = loadV4F16FromUints;
18484 	load_funcs[MAT2X2] = loadM2x2F16FromUints;
18485 	load_funcs[MAT2X3] = loadM2x3F16FromUints;
18486 	load_funcs[MAT2X4] = loadM2x4F16FromUints;
18487 	load_funcs[MAT3X2] = loadM3x2F16FromUints;
18488 	load_funcs[MAT3X3] = loadM3x3F16FromUints;
18489 	load_funcs[MAT3X4] = loadM3x4F16FromUints;
18490 	load_funcs[MAT4X2] = loadM4x2F16FromUints;
18491 	load_funcs[MAT4X3] = loadM4x3F16FromUints;
18492 	load_funcs[MAT4X4] = loadM4x4F16FromUints;
18493 
18494 	string store_funcs[MATH16_TYPE_LAST];
18495 	store_funcs[SCALAR] = storeScalarF16AsUint;
18496 	store_funcs[VEC2]   = storeV2F16AsUint;
18497 	store_funcs[VEC3]   = storeV3F16AsUints;
18498 	store_funcs[VEC4]   = storeV4F16AsUints;
18499 	store_funcs[MAT2X2] = storeM2x2F16AsUints;
18500 	store_funcs[MAT2X3] = storeM2x3F16AsUints;
18501 	store_funcs[MAT2X4] = storeM2x4F16AsUints;
18502 	store_funcs[MAT3X2] = storeM3x2F16AsUints;
18503 	store_funcs[MAT3X3] = storeM3x3F16AsUints;
18504 	store_funcs[MAT3X4] = storeM3x4F16AsUints;
18505 	store_funcs[MAT4X2] = storeM4x2F16AsUints;
18506 	store_funcs[MAT4X3] = storeM4x3F16AsUints;
18507 	store_funcs[MAT4X4] = storeM4x4F16AsUints;
18508 
18509 	const Math16TestType&		testType				= testTypes[testTypeIdx];
18510 	const string				funcNameString			= string(testFunc.funcName) + string(testFunc.funcSuffix);
18511 	const string				testName				= de::toLower(funcNameString);
18512 	const Math16ArgFragments*	argFragments			= DE_NULL;
18513 	const size_t				typeStructStride		= testType.typeStructStride;
18514 	const bool					extInst					= !(testFunc.funcName[0] == 'O' && testFunc.funcName[1] == 'p');
18515 	const size_t				numFloatsPerArg0Type	= testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16);
18516 	const size_t				iterations				= numDataPoints / numFloatsPerArg0Type;
18517 	const size_t				numFloatsPerResultType	= testTypes[testFunc.typeResult].typeArrayStride / sizeof(deFloat16);
18518 	const vector<deFloat16>		float16DummyOutput		(iterations * numFloatsPerResultType, 0);
18519 	VulkanFeatures				features;
18520 	SpecResource				specResource;
18521 	map<string, string>			specs;
18522 	map<string, string>			fragments;
18523 	vector<string>				extensions;
18524 	string						funcCall;
18525 	string						funcVariables;
18526 	string						variables;
18527 	string						declarations;
18528 	string						decorations;
18529 	string						functions;
18530 
18531 	switch (testFunc.funcArgsCount)
18532 	{
18533 		case 1:
18534 		{
18535 			argFragments = &argFragment1;
18536 
18537 			if (funcNameString == "ModfFrac")		argFragments = &argFragmentModfFrac;
18538 			if (funcNameString == "ModfInt")		argFragments = &argFragmentModfInt;
18539 			if (funcNameString == "ModfStructFrac")	argFragments = &argFragmentModfStruct;
18540 			if (funcNameString == "ModfStructInt")	argFragments = &argFragmentModfStruct;
18541 			if (funcNameString == "FrexpS")			argFragments = &argFragmentFrexpS;
18542 			if (funcNameString == "FrexpE")			argFragments = &argFragmentFrexpE;
18543 			if (funcNameString == "FrexpStructS")	argFragments = &argFragmentFrexpStructS;
18544 			if (funcNameString == "FrexpStructE")	argFragments = &argFragmentFrexpStructE;
18545 
18546 			break;
18547 		}
18548 		case 2:
18549 		{
18550 			argFragments = &argFragment2;
18551 
18552 			if (funcNameString == "Ldexp")			argFragments = &argFragmentLdExp;
18553 
18554 			break;
18555 		}
18556 		case 3:
18557 		{
18558 			argFragments = &argFragment3;
18559 
18560 			break;
18561 		}
18562 		default:
18563 		{
18564 			TCU_THROW(InternalError, "Invalid number of arguments");
18565 		}
18566 	}
18567 
18568 	functions = StringTemplate(store_funcs[testFunc.typeResult]).specialize({{"var", "ssbo_dst"}});
18569 	if (testFunc.funcArgsCount == 1)
18570 	{
18571 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18572 		variables +=
18573 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18574 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18575 
18576 		decorations +=
18577 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18578 			"OpDecorate %ssbo_src0 Binding 0\n"
18579 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18580 			"OpDecorate %ssbo_dst Binding 1\n";
18581 	}
18582 	else if (testFunc.funcArgsCount == 2)
18583 	{
18584 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18585 		functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18586 		variables +=
18587 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18588 			" %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18589 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18590 
18591 		decorations +=
18592 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18593 			"OpDecorate %ssbo_src0 Binding 0\n"
18594 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
18595 			"OpDecorate %ssbo_src1 Binding 1\n"
18596 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18597 			"OpDecorate %ssbo_dst Binding 2\n";
18598 	}
18599 	else if (testFunc.funcArgsCount == 3)
18600 	{
18601 		functions += StringTemplate(load_funcs[testFunc.typeArg0]).specialize({{"var", "ssbo_src0"}});
18602 		functions += StringTemplate(load_funcs[testFunc.typeArg1]).specialize({{"var", "ssbo_src1"}});
18603 		functions += StringTemplate(load_funcs[testFunc.typeArg2]).specialize({{"var", "ssbo_src2"}});
18604 		variables +=
18605 			" %ssbo_src0 = OpVariable %up_SSBO_${store_t0} Uniform\n"
18606 			" %ssbo_src1 = OpVariable %up_SSBO_${store_t1} Uniform\n"
18607 			" %ssbo_src2 = OpVariable %up_SSBO_${store_t2} Uniform\n"
18608 			"  %ssbo_dst = OpVariable %up_SSBO_${store_tr} Uniform\n";
18609 
18610 		decorations +=
18611 			"OpDecorate %ssbo_src0 DescriptorSet 0\n"
18612 			"OpDecorate %ssbo_src0 Binding 0\n"
18613 			"OpDecorate %ssbo_src1 DescriptorSet 0\n"
18614 			"OpDecorate %ssbo_src1 Binding 1\n"
18615 			"OpDecorate %ssbo_src2 DescriptorSet 0\n"
18616 			"OpDecorate %ssbo_src2 Binding 2\n"
18617 			"OpDecorate %ssbo_dst DescriptorSet 0\n"
18618 			"OpDecorate %ssbo_dst Binding 3\n";
18619 	}
18620 	else
18621 	{
18622 		TCU_THROW(InternalError, "Invalid number of function arguments");
18623 	}
18624 
18625 	variables	+= argFragments->variables;
18626 	decorations	+= argFragments->decorations;
18627 
18628 	specs["dr"]					= testTypes[testFunc.typeResult].typePrefix;
18629 	specs["d0"]					= testTypes[testFunc.typeArg0].typePrefix;
18630 	specs["d1"]					= testTypes[testFunc.typeArg1].typePrefix;
18631 	specs["d2"]					= testTypes[testFunc.typeArg2].typePrefix;
18632 	specs["tr"]					= string(testTypes[testFunc.typeResult].typePrefix) + componentType;
18633 	specs["t0"]					= string(testTypes[testFunc.typeArg0].typePrefix) + componentType;
18634 	specs["t1"]					= string(testTypes[testFunc.typeArg1].typePrefix) + componentType;
18635 	specs["t2"]					= string(testTypes[testFunc.typeArg2].typePrefix) + componentType;
18636 	specs["store_tr"]			= string(testTypes[testFunc.typeResult].storage_type);
18637 	specs["store_t0"]			= string(testTypes[testFunc.typeArg0].storage_type);
18638 	specs["store_t1"]			= string(testTypes[testFunc.typeArg1].storage_type);
18639 	specs["store_t2"]			= string(testTypes[testFunc.typeArg2].storage_type);
18640 	specs["struct_stride"]		= de::toString(typeStructStride);
18641 	specs["op"]					= extInst ? "OpExtInst" : testFunc.funcName;
18642 	specs["ext_inst"]			= extInst ? string("%ext_import ") + testFunc.funcName : "";
18643 	specs["struct_member"]		= de::toLower(testFunc.funcSuffix);
18644 
18645 	variables					= StringTemplate(variables).specialize(specs);
18646 	decorations					= StringTemplate(decorations).specialize(specs);
18647 	funcVariables				= StringTemplate(argFragments->funcVariables).specialize(specs);
18648 	funcCall					= StringTemplate(argFragments->bodies).specialize(specs);
18649 
18650 	specs["num_data_points"]	= de::toString(iterations);
18651 	specs["arg_vars"]			= variables;
18652 	specs["arg_decorations"]	= decorations;
18653 	specs["arg_infunc_vars"]	= funcVariables;
18654 	specs["arg_func_call"]		= funcCall;
18655 
18656 	fragments["extension"]		= "%ext_import = OpExtInstImport \"GLSL.std.450\"";
18657 	fragments["capability"]		= "OpCapability Matrix\nOpCapability Float16\n";
18658 	fragments["decoration"]		= decoration.specialize(specs);
18659 	fragments["pre_main"]		= preMain.specialize(specs) + functions;
18660 	fragments["testfun"]		= testFun.specialize(specs);
18661 
18662 	for (size_t inputArgNdx = 0; inputArgNdx < testFunc.funcArgsCount; ++inputArgNdx)
18663 	{
18664 		const size_t			numFloatsPerItem	= (inputArgNdx == 0) ? testTypes[testFunc.typeArg0].typeArrayStride / sizeof(deFloat16)
18665 													: (inputArgNdx == 1) ? testTypes[testFunc.typeArg1].typeArrayStride / sizeof(deFloat16)
18666 													: (inputArgNdx == 2) ? testTypes[testFunc.typeArg2].typeArrayStride / sizeof(deFloat16)
18667 													: -1;
18668 		const vector<deFloat16>	inputData			= testFunc.getInputDataFunc(seed, numFloatsPerItem * iterations, testTypeIdx, numFloatsPerItem, testFunc.funcArgsCount, inputArgNdx);
18669 
18670 		specResource.inputs.push_back(Resource(BufferSp(new Float16Buffer(inputData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18671 	}
18672 
18673 	specResource.outputs.push_back(Resource(BufferSp(new Float16Buffer(float16DummyOutput)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
18674 	specResource.verifyIO = testFunc.verifyFunc;
18675 
18676 	extensions.push_back("VK_KHR_shader_float16_int8");
18677 
18678 	features.extFloat16Int8.shaderFloat16 = true;
18679 
18680 	finalizeTestsCreation(specResource, fragments, testCtx, testGroup, testName, features, extensions, IVec3(1, 1, 1));
18681 }
18682 
18683 template<size_t C, class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18684 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18685 {
18686 	DE_STATIC_ASSERT(C >= 1 && C <= 4);
18687 
18688 	const std::string				testGroupName	(string("arithmetic_") + de::toString(C));
18689 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18690 	const Math16TestFunc			testFuncs[]		=
18691 	{
18692 		{	"OpFNegate",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16OpFNegate>					},
18693 		{	"Round",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Round>						},
18694 		{	"RoundEven",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16RoundEven>					},
18695 		{	"Trunc",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Trunc>						},
18696 		{	"FAbs",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FAbs>						},
18697 		{	"FSign",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FSign>						},
18698 		{	"Floor",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Floor>						},
18699 		{	"Ceil",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Ceil>						},
18700 		{	"Fract",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Fract>						},
18701 		{	"Radians",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Radians>						},
18702 		{	"Degrees",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Degrees>						},
18703 		{	"Sin",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sin>							},
18704 		{	"Cos",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cos>							},
18705 		{	"Tan",					"",			1,	C,		C,		0,		0, &getInputDataPI,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tan>							},
18706 		{	"Asin",					"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asin>						},
18707 		{	"Acos",					"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acos>						},
18708 		{	"Atan",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atan>						},
18709 		{	"Sinh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sinh>						},
18710 		{	"Cosh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Cosh>						},
18711 		{	"Tanh",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Tanh>						},
18712 		{	"Asinh",				"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Asinh>						},
18713 		{	"Acosh",				"",			1,	C,		C,		0,		0, &getInputDataAC,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Acosh>						},
18714 		{	"Atanh",				"",			1,	C,		C,		0,		0, &getInputDataA,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Atanh>						},
18715 		{	"Exp",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp>							},
18716 		{	"Log",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log>							},
18717 		{	"Exp2",					"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Exp2>						},
18718 		{	"Log2",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Log2>						},
18719 		{	"Sqrt",					"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Sqrt>						},
18720 		{	"InverseSqrt",			"",			1,	C,		C,		0,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16InverseSqrt>					},
18721 		{	"Modf",					"Frac",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>					},
18722 		{	"Modf",					"Int",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>						},
18723 		{	"ModfStruct",			"Frac",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfFrac>					},
18724 		{	"ModfStruct",			"Int",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16ModfInt>						},
18725 		{	"Frexp",				"S",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>						},
18726 		{	"Frexp",				"E",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>						},
18727 		{	"FrexpStruct",			"S",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpS>						},
18728 		{	"FrexpStruct",			"E",		1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16FrexpE>						},
18729 		{	"OpFAdd",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFAdd>						},
18730 		{	"OpFSub",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFSub>						},
18731 		{	"OpFMul",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFMul>						},
18732 		{	"OpFDiv",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16OpFDiv>						},
18733 		{	"Atan2",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Atan2>						},
18734 		{	"Pow",					"",			2,	C,		C,		C,		0, &getInputDataP,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Pow>							},
18735 		{	"FMin",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMin>						},
18736 		{	"FMax",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16FMax>						},
18737 		{	"Step",					"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Step>						},
18738 		{	"Ldexp",				"",			2,	C,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Ldexp>						},
18739 		{	"FClamp",				"",			3,	C,		C,		C,		C, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FClamp>						},
18740 		{	"FMix",					"",			3,	C,		C,		C,		C, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FMix>						},
18741 		{	"SmoothStep",			"",			3,	C,		C,		C,		C, &getInputDataSS,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16SmoothStep>					},
18742 		{	"Fma",					"",			3,	C,		C,		C,		C, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16Fma>							},
18743 		{	"Length",				"",			1,	1,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  1,  C,  0,  0, fp16Length>						},
18744 		{	"Distance",				"",			2,	1,		C,		C,		0, &getInputData,	compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Distance>					},
18745 		{	"Cross",				"",			2,	C,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Cross>						},
18746 		{	"Normalize",			"",			1,	C,		C,		0,		0, &getInputData,	compareFP16ArithmeticFunc<  C,  C,  0,  0, fp16Normalize>					},
18747 		{	"FaceForward",			"",			3,	C,		C,		C,		C, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  C, fp16FaceForward>					},
18748 		{	"Reflect",				"",			2,	C,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  C,  C,  C,  0, fp16Reflect>						},
18749 		{	"Refract",				"",			3,	C,		C,		C,		1, &getInputDataN,	compareFP16ArithmeticFunc<  C,  C,  C,  1, fp16Refract>						},
18750 		{	"OpDot",				"",			2,	1,		C,		C,		0, &getInputDataD,	compareFP16ArithmeticFunc<  1,  C,  C,  0, fp16Dot>							},
18751 		{	"OpVectorTimesScalar",	"",			2,	C,		C,		1,		0, &getInputDataV,	compareFP16ArithmeticFunc<  C,  C,  1,  0, fp16VectorTimesScalar>			},
18752 	};
18753 
18754 	for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18755 	{
18756 		const Math16TestFunc&	testFunc		= testFuncs[testFuncIdx];
18757 		const string			funcNameString	= testFunc.funcName;
18758 
18759 		if ((C != 3) && funcNameString == "Cross")
18760 			continue;
18761 
18762 		if ((C < 2) && funcNameString == "OpDot")
18763 			continue;
18764 
18765 		if ((C < 2) && funcNameString == "OpVectorTimesScalar")
18766 			continue;
18767 
18768 		createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), C, testFunc);
18769 	}
18770 
18771 	return testGroup.release();
18772 }
18773 
18774 template<class SpecResource>
createFloat16ArithmeticSet(tcu::TestContext & testCtx)18775 tcu::TestCaseGroup* createFloat16ArithmeticSet (tcu::TestContext& testCtx)
18776 {
18777 	const std::string				testGroupName	("arithmetic");
18778 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 16 arithmetic and related tests"));
18779 	const Math16TestFunc			testFuncs[]		=
18780 	{
18781 		{	"OpTranspose",			"2x2",		1,	MAT2X2,	MAT2X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Transpose<2,2> >				},
18782 		{	"OpTranspose",			"3x2",		1,	MAT2X3,	MAT3X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<3,2> >				},
18783 		{	"OpTranspose",			"4x2",		1,	MAT2X4,	MAT4X2,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<4,2> >				},
18784 		{	"OpTranspose",			"2x3",		1,	MAT3X2,	MAT2X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,3> >				},
18785 		{	"OpTranspose",			"3x3",		1,	MAT3X3,	MAT3X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,3> >				},
18786 		{	"OpTranspose",			"4x3",		1,	MAT3X4,	MAT4X3,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,3> >				},
18787 		{	"OpTranspose",			"2x4",		1,	MAT4X2,	MAT2X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc<  8,  8,  0,  0, fp16Transpose<2,4> >				},
18788 		{	"OpTranspose",			"3x4",		1,	MAT4X3,	MAT3X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<3,4> >				},
18789 		{	"OpTranspose",			"4x4",		1,	MAT4X4,	MAT4X4,	0,		0, &getInputDataM,	compareFP16ArithmeticFunc< 16, 16,  0,  0, fp16Transpose<4,4> >				},
18790 		{	"OpMatrixTimesScalar",	"2x2",		2,	MAT2X2,	MAT2X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4,  1,  0, fp16MatrixTimesScalar<2,2> >		},
18791 		{	"OpMatrixTimesScalar",	"2x3",		2,	MAT2X3,	MAT2X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,3> >		},
18792 		{	"OpMatrixTimesScalar",	"2x4",		2,	MAT2X4,	MAT2X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<2,4> >		},
18793 		{	"OpMatrixTimesScalar",	"3x2",		2,	MAT3X2,	MAT3X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<3,2> >		},
18794 		{	"OpMatrixTimesScalar",	"3x3",		2,	MAT3X3,	MAT3X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,3> >		},
18795 		{	"OpMatrixTimesScalar",	"3x4",		2,	MAT3X4,	MAT3X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<3,4> >		},
18796 		{	"OpMatrixTimesScalar",	"4x2",		2,	MAT4X2,	MAT4X2,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  1,  0, fp16MatrixTimesScalar<4,2> >		},
18797 		{	"OpMatrixTimesScalar",	"4x3",		2,	MAT4X3,	MAT4X3,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,3> >		},
18798 		{	"OpMatrixTimesScalar",	"4x4",		2,	MAT4X4,	MAT4X4,	1,		0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16,  1,  0, fp16MatrixTimesScalar<4,4> >		},
18799 		{	"OpVectorTimesMatrix",	"2x2",		2,	VEC2,	VEC2,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  2,  4,  0, fp16VectorTimesMatrix<2,2> >		},
18800 		{	"OpVectorTimesMatrix",	"2x3",		2,	VEC2,	VEC3,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  3,  8,  0, fp16VectorTimesMatrix<2,3> >		},
18801 		{	"OpVectorTimesMatrix",	"2x4",		2,	VEC2,	VEC4,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  4,  8,  0, fp16VectorTimesMatrix<2,4> >		},
18802 		{	"OpVectorTimesMatrix",	"3x2",		2,	VEC3,	VEC2,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  2,  8,  0, fp16VectorTimesMatrix<3,2> >		},
18803 		{	"OpVectorTimesMatrix",	"3x3",		2,	VEC3,	VEC3,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  3, 16,  0, fp16VectorTimesMatrix<3,3> >		},
18804 		{	"OpVectorTimesMatrix",	"3x4",		2,	VEC3,	VEC4,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  4, 16,  0, fp16VectorTimesMatrix<3,4> >		},
18805 		{	"OpVectorTimesMatrix",	"4x2",		2,	VEC4,	VEC2,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  2,  8,  0, fp16VectorTimesMatrix<4,2> >		},
18806 		{	"OpVectorTimesMatrix",	"4x3",		2,	VEC4,	VEC3,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  3, 16,  0, fp16VectorTimesMatrix<4,3> >		},
18807 		{	"OpVectorTimesMatrix",	"4x4",		2,	VEC4,	VEC4,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4, 16,  0, fp16VectorTimesMatrix<4,4> >		},
18808 		{	"OpMatrixTimesVector",	"2x2",		2,	VEC2,	MAT2X2,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  4,  2,  0, fp16MatrixTimesVector<2,2> >		},
18809 		{	"OpMatrixTimesVector",	"2x3",		2,	VEC3,	MAT2X3,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3,  8,  2,  0, fp16MatrixTimesVector<2,3> >		},
18810 		{	"OpMatrixTimesVector",	"2x4",		2,	VEC4,	MAT2X4,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  2,  0, fp16MatrixTimesVector<2,4> >		},
18811 		{	"OpMatrixTimesVector",	"3x2",		2,	VEC2,	MAT3X2,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  8,  3,  0, fp16MatrixTimesVector<3,2> >		},
18812 		{	"OpMatrixTimesVector",	"3x3",		2,	VEC3,	MAT3X3,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3, 16,  3,  0, fp16MatrixTimesVector<3,3> >		},
18813 		{	"OpMatrixTimesVector",	"3x4",		2,	VEC4,	MAT3X4,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4, 16,  3,  0, fp16MatrixTimesVector<3,4> >		},
18814 		{	"OpMatrixTimesVector",	"4x2",		2,	VEC2,	MAT4X2,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  2,  8,  4,  0, fp16MatrixTimesVector<4,2> >		},
18815 		{	"OpMatrixTimesVector",	"4x3",		2,	VEC3,	MAT4X3,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  3, 16,  4,  0, fp16MatrixTimesVector<4,3> >		},
18816 		{	"OpMatrixTimesVector",	"4x4",		2,	VEC4,	MAT4X4,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4, 16,  4,  0, fp16MatrixTimesVector<4,4> >		},
18817 		{	"OpMatrixTimesMatrix",	"2x2_2x2",	2,	MAT2X2,	MAT2X2,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  4,  4,  0, fp16MatrixTimesMatrix<2,2,2,2> >	},
18818 		{	"OpMatrixTimesMatrix",	"2x2_3x2",	2,	MAT3X2,	MAT2X2,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,3,2> >	},
18819 		{	"OpMatrixTimesMatrix",	"2x2_4x2",	2,	MAT4X2,	MAT2X2,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  8,  0, fp16MatrixTimesMatrix<2,2,4,2> >	},
18820 		{	"OpMatrixTimesMatrix",	"2x3_2x2",	2,	MAT2X3,	MAT2X3,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,3,2,2> >	},
18821 		{	"OpMatrixTimesMatrix",	"2x3_3x2",	2,	MAT3X3,	MAT2X3,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,3,2> >	},
18822 		{	"OpMatrixTimesMatrix",	"2x3_4x2",	2,	MAT4X3,	MAT2X3,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,3,4,2> >	},
18823 		{	"OpMatrixTimesMatrix",	"2x4_2x2",	2,	MAT2X4,	MAT2X4,	MAT2X2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8,  4,  0, fp16MatrixTimesMatrix<2,4,2,2> >	},
18824 		{	"OpMatrixTimesMatrix",	"2x4_3x2",	2,	MAT3X4,	MAT2X4,	MAT3X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,3,2> >	},
18825 		{	"OpMatrixTimesMatrix",	"2x4_4x2",	2,	MAT4X4,	MAT2X4,	MAT4X2,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  8,  8,  0, fp16MatrixTimesMatrix<2,4,4,2> >	},
18826 		{	"OpMatrixTimesMatrix",	"3x2_2x3",	2,	MAT2X2,	MAT3X2,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<3,2,2,3> >	},
18827 		{	"OpMatrixTimesMatrix",	"3x2_3x3",	2,	MAT3X2,	MAT3X2,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,3,3> >	},
18828 		{	"OpMatrixTimesMatrix",	"3x2_4x3",	2,	MAT4X2,	MAT3X2,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<3,2,4,3> >	},
18829 		{	"OpMatrixTimesMatrix",	"3x3_2x3",	2,	MAT2X3,	MAT3X3,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,3,2,3> >	},
18830 		{	"OpMatrixTimesMatrix",	"3x3_3x3",	2,	MAT3X3,	MAT3X3,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,3,3> >	},
18831 		{	"OpMatrixTimesMatrix",	"3x3_4x3",	2,	MAT4X3,	MAT3X3,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,3,4,3> >	},
18832 		{	"OpMatrixTimesMatrix",	"3x4_2x3",	2,	MAT2X4,	MAT3X4,	MAT2X3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<3,4,2,3> >	},
18833 		{	"OpMatrixTimesMatrix",	"3x4_3x3",	2,	MAT3X4,	MAT3X4,	MAT3X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,3,3> >	},
18834 		{	"OpMatrixTimesMatrix",	"3x4_4x3",	2,	MAT4X4,	MAT3X4,	MAT4X3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<3,4,4,3> >	},
18835 		{	"OpMatrixTimesMatrix",	"4x2_2x4",	2,	MAT2X2,	MAT4X2,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  8,  8,  0, fp16MatrixTimesMatrix<4,2,2,4> >	},
18836 		{	"OpMatrixTimesMatrix",	"4x2_3x4",	2,	MAT3X2,	MAT4X2,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,3,4> >	},
18837 		{	"OpMatrixTimesMatrix",	"4x2_4x4",	2,	MAT4X2,	MAT4X2,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  8, 16,  0, fp16MatrixTimesMatrix<4,2,4,4> >	},
18838 		{	"OpMatrixTimesMatrix",	"4x3_2x4",	2,	MAT2X3,	MAT4X3,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,3,2,4> >	},
18839 		{	"OpMatrixTimesMatrix",	"4x3_3x4",	2,	MAT3X3,	MAT4X3,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,3,4> >	},
18840 		{	"OpMatrixTimesMatrix",	"4x3_4x4",	2,	MAT4X3,	MAT4X3,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,3,4,4> >	},
18841 		{	"OpMatrixTimesMatrix",	"4x4_2x4",	2,	MAT2X4,	MAT4X4,	MAT2X4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8, 16,  8,  0, fp16MatrixTimesMatrix<4,4,2,4> >	},
18842 		{	"OpMatrixTimesMatrix",	"4x4_3x4",	2,	MAT3X4,	MAT4X4,	MAT3X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,3,4> >	},
18843 		{	"OpMatrixTimesMatrix",	"4x4_4x4",	2,	MAT4X4,	MAT4X4,	MAT4X4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16, 16, 16,  0, fp16MatrixTimesMatrix<4,4,4,4> >	},
18844 		{	"OpOuterProduct",		"2x2",		2,	MAT2X2,	VEC2,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  4,  2,  2,  0, fp16OuterProduct<2,2> >			},
18845 		{	"OpOuterProduct",		"2x3",		2,	MAT2X3,	VEC3,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  3,  2,  0, fp16OuterProduct<2,3> >			},
18846 		{	"OpOuterProduct",		"2x4",		2,	MAT2X4,	VEC4,	VEC2,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  4,  2,  0, fp16OuterProduct<2,4> >			},
18847 		{	"OpOuterProduct",		"3x2",		2,	MAT3X2,	VEC2,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  2,  3,  0, fp16OuterProduct<3,2> >			},
18848 		{	"OpOuterProduct",		"3x3",		2,	MAT3X3,	VEC3,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  3,  3,  0, fp16OuterProduct<3,3> >			},
18849 		{	"OpOuterProduct",		"3x4",		2,	MAT3X4,	VEC4,	VEC3,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  4,  3,  0, fp16OuterProduct<3,4> >			},
18850 		{	"OpOuterProduct",		"4x2",		2,	MAT4X2,	VEC2,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc<  8,  2,  4,  0, fp16OuterProduct<4,2> >			},
18851 		{	"OpOuterProduct",		"4x3",		2,	MAT4X3,	VEC3,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  3,  4,  0, fp16OuterProduct<4,3> >			},
18852 		{	"OpOuterProduct",		"4x4",		2,	MAT4X4,	VEC4,	VEC4,	0, &getInputDataD,	compareFP16ArithmeticFunc< 16,  4,  4,  0, fp16OuterProduct<4,4> >			},
18853 		{	"Determinant",			"2x2",		1,	SCALAR,	MAT2X2,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1,  4,  0,  0, fp16Determinant<2> >				},
18854 		{	"Determinant",			"3x3",		1,	SCALAR,	MAT3X3,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<3> >				},
18855 		{	"Determinant",			"4x4",		1,	SCALAR,	MAT4X4,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  1, 16,  0,  0, fp16Determinant<4> >				},
18856 		{	"MatrixInverse",		"2x2",		1,	MAT2X2,	MAT2X2,	NONE,	0, &getInputDataC,	compareFP16ArithmeticFunc<  4,  4,  0,  0, fp16Inverse<2> >					},
18857 	};
18858 
18859 	for (deUint32 testFuncIdx = 0; testFuncIdx < DE_LENGTH_OF_ARRAY(testFuncs); ++testFuncIdx)
18860 	{
18861 		const Math16TestFunc&	testFunc	= testFuncs[testFuncIdx];
18862 
18863 		createFloat16ArithmeticFuncTest<SpecResource>(testCtx, *testGroup.get(), 0, testFunc);
18864 	}
18865 
18866 	return testGroup.release();
18867 }
18868 
// One Amber-based comparison test: a base name plus a human-readable
// description fragment. Kept as a plain aggregate so that tables of cases
// can be written with brace initialisation.
struct ComparisonCase
{
	std::string	name;	// Used as test case name and as prefix of the .amber file name.
	std::string	desc;	// Appended to "Compare output of " to form the case description.
};
18874 
18875 template<size_t C>
createFloat32ComparisonComputeSet(tcu::TestContext & testCtx)18876 tcu::TestCaseGroup* createFloat32ComparisonComputeSet (tcu::TestContext& testCtx)
18877 {
18878 	const string					testGroupName	("comparison_" + de::toString(C));
18879 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18880 	const char*						dataDir			= "spirv_assembly/instruction/float32/comparison";
18881 
18882 	const ComparisonCase			amberTests[]	=
18883 	{
18884 		{ "modfstruct",		"modf and modfStruct"	},
18885 		{ "frexpstruct",	"frexp and frexpStruct"	}
18886 	};
18887 
18888 	for (ComparisonCase test : amberTests)
18889 	{
18890 		const string caseDesc ("Compare output of " + test.desc);
18891 		const string fileName (test.name + "_" + de::toString(C) + "_comp.amber");
18892 
18893 		testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18894 														   test.name.c_str(),
18895 														   caseDesc.c_str(),
18896 														   dataDir,
18897 														   fileName));
18898 	}
18899 
18900 	return testGroup.release();
18901 }
18902 
// A shader stage for which an Amber script variant exists, together with
// the requirement strings handed to createAmberTestCase() for that stage.
// Kept as a plain aggregate so that stage tables can be brace-initialised.
struct ShaderStage
{
	std::string					name;			// Stage tag used in test case and file names (e.g. "vert").
	std::vector<std::string>	requirement;	// Requirement strings such as "Features.geometryShader"; may be empty.
};
18908 
18909 template<size_t C>
createFloat32ComparisonGraphicsSet(tcu::TestContext & testCtx)18910 tcu::TestCaseGroup* createFloat32ComparisonGraphicsSet (tcu::TestContext& testCtx)
18911 {
18912 	const string					testGroupName	("comparison_" + de::toString(C));
18913 	de::MovePtr<tcu::TestCaseGroup>	testGroup		(new tcu::TestCaseGroup(testCtx, testGroupName.c_str(), "Float 32 comparison tests"));
18914 	const char*						dataDir			= "spirv_assembly/instruction/float32/comparison";
18915 
18916 	const ShaderStage				stages[]		=
18917 	{
18918 		{ "vert", vector<string>(0) },
18919 		{ "tesc", vector<string>(1, "Features.tessellationShader") },
18920 		{ "tese", vector<string>(1, "Features.tessellationShader") },
18921 		{ "geom", vector<string>(1, "Features.geometryShader") },
18922 		{ "frag", vector<string>(0) }
18923 	};
18924 
18925 	const ComparisonCase			amberTests[]	=
18926 	{
18927 		{ "modfstruct",		"modf and modfStruct"	},
18928 		{ "frexpstruct",	"frexp and frexpStruct"	}
18929 	};
18930 
18931 	for (ComparisonCase test : amberTests)
18932 	for (ShaderStage stage : stages)
18933 	{
18934 		const string caseName (test.name + "_" + stage.name);
18935 		const string caseDesc ("Compare output of " + test.desc);
18936 		const string fileName (test.name + "_" + de::toString(C) + "_" + stage.name + ".amber");
18937 
18938 		testGroup->addChild(cts_amber::createAmberTestCase(testCtx,
18939 														   caseName.c_str(),
18940 														   caseDesc.c_str(),
18941 														   dataDir,
18942 														   fileName,
18943 														   stage.requirement));
18944 	}
18945 
18946 	return testGroup.release();
18947 }
18948 
getNumberTypeName(const NumberType type)18949 const string getNumberTypeName (const NumberType type)
18950 {
18951 	if (type == NUMBERTYPE_INT32)
18952 	{
18953 		return "int";
18954 	}
18955 	else if (type == NUMBERTYPE_UINT32)
18956 	{
18957 		return "uint";
18958 	}
18959 	else if (type == NUMBERTYPE_FLOAT32)
18960 	{
18961 		return "float";
18962 	}
18963 	else
18964 	{
18965 		DE_ASSERT(false);
18966 		return "";
18967 	}
18968 }
18969 
getInt(de::Random & rnd)18970 deInt32 getInt(de::Random& rnd)
18971 {
18972 	return rnd.getInt(std::numeric_limits<int>::min(), std::numeric_limits<int>::max());
18973 }
18974 
// Returns str concatenated with itself `times` times. A zero or negative
// count yields an empty string.
const string repeatString (const string& str, int times)
{
	string repeated;

	for (int remaining = times; remaining > 0; --remaining)
		repeated.append(str);

	return repeated;
}
18984 
getRandomConstantString(const NumberType type,de::Random & rnd)18985 const string getRandomConstantString (const NumberType type, de::Random& rnd)
18986 {
18987 	if (type == NUMBERTYPE_INT32)
18988 	{
18989 		return numberToString<deInt32>(getInt(rnd));
18990 	}
18991 	else if (type == NUMBERTYPE_UINT32)
18992 	{
18993 		return numberToString<deUint32>(rnd.getUint32());
18994 	}
18995 	else if (type == NUMBERTYPE_FLOAT32)
18996 	{
18997 		return numberToString<float>(rnd.getFloat());
18998 	}
18999 	else
19000 	{
19001 		DE_ASSERT(false);
19002 		return "";
19003 	}
19004 }
19005 
createVectorCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19006 void createVectorCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19007 {
19008 	map<string, string> params;
19009 
19010 	// Vec2 to Vec4
19011 	for (int width = 2; width <= 4; ++width)
19012 	{
19013 		const string randomConst = numberToString(getInt(rnd));
19014 		const string widthStr = numberToString(width);
19015 		const string composite_type = "${customType}vec" + widthStr;
19016 		const int index = rnd.getInt(0, width-1);
19017 
19018 		params["type"]			= "vec";
19019 		params["name"]			= params["type"] + "_" + widthStr;
19020 		params["compositeDecl"]		= composite_type + " = OpTypeVector ${customType} " + widthStr +"\n";
19021 		params["compositeType"]		= composite_type;
19022 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19023 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct " + composite_type + repeatString(" %filler", width) + "\n";
19024 		params["indexes"]		= numberToString(index);
19025 		testCases.push_back(params);
19026 	}
19027 }
19028 
createArrayCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19029 void createArrayCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19030 {
19031 	const int limit = 10;
19032 	map<string, string> params;
19033 
19034 	for (int width = 2; width <= limit; ++width)
19035 	{
19036 		string randomConst = numberToString(getInt(rnd));
19037 		string widthStr = numberToString(width);
19038 		int index = rnd.getInt(0, width-1);
19039 
19040 		params["type"]			= "array";
19041 		params["name"]			= params["type"] + "_" + widthStr;
19042 		params["compositeDecl"]		= string("%arraywidth = OpConstant %u32 " + widthStr + "\n")
19043 											+	 "%composite = OpTypeArray ${customType} %arraywidth\n";
19044 		params["compositeType"]		= "%composite";
19045 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19046 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19047 		params["indexes"]		= numberToString(index);
19048 		testCases.push_back(params);
19049 	}
19050 }
19051 
createStructCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19052 void createStructCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19053 {
19054 	const int limit = 10;
19055 	map<string, string> params;
19056 
19057 	for (int width = 2; width <= limit; ++width)
19058 	{
19059 		string randomConst = numberToString(getInt(rnd));
19060 		int index = rnd.getInt(0, width-1);
19061 
19062 		params["type"]			= "struct";
19063 		params["name"]			= params["type"] + "_" + numberToString(width);
19064 		params["compositeDecl"]		= "%composite = OpTypeStruct" + repeatString(" ${customType}", width) + "\n";
19065 		params["compositeType"]		= "%composite";
19066 		params["filler"]		= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n";
19067 		params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %filler", width) + "\n";
19068 		params["indexes"]		= numberToString(index);
19069 		testCases.push_back(params);
19070 	}
19071 }
19072 
createMatrixCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19073 void createMatrixCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19074 {
19075 	map<string, string> params;
19076 
19077 	// Vec2 to Vec4
19078 	for (int width = 2; width <= 4; ++width)
19079 	{
19080 		string widthStr = numberToString(width);
19081 
19082 		for (int column = 2 ; column <= 4; ++column)
19083 		{
19084 			int index_0 = rnd.getInt(0, column-1);
19085 			int index_1 = rnd.getInt(0, width-1);
19086 			string columnStr = numberToString(column);
19087 
19088 			params["type"]		= "matrix";
19089 			params["name"]		= params["type"] + "_" + widthStr + "x" + columnStr;
19090 			params["compositeDecl"]	= string("%vectype   = OpTypeVector ${customType} " + widthStr + "\n")
19091 												+	 "%composite = OpTypeMatrix %vectype " + columnStr + "\n";
19092 			params["compositeType"]	= "%composite";
19093 
19094 			params["filler"]	= string("%filler    = OpConstant ${customType} ") + getRandomConstantString(type, rnd) + "\n"
19095 												+	 "%fillerVec = OpConstantComposite %vectype" + repeatString(" %filler", width) + "\n";
19096 
19097 			params["compositeConstruct"]	= "%instance  = OpCompositeConstruct %composite" + repeatString(" %fillerVec", column) + "\n";
19098 			params["indexes"]	= numberToString(index_0) + " " + numberToString(index_1);
19099 			testCases.push_back(params);
19100 		}
19101 	}
19102 }
19103 
createCompositeCases(vector<map<string,string>> & testCases,de::Random & rnd,const NumberType type)19104 void createCompositeCases (vector<map<string, string> >& testCases, de::Random& rnd, const NumberType type)
19105 {
19106 	createVectorCompositeCases(testCases, rnd, type);
19107 	createArrayCompositeCases(testCases, rnd, type);
19108 	createStructCompositeCases(testCases, rnd, type);
19109 	// Matrix only supports float types
19110 	if (type == NUMBERTYPE_FLOAT32)
19111 	{
19112 		createMatrixCompositeCases(testCases, rnd, type);
19113 	}
19114 }
19115 
getAssemblyTypeDeclaration(const NumberType type)19116 const string getAssemblyTypeDeclaration (const NumberType type)
19117 {
19118 	switch (type)
19119 	{
19120 		case NUMBERTYPE_INT32:		return "OpTypeInt 32 1";
19121 		case NUMBERTYPE_UINT32:		return "OpTypeInt 32 0";
19122 		case NUMBERTYPE_FLOAT32:	return "OpTypeFloat 32";
19123 		default:			DE_ASSERT(false); return "";
19124 	}
19125 }
19126 
getAssemblyTypeName(const NumberType type)19127 const string getAssemblyTypeName (const NumberType type)
19128 {
19129 	switch (type)
19130 	{
19131 		case NUMBERTYPE_INT32:		return "%i32";
19132 		case NUMBERTYPE_UINT32:		return "%u32";
19133 		case NUMBERTYPE_FLOAT32:	return "%f32";
19134 		default:			DE_ASSERT(false); return "";
19135 	}
19136 }
19137 
specializeCompositeInsertShaderTemplate(const NumberType type,const map<string,string> & params)19138 const string specializeCompositeInsertShaderTemplate (const NumberType type, const map<string, string>& params)
19139 {
19140 	map<string, string>	parameters(params);
19141 
19142 	const string customType = getAssemblyTypeName(type);
19143 	map<string, string> substCustomType;
19144 	substCustomType["customType"] = customType;
19145 	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19146 	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19147 	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19148 	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19149 	parameters["customType"] = customType;
19150 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19151 
19152 	if (parameters.at("compositeType") != "%u32vec3")
19153 	{
19154 		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19155 	}
19156 
19157 	return StringTemplate(
19158 		"OpCapability Shader\n"
19159 		"OpCapability Matrix\n"
19160 		"OpMemoryModel Logical GLSL450\n"
19161 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19162 		"OpExecutionMode %main LocalSize 1 1 1\n"
19163 
19164 		"OpSource GLSL 430\n"
19165 		"OpName %main           \"main\"\n"
19166 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19167 
19168 		// Decorators
19169 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19170 		"OpDecorate %buf BufferBlock\n"
19171 		"OpDecorate %indata DescriptorSet 0\n"
19172 		"OpDecorate %indata Binding 0\n"
19173 		"OpDecorate %outdata DescriptorSet 0\n"
19174 		"OpDecorate %outdata Binding 1\n"
19175 		"OpDecorate %customarr ArrayStride 4\n"
19176 		"${compositeDecorator}"
19177 		"OpMemberDecorate %buf 0 Offset 0\n"
19178 
19179 		// General types
19180 		"%void      = OpTypeVoid\n"
19181 		"%voidf     = OpTypeFunction %void\n"
19182 		"%u32       = OpTypeInt 32 0\n"
19183 		"%i32       = OpTypeInt 32 1\n"
19184 		"%f32       = OpTypeFloat 32\n"
19185 
19186 		// Composite declaration
19187 		"${compositeDecl}"
19188 
19189 		// Constants
19190 		"${filler}"
19191 
19192 		"${u32vec3Decl:opt}"
19193 		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19194 
19195 		// Inherited from custom
19196 		"%customptr = OpTypePointer Uniform ${customType}\n"
19197 		"%customarr = OpTypeRuntimeArray ${customType}\n"
19198 		"%buf       = OpTypeStruct %customarr\n"
19199 		"%bufptr    = OpTypePointer Uniform %buf\n"
19200 
19201 		"%indata    = OpVariable %bufptr Uniform\n"
19202 		"%outdata   = OpVariable %bufptr Uniform\n"
19203 
19204 		"%id        = OpVariable %uvec3ptr Input\n"
19205 		"%zero      = OpConstant %i32 0\n"
19206 
19207 		"%main      = OpFunction %void None %voidf\n"
19208 		"%label     = OpLabel\n"
19209 		"%idval     = OpLoad %u32vec3 %id\n"
19210 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19211 
19212 		"%inloc     = OpAccessChain %customptr %indata %zero %x\n"
19213 		"%outloc    = OpAccessChain %customptr %outdata %zero %x\n"
19214 		// Read the input value
19215 		"%inval     = OpLoad ${customType} %inloc\n"
19216 		// Create the composite and fill it
19217 		"${compositeConstruct}"
19218 		// Insert the input value to a place
19219 		"%instance2 = OpCompositeInsert ${compositeType} %inval %instance ${indexes}\n"
19220 		// Read back the value from the position
19221 		"%out_val   = OpCompositeExtract ${customType} %instance2 ${indexes}\n"
19222 		// Store it in the output position
19223 		"             OpStore %outloc %out_val\n"
19224 		"             OpReturn\n"
19225 		"             OpFunctionEnd\n"
19226 	).specialize(parameters);
19227 }
19228 
19229 template<typename T>
createCompositeBuffer(T number)19230 BufferSp createCompositeBuffer(T number)
19231 {
19232 	return BufferSp(new Buffer<T>(vector<T>(1, number)));
19233 }
19234 
createOpCompositeInsertGroup(tcu::TestContext & testCtx)19235 tcu::TestCaseGroup* createOpCompositeInsertGroup (tcu::TestContext& testCtx)
19236 {
19237 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "opcompositeinsert", "Test the OpCompositeInsert instruction"));
19238 	de::Random						rnd		(deStringHash(group->getName()));
19239 
19240 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19241 	{
19242 		NumberType						numberType		= NumberType(type);
19243 		const string					typeName		= getNumberTypeName(numberType);
19244 		const string					description		= "Test the OpCompositeInsert instruction with " + typeName + "s";
19245 		de::MovePtr<tcu::TestCaseGroup>	subGroup		(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19246 		vector<map<string, string> >	testCases;
19247 
19248 		createCompositeCases(testCases, rnd, numberType);
19249 
19250 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19251 		{
19252 			ComputeShaderSpec	spec;
19253 
19254 			spec.assembly = specializeCompositeInsertShaderTemplate(numberType, *test);
19255 
19256 			switch (numberType)
19257 			{
19258 				case NUMBERTYPE_INT32:
19259 				{
19260 					deInt32 number = getInt(rnd);
19261 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19262 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19263 					break;
19264 				}
19265 				case NUMBERTYPE_UINT32:
19266 				{
19267 					deUint32 number = rnd.getUint32();
19268 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19269 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19270 					break;
19271 				}
19272 				case NUMBERTYPE_FLOAT32:
19273 				{
19274 					float number = rnd.getFloat();
19275 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19276 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19277 					break;
19278 				}
19279 				default:
19280 					DE_ASSERT(false);
19281 			}
19282 
19283 			spec.numWorkGroups = IVec3(1, 1, 1);
19284 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpCompositeInsert test", spec));
19285 		}
19286 		group->addChild(subGroup.release());
19287 	}
19288 	return group.release();
19289 }
19290 
19291 struct AssemblyStructInfo
19292 {
AssemblyStructInfovkt::SpirVAssembly::AssemblyStructInfo19293 	AssemblyStructInfo (const deUint32 comp, const deUint32 idx)
19294 	: components	(comp)
19295 	, index			(idx)
19296 	{}
19297 
19298 	deUint32 components;
19299 	deUint32 index;
19300 };
19301 
specializeInBoundsShaderTemplate(const NumberType type,const AssemblyStructInfo & structInfo,const map<string,string> & params)19302 const string specializeInBoundsShaderTemplate (const NumberType type, const AssemblyStructInfo& structInfo, const map<string, string>& params)
19303 {
19304 	// Create the full index string
19305 	string				fullIndex	= numberToString(structInfo.index) + " " + params.at("indexes");
19306 	// Convert it to list of indexes
19307 	vector<string>		indexes		= de::splitString(fullIndex, ' ');
19308 
19309 	map<string, string>	parameters	(params);
19310 	parameters["structType"]	= repeatString(" ${compositeType}", structInfo.components);
19311 	parameters["structConstruct"]	= repeatString(" %instance", structInfo.components);
19312 	parameters["insertIndexes"]	= fullIndex;
19313 
19314 	// In matrix cases the last two index is the CompositeExtract indexes
19315 	const deUint32 extractIndexes = (parameters["type"] == "matrix") ? 2 : 1;
19316 
19317 	// Construct the extractIndex
19318 	for (vector<string>::const_iterator index = indexes.end() - extractIndexes; index != indexes.end(); ++index)
19319 	{
19320 		parameters["extractIndexes"] += " " + *index;
19321 	}
19322 
19323 	// Remove the last 1 or 2 element depends on matrix case or not
19324 	indexes.erase(indexes.end() - extractIndexes, indexes.end());
19325 
19326 	deUint32 id = 0;
19327 	// Generate AccessChain index expressions (except for the last one, because we use ptr to the composite)
19328 	for (vector<string>::const_iterator index = indexes.begin(); index != indexes.end(); ++index)
19329 	{
19330 		string indexId = "%index_" + numberToString(id++);
19331 		parameters["accessChainConstDeclaration"] += indexId + "   = OpConstant %u32 " + *index + "\n";
19332 		parameters["accessChainIndexes"] += " " + indexId;
19333 	}
19334 
19335 	parameters["compositeDecorator"] = (parameters["type"] == "array") ? "OpDecorate %composite ArrayStride 4\n" : "";
19336 
19337 	const string customType = getAssemblyTypeName(type);
19338 	map<string, string> substCustomType;
19339 	substCustomType["customType"] = customType;
19340 	parameters["compositeDecl"] = StringTemplate(parameters.at("compositeDecl")).specialize(substCustomType);
19341 	parameters["compositeType"] = StringTemplate(parameters.at("compositeType")).specialize(substCustomType);
19342 	parameters["compositeConstruct"] = StringTemplate(parameters.at("compositeConstruct")).specialize(substCustomType);
19343 	parameters["filler"] = StringTemplate(parameters.at("filler")).specialize(substCustomType);
19344 	parameters["customType"] = customType;
19345 
19346 	const string compositeType = parameters.at("compositeType");
19347 	map<string, string> substCompositeType;
19348 	substCompositeType["compositeType"] = compositeType;
19349 	parameters["structType"] = StringTemplate(parameters.at("structType")).specialize(substCompositeType);
19350 	if (compositeType != "%u32vec3")
19351 	{
19352 		parameters["u32vec3Decl"] = "%u32vec3   = OpTypeVector %u32 3\n";
19353 	}
19354 
19355 	return StringTemplate(
19356 		"OpCapability Shader\n"
19357 		"OpCapability Matrix\n"
19358 		"OpMemoryModel Logical GLSL450\n"
19359 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19360 		"OpExecutionMode %main LocalSize 1 1 1\n"
19361 
19362 		"OpSource GLSL 430\n"
19363 		"OpName %main           \"main\"\n"
19364 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19365 		// Decorators
19366 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19367 		"OpDecorate %buf BufferBlock\n"
19368 		"OpDecorate %indata DescriptorSet 0\n"
19369 		"OpDecorate %indata Binding 0\n"
19370 		"OpDecorate %outdata DescriptorSet 0\n"
19371 		"OpDecorate %outdata Binding 1\n"
19372 		"OpDecorate %customarr ArrayStride 4\n"
19373 		"${compositeDecorator}"
19374 		"OpMemberDecorate %buf 0 Offset 0\n"
19375 		// General types
19376 		"%void      = OpTypeVoid\n"
19377 		"%voidf     = OpTypeFunction %void\n"
19378 		"%i32       = OpTypeInt 32 1\n"
19379 		"%u32       = OpTypeInt 32 0\n"
19380 		"%f32       = OpTypeFloat 32\n"
19381 		// Custom types
19382 		"${compositeDecl}"
19383 		// %u32vec3 if not already declared in ${compositeDecl}
19384 		"${u32vec3Decl:opt}"
19385 		"%uvec3ptr  = OpTypePointer Input %u32vec3\n"
19386 		// Inherited from composite
19387 		"%composite_p = OpTypePointer Function ${compositeType}\n"
19388 		"%struct_t  = OpTypeStruct${structType}\n"
19389 		"%struct_p  = OpTypePointer Function %struct_t\n"
19390 		// Constants
19391 		"${filler}"
19392 		"${accessChainConstDeclaration}"
19393 		// Inherited from custom
19394 		"%customptr = OpTypePointer Uniform ${customType}\n"
19395 		"%customarr = OpTypeRuntimeArray ${customType}\n"
19396 		"%buf       = OpTypeStruct %customarr\n"
19397 		"%bufptr    = OpTypePointer Uniform %buf\n"
19398 		"%indata    = OpVariable %bufptr Uniform\n"
19399 		"%outdata   = OpVariable %bufptr Uniform\n"
19400 
19401 		"%id        = OpVariable %uvec3ptr Input\n"
19402 		"%zero      = OpConstant %u32 0\n"
19403 		"%main      = OpFunction %void None %voidf\n"
19404 		"%label     = OpLabel\n"
19405 		"%struct_v  = OpVariable %struct_p Function\n"
19406 		"%idval     = OpLoad %u32vec3 %id\n"
19407 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19408 		// Create the input/output type
19409 		"%inloc     = OpInBoundsAccessChain %customptr %indata %zero %x\n"
19410 		"%outloc    = OpInBoundsAccessChain %customptr %outdata %zero %x\n"
19411 		// Read the input value
19412 		"%inval     = OpLoad ${customType} %inloc\n"
19413 		// Create the composite and fill it
19414 		"${compositeConstruct}"
19415 		// Create the struct and fill it with the composite
19416 		"%struct    = OpCompositeConstruct %struct_t${structConstruct}\n"
19417 		// Insert the value
19418 		"%comp_obj  = OpCompositeInsert %struct_t %inval %struct ${insertIndexes}\n"
19419 		// Store the object
19420 		"             OpStore %struct_v %comp_obj\n"
19421 		// Get deepest possible composite pointer
19422 		"%inner_ptr = OpInBoundsAccessChain %composite_p %struct_v${accessChainIndexes}\n"
19423 		"%read_obj  = OpLoad ${compositeType} %inner_ptr\n"
19424 		// Read back the stored value
19425 		"%read_val  = OpCompositeExtract ${customType} %read_obj${extractIndexes}\n"
19426 		"             OpStore %outloc %read_val\n"
19427 		"             OpReturn\n"
19428 		"             OpFunctionEnd\n"
19429 	).specialize(parameters);
19430 }
19431 
createOpInBoundsAccessChainGroup(tcu::TestContext & testCtx)19432 tcu::TestCaseGroup* createOpInBoundsAccessChainGroup (tcu::TestContext& testCtx)
19433 {
19434 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opinboundsaccesschain", "Test the OpInBoundsAccessChain instruction"));
19435 	de::Random						rnd				(deStringHash(group->getName()));
19436 
19437 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19438 	{
19439 		NumberType						numberType	= NumberType(type);
19440 		const string					typeName	= getNumberTypeName(numberType);
19441 		const string					description	= "Test the OpInBoundsAccessChain instruction with " + typeName + "s";
19442 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19443 
19444 		vector<map<string, string> >	testCases;
19445 		createCompositeCases(testCases, rnd, numberType);
19446 
19447 		for (vector<map<string, string> >::const_iterator test = testCases.begin(); test != testCases.end(); ++test)
19448 		{
19449 			ComputeShaderSpec	spec;
19450 
19451 			// Number of components inside of a struct
19452 			deUint32 structComponents = rnd.getInt(2, 8);
19453 			// Component index value
19454 			deUint32 structIndex = rnd.getInt(0, structComponents - 1);
19455 			AssemblyStructInfo structInfo(structComponents, structIndex);
19456 
19457 			spec.assembly = specializeInBoundsShaderTemplate(numberType, structInfo, *test);
19458 
19459 			switch (numberType)
19460 			{
19461 				case NUMBERTYPE_INT32:
19462 				{
19463 					deInt32 number = getInt(rnd);
19464 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19465 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19466 					break;
19467 				}
19468 				case NUMBERTYPE_UINT32:
19469 				{
19470 					deUint32 number = rnd.getUint32();
19471 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19472 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19473 					break;
19474 				}
19475 				case NUMBERTYPE_FLOAT32:
19476 				{
19477 					float number = rnd.getFloat();
19478 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19479 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19480 					break;
19481 				}
19482 				default:
19483 					DE_ASSERT(false);
19484 			}
19485 			spec.numWorkGroups = IVec3(1, 1, 1);
19486 			subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, test->at("name").c_str(), "OpInBoundsAccessChain test", spec));
19487 		}
19488 		group->addChild(subGroup.release());
19489 	}
19490 	return group.release();
19491 }
19492 
19493 // If the params missing, uninitialized case
19494 const string specializeDefaultOutputShaderTemplate (const NumberType type, const map<string, string>& params = map<string, string>())
19495 {
19496 	map<string, string> parameters(params);
19497 
19498 	parameters["customType"]	= getAssemblyTypeName(type);
19499 
19500 	// Declare the const value, and use it in the initializer
19501 	if (params.find("constValue") != params.end())
19502 	{
19503 		parameters["variableInitializer"]	= " %const";
19504 	}
19505 	// Uninitialized case
19506 	else
19507 	{
19508 		parameters["commentDecl"]	= ";";
19509 	}
19510 
19511 	return StringTemplate(
19512 		"OpCapability Shader\n"
19513 		"OpMemoryModel Logical GLSL450\n"
19514 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19515 		"OpExecutionMode %main LocalSize 1 1 1\n"
19516 		"OpSource GLSL 430\n"
19517 		"OpName %main           \"main\"\n"
19518 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19519 		// Decorators
19520 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19521 		"OpDecorate %indata DescriptorSet 0\n"
19522 		"OpDecorate %indata Binding 0\n"
19523 		"OpDecorate %outdata DescriptorSet 0\n"
19524 		"OpDecorate %outdata Binding 1\n"
19525 		"OpDecorate %in_arr ArrayStride 4\n"
19526 		"OpDecorate %in_buf BufferBlock\n"
19527 		"OpMemberDecorate %in_buf 0 Offset 0\n"
19528 		// Base types
19529 		"%void       = OpTypeVoid\n"
19530 		"%voidf      = OpTypeFunction %void\n"
19531 		"%u32        = OpTypeInt 32 0\n"
19532 		"%i32        = OpTypeInt 32 1\n"
19533 		"%f32        = OpTypeFloat 32\n"
19534 		"%uvec3      = OpTypeVector %u32 3\n"
19535 		"%uvec3ptr   = OpTypePointer Input %uvec3\n"
19536 		"${commentDecl:opt}%const      = OpConstant ${customType} ${constValue:opt}\n"
19537 		// Derived types
19538 		"%in_ptr     = OpTypePointer Uniform ${customType}\n"
19539 		"%in_arr     = OpTypeRuntimeArray ${customType}\n"
19540 		"%in_buf     = OpTypeStruct %in_arr\n"
19541 		"%in_bufptr  = OpTypePointer Uniform %in_buf\n"
19542 		"%indata     = OpVariable %in_bufptr Uniform\n"
19543 		"%outdata    = OpVariable %in_bufptr Uniform\n"
19544 		"%id         = OpVariable %uvec3ptr Input\n"
19545 		"%var_ptr    = OpTypePointer Function ${customType}\n"
19546 		// Constants
19547 		"%zero       = OpConstant %i32 0\n"
19548 		// Main function
19549 		"%main       = OpFunction %void None %voidf\n"
19550 		"%label      = OpLabel\n"
19551 		"%out_var    = OpVariable %var_ptr Function${variableInitializer:opt}\n"
19552 		"%idval      = OpLoad %uvec3 %id\n"
19553 		"%x          = OpCompositeExtract %u32 %idval 0\n"
19554 		"%inloc      = OpAccessChain %in_ptr %indata %zero %x\n"
19555 		"%outloc     = OpAccessChain %in_ptr %outdata %zero %x\n"
19556 
19557 		"%outval     = OpLoad ${customType} %out_var\n"
19558 		"              OpStore %outloc %outval\n"
19559 		"              OpReturn\n"
19560 		"              OpFunctionEnd\n"
19561 	).specialize(parameters);
19562 }
19563 
compareFloats(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog & log)19564 bool compareFloats (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog& log)
19565 {
19566 	DE_ASSERT(outputAllocs.size() != 0);
19567 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19568 
19569 	// Use custom epsilon because of the float->string conversion
19570 	const float	epsilon	= 0.00001f;
19571 
19572 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19573 	{
19574 		vector<deUint8>	expectedBytes;
19575 		float			expected;
19576 		float			actual;
19577 
19578 		expectedOutputs[outputNdx].getBytes(expectedBytes);
19579 		memcpy(&expected, &expectedBytes.front(), expectedBytes.size());
19580 		memcpy(&actual, outputAllocs[outputNdx]->getHostPtr(), expectedBytes.size());
19581 
19582 		// Test with epsilon
19583 		if (fabs(expected - actual) > epsilon)
19584 		{
19585 			log << TestLog::Message << "Error: The actual and expected values not matching."
19586 				<< " Expected: " << expected << " Actual: " << actual << " Epsilon: " << epsilon << TestLog::EndMessage;
19587 			return false;
19588 		}
19589 	}
19590 	return true;
19591 }
19592 
19593 // Checks if the driver crash with uninitialized cases
passthruVerify(const std::vector<Resource> &,const vector<AllocationSp> & outputAllocs,const std::vector<Resource> & expectedOutputs,TestLog &)19594 bool passthruVerify (const std::vector<Resource>&, const vector<AllocationSp>& outputAllocs, const std::vector<Resource>& expectedOutputs, TestLog&)
19595 {
19596 	DE_ASSERT(outputAllocs.size() != 0);
19597 	DE_ASSERT(outputAllocs.size() == expectedOutputs.size());
19598 
19599 	// Copy and discard the result.
19600 	for (size_t outputNdx = 0; outputNdx < outputAllocs.size(); ++outputNdx)
19601 	{
19602 		vector<deUint8>	expectedBytes;
19603 		expectedOutputs[outputNdx].getBytes(expectedBytes);
19604 
19605 		const size_t	width			= expectedBytes.size();
19606 		vector<char>	data			(width);
19607 
19608 		memcpy(&data[0], outputAllocs[outputNdx]->getHostPtr(), width);
19609 	}
19610 	return true;
19611 }
19612 
createShaderDefaultOutputGroup(tcu::TestContext & testCtx)19613 tcu::TestCaseGroup* createShaderDefaultOutputGroup (tcu::TestContext& testCtx)
19614 {
19615 	de::MovePtr<tcu::TestCaseGroup>	group	(new tcu::TestCaseGroup(testCtx, "shader_default_output", "Test shader default output."));
19616 	de::Random						rnd		(deStringHash(group->getName()));
19617 
19618 	for (int type = NUMBERTYPE_INT32; type != NUMBERTYPE_END32; ++type)
19619 	{
19620 		NumberType						numberType	= NumberType(type);
19621 		const string					typeName	= getNumberTypeName(numberType);
19622 		const string					description	= "Test the OpVariable initializer with " + typeName + ".";
19623 		de::MovePtr<tcu::TestCaseGroup>	subGroup	(new tcu::TestCaseGroup(testCtx, typeName.c_str(), description.c_str()));
19624 
19625 		// 2 similar subcases (initialized and uninitialized)
19626 		for (int subCase = 0; subCase < 2; ++subCase)
19627 		{
19628 			ComputeShaderSpec spec;
19629 			spec.numWorkGroups = IVec3(1, 1, 1);
19630 
19631 			map<string, string>				params;
19632 
19633 			switch (numberType)
19634 			{
19635 				case NUMBERTYPE_INT32:
19636 				{
19637 					deInt32 number = getInt(rnd);
19638 					spec.inputs.push_back(createCompositeBuffer<deInt32>(number));
19639 					spec.outputs.push_back(createCompositeBuffer<deInt32>(number));
19640 					params["constValue"] = numberToString(number);
19641 					break;
19642 				}
19643 				case NUMBERTYPE_UINT32:
19644 				{
19645 					deUint32 number = rnd.getUint32();
19646 					spec.inputs.push_back(createCompositeBuffer<deUint32>(number));
19647 					spec.outputs.push_back(createCompositeBuffer<deUint32>(number));
19648 					params["constValue"] = numberToString(number);
19649 					break;
19650 				}
19651 				case NUMBERTYPE_FLOAT32:
19652 				{
19653 					float number = rnd.getFloat();
19654 					spec.inputs.push_back(createCompositeBuffer<float>(number));
19655 					spec.outputs.push_back(createCompositeBuffer<float>(number));
19656 					spec.verifyIO = &compareFloats;
19657 					params["constValue"] = numberToString(number);
19658 					break;
19659 				}
19660 				default:
19661 					DE_ASSERT(false);
19662 			}
19663 
19664 			// Initialized subcase
19665 			if (!subCase)
19666 			{
19667 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType, params);
19668 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "initialized", "OpVariable initializer tests.", spec));
19669 			}
19670 			// Uninitialized subcase
19671 			else
19672 			{
19673 				spec.assembly = specializeDefaultOutputShaderTemplate(numberType);
19674 				spec.verifyIO = &passthruVerify;
19675 				subGroup->addChild(new SpvAsmComputeShaderCase(testCtx, "uninitialized", "OpVariable initializer tests.", spec));
19676 			}
19677 		}
19678 		group->addChild(subGroup.release());
19679 	}
19680 	return group.release();
19681 }
19682 
createOpNopTests(tcu::TestContext & testCtx)19683 tcu::TestCaseGroup* createOpNopTests (tcu::TestContext& testCtx)
19684 {
19685 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
19686 	RGBA							defaultColors[4];
19687 	map<string, string>				opNopFragments;
19688 
19689 	getDefaultColors(defaultColors);
19690 
19691 	opNopFragments["testfun"]		=
19692 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19693 		"%param1 = OpFunctionParameter %v4f32\n"
19694 		"%label_testfun = OpLabel\n"
19695 		"OpNop\n"
19696 		"OpNop\n"
19697 		"OpNop\n"
19698 		"OpNop\n"
19699 		"OpNop\n"
19700 		"OpNop\n"
19701 		"OpNop\n"
19702 		"OpNop\n"
19703 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19704 		"%b = OpFAdd %f32 %a %a\n"
19705 		"OpNop\n"
19706 		"%c = OpFSub %f32 %b %a\n"
19707 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19708 		"OpNop\n"
19709 		"OpNop\n"
19710 		"OpReturnValue %ret\n"
19711 		"OpFunctionEnd\n";
19712 
19713 	createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, testGroup.get());
19714 
19715 	return testGroup.release();
19716 }
19717 
createOpNameTests(tcu::TestContext & testCtx)19718 tcu::TestCaseGroup* createOpNameTests (tcu::TestContext& testCtx)
19719 {
19720 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "opname","Test OpName"));
19721 	RGBA							defaultColors[4];
19722 	map<string, string>				opNameFragments;
19723 
19724 	getDefaultColors(defaultColors);
19725 
19726 	opNameFragments["testfun"] =
19727 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
19728 		"%param1     = OpFunctionParameter %v4f32\n"
19729 		"%label_func = OpLabel\n"
19730 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
19731 		"%b          = OpFAdd %f32 %a %a\n"
19732 		"%c          = OpFSub %f32 %b %a\n"
19733 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
19734 		"OpReturnValue %ret\n"
19735 		"OpFunctionEnd\n";
19736 
19737 	opNameFragments["debug"] =
19738 		"OpName %BP_main \"not_main\"";
19739 
19740 	createTestsForAllStages("opname", defaultColors, defaultColors, opNameFragments, testGroup.get());
19741 
19742 	return testGroup.release();
19743 }
19744 
createFloat16Tests(tcu::TestContext & testCtx)19745 tcu::TestCaseGroup* createFloat16Tests (tcu::TestContext& testCtx)
19746 {
19747 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19748 
19749 	testGroup->addChild(createOpConstantFloat16Tests(testCtx));
19750 	testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITH_NAN));
19751 	testGroup->addChild(createFloat16LogicalSet<GraphicsResources>(testCtx, TEST_WITHOUT_NAN));
19752 	testGroup->addChild(createFloat16FuncSet<GraphicsResources>(testCtx));
19753 	testGroup->addChild(createFloat16VectorExtractSet<GraphicsResources>(testCtx));
19754 	testGroup->addChild(createFloat16VectorInsertSet<GraphicsResources>(testCtx));
19755 	testGroup->addChild(createFloat16VectorShuffleSet<GraphicsResources>(testCtx));
19756 	testGroup->addChild(createFloat16CompositeConstructSet<GraphicsResources>(testCtx));
19757 	testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeExtract"));
19758 	testGroup->addChild(createFloat16CompositeInsertExtractSet<GraphicsResources>(testCtx, "OpCompositeInsert"));
19759 	testGroup->addChild(createFloat16ArithmeticSet<GraphicsResources>(testCtx));
19760 	testGroup->addChild(createFloat16ArithmeticSet<1, GraphicsResources>(testCtx));
19761 	testGroup->addChild(createFloat16ArithmeticSet<2, GraphicsResources>(testCtx));
19762 	testGroup->addChild(createFloat16ArithmeticSet<3, GraphicsResources>(testCtx));
19763 	testGroup->addChild(createFloat16ArithmeticSet<4, GraphicsResources>(testCtx));
19764 
19765 	return testGroup.release();
19766 }
19767 
createFloat32Tests(tcu::TestContext & testCtx)19768 tcu::TestCaseGroup* createFloat32Tests (tcu::TestContext& testCtx)
19769 {
19770 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19771 
19772 	testGroup->addChild(createFloat32ComparisonGraphicsSet<1>(testCtx));
19773 	testGroup->addChild(createFloat32ComparisonGraphicsSet<2>(testCtx));
19774 	testGroup->addChild(createFloat32ComparisonGraphicsSet<3>(testCtx));
19775 	testGroup->addChild(createFloat32ComparisonGraphicsSet<4>(testCtx));
19776 
19777 	return testGroup.release();
19778 }
19779 
createFloat16Group(tcu::TestContext & testCtx)19780 tcu::TestCaseGroup* createFloat16Group (tcu::TestContext& testCtx)
19781 {
19782 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "float16", "Float 16 tests"));
19783 
19784 	testGroup->addChild(createFloat16OpConstantCompositeGroup(testCtx));
19785 	testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITH_NAN));
19786 	testGroup->addChild(createFloat16LogicalSet<ComputeShaderSpec>(testCtx, TEST_WITHOUT_NAN));
19787 	testGroup->addChild(createFloat16FuncSet<ComputeShaderSpec>(testCtx));
19788 	testGroup->addChild(createFloat16VectorExtractSet<ComputeShaderSpec>(testCtx));
19789 	testGroup->addChild(createFloat16VectorInsertSet<ComputeShaderSpec>(testCtx));
19790 	testGroup->addChild(createFloat16VectorShuffleSet<ComputeShaderSpec>(testCtx));
19791 	testGroup->addChild(createFloat16CompositeConstructSet<ComputeShaderSpec>(testCtx));
19792 	testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeExtract"));
19793 	testGroup->addChild(createFloat16CompositeInsertExtractSet<ComputeShaderSpec>(testCtx, "OpCompositeInsert"));
19794 	testGroup->addChild(createFloat16ArithmeticSet<ComputeShaderSpec>(testCtx));
19795 	testGroup->addChild(createFloat16ArithmeticSet<1, ComputeShaderSpec>(testCtx));
19796 	testGroup->addChild(createFloat16ArithmeticSet<2, ComputeShaderSpec>(testCtx));
19797 	testGroup->addChild(createFloat16ArithmeticSet<3, ComputeShaderSpec>(testCtx));
19798 	testGroup->addChild(createFloat16ArithmeticSet<4, ComputeShaderSpec>(testCtx));
19799 
19800 	return testGroup.release();
19801 }
19802 
createFloat32Group(tcu::TestContext & testCtx)19803 tcu::TestCaseGroup* createFloat32Group (tcu::TestContext& testCtx)
19804 {
19805 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "float32", "Float 32 tests"));
19806 
19807 	testGroup->addChild(createFloat32ComparisonComputeSet<1>(testCtx));
19808 	testGroup->addChild(createFloat32ComparisonComputeSet<2>(testCtx));
19809 	testGroup->addChild(createFloat32ComparisonComputeSet<3>(testCtx));
19810 	testGroup->addChild(createFloat32ComparisonComputeSet<4>(testCtx));
19811 
19812 	return testGroup.release();
19813 }
19814 
createBoolMixedBitSizeGroup(tcu::TestContext & testCtx)19815 tcu::TestCaseGroup* createBoolMixedBitSizeGroup (tcu::TestContext& testCtx)
19816 {
19817 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "mixed_bitsize", "Tests boolean operands produced from instructions of different bit-sizes"));
19818 
19819 	de::Random						rnd				(deStringHash(group->getName()));
19820 	const int		numElements		= 100;
19821 	vector<float>	inputData		(numElements, 0);
19822 	vector<float>	outputData		(numElements, 0);
19823 	fillRandomScalars(rnd, 0.0f, 100.0f, &inputData[0], 100);
19824 
19825 	const StringTemplate			shaderTemplate	(
19826 		"${CAPS}\n"
19827 		"OpMemoryModel Logical GLSL450\n"
19828 		"OpEntryPoint GLCompute %main \"main\" %id\n"
19829 		"OpExecutionMode %main LocalSize 1 1 1\n"
19830 		"OpSource GLSL 430\n"
19831 		"OpName %main           \"main\"\n"
19832 		"OpName %id             \"gl_GlobalInvocationID\"\n"
19833 
19834 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
19835 
19836 		+ string(getComputeAsmInputOutputBufferTraits()) + string(getComputeAsmCommonTypes()) + string(getComputeAsmInputOutputBuffer()) +
19837 
19838 		"%id        = OpVariable %uvec3ptr Input\n"
19839 		"${CONST}\n"
19840 		"%main      = OpFunction %void None %voidf\n"
19841 		"%label     = OpLabel\n"
19842 		"%idval     = OpLoad %uvec3 %id\n"
19843 		"%x         = OpCompositeExtract %u32 %idval 0\n"
19844 		"%inloc     = OpAccessChain %f32ptr %indata %c0i32 %x\n"
19845 
19846 		"${TEST}\n"
19847 
19848 		"%outloc    = OpAccessChain %f32ptr %outdata %c0i32 %x\n"
19849 		"             OpStore %outloc %res\n"
19850 		"             OpReturn\n"
19851 		"             OpFunctionEnd\n"
19852 	);
19853 
19854 	// Each test case produces 4 boolean values, and we want each of these values
19855 	// to come froma different combination of the available bit-sizes, so compute
19856 	// all possible combinations here.
19857 	vector<deUint32>	widths;
19858 	widths.push_back(32);
19859 	widths.push_back(16);
19860 	widths.push_back(8);
19861 
19862 	vector<IVec4>	cases;
19863 	for (size_t width0 = 0; width0 < widths.size(); width0++)
19864 	{
19865 		for (size_t width1 = 0; width1 < widths.size(); width1++)
19866 		{
19867 			for (size_t width2 = 0; width2 < widths.size(); width2++)
19868 			{
19869 				for (size_t width3 = 0; width3 < widths.size(); width3++)
19870 				{
19871 					cases.push_back(IVec4(widths[width0], widths[width1], widths[width2], widths[width3]));
19872 				}
19873 			}
19874 		}
19875 	}
19876 
19877 	for (size_t caseNdx = 0; caseNdx < cases.size(); caseNdx++)
19878 	{
19879 		/// Skip cases where all bitsizes are the same, we are only interested in testing booleans produced from instructions with different native bit-sizes
19880 		if (cases[caseNdx][0] == cases[caseNdx][1] && cases[caseNdx][0] == cases[caseNdx][2] && cases[caseNdx][0] == cases[caseNdx][3])
19881 			continue;
19882 
19883 		map<string, string>	specializations;
19884 		ComputeShaderSpec	spec;
19885 
19886 		// Inject appropriate capabilities and reference constants depending
19887 		// on the bit-sizes required by this test case
19888 		bool hasFloat32	= cases[caseNdx][0] == 32 || cases[caseNdx][1] == 32 || cases[caseNdx][2] == 32 || cases[caseNdx][3] == 32;
19889 		bool hasFloat16	= cases[caseNdx][0] == 16 || cases[caseNdx][1] == 16 || cases[caseNdx][2] == 16 || cases[caseNdx][3] == 16;
19890 		bool hasInt8	= cases[caseNdx][0] == 8 || cases[caseNdx][1] == 8 || cases[caseNdx][2] == 8 || cases[caseNdx][3] == 8;
19891 
19892 		string capsStr	= "OpCapability Shader\n";
19893 		string constStr	=
19894 			"%c0i32     = OpConstant %i32 0\n"
19895 			"%c1f32     = OpConstant %f32 1.0\n"
19896 			"%c0f32     = OpConstant %f32 0.0\n";
19897 
19898 		if (hasFloat32)
19899 		{
19900 			constStr	+=
19901 				"%c10f32    = OpConstant %f32 10.0\n"
19902 				"%c25f32    = OpConstant %f32 25.0\n"
19903 				"%c50f32    = OpConstant %f32 50.0\n"
19904 				"%c90f32    = OpConstant %f32 90.0\n";
19905 		}
19906 
19907 		if (hasFloat16)
19908 		{
19909 			capsStr		+= "OpCapability Float16\n";
19910 			constStr	+=
19911 				"%f16       = OpTypeFloat 16\n"
19912 				"%c10f16    = OpConstant %f16 10.0\n"
19913 				"%c25f16    = OpConstant %f16 25.0\n"
19914 				"%c50f16    = OpConstant %f16 50.0\n"
19915 				"%c90f16    = OpConstant %f16 90.0\n";
19916 		}
19917 
19918 		if (hasInt8)
19919 		{
19920 			capsStr		+= "OpCapability Int8\n";
19921 			constStr	+=
19922 				"%i8        = OpTypeInt 8 1\n"
19923 				"%c10i8     = OpConstant %i8 10\n"
19924 				"%c25i8     = OpConstant %i8 25\n"
19925 				"%c50i8     = OpConstant %i8 50\n"
19926 				"%c90i8     = OpConstant %i8 90\n";
19927 		}
19928 
19929 		// Each invocation reads a different float32 value as input. Depending on
19930 		// the bit-sizes required by the particular test case, we also produce
19931 		// float16 and/or and int8 values by converting from the 32-bit float.
19932 		string testStr	= "";
19933 		testStr			+= "%inval32   = OpLoad %f32 %inloc\n";
19934 		if (hasFloat16)
19935 			testStr		+= "%inval16   = OpFConvert %f16 %inval32\n";
19936 		if (hasInt8)
19937 			testStr		+= "%inval8    = OpConvertFToS %i8 %inval32\n";
19938 
19939 		// Because conversions from Float to Int round towards 0 we want our "greater" comparisons to be >=,
19940 		// that way a float32/float16 comparison such as 50.6f >= 50.0f will preserve its result
19941 		// when converted to int8, since FtoS(50.6f) results in 50. For "less" comparisons, it is the
19942 		// other way around, so in this case we want < instead of <=.
19943 		if (cases[caseNdx][0] == 32)
19944 			testStr		+= "%cmp1      = OpFOrdGreaterThanEqual %bool %inval32 %c25f32\n";
19945 		else if (cases[caseNdx][0] == 16)
19946 			testStr		+= "%cmp1      = OpFOrdGreaterThanEqual %bool %inval16 %c25f16\n";
19947 		else
19948 			testStr		+= "%cmp1      = OpSGreaterThanEqual %bool %inval8 %c25i8\n";
19949 
19950 		if (cases[caseNdx][1] == 32)
19951 			testStr		+= "%cmp2      = OpFOrdLessThan %bool %inval32 %c50f32\n";
19952 		else if (cases[caseNdx][1] == 16)
19953 			testStr		+= "%cmp2      = OpFOrdLessThan %bool %inval16 %c50f16\n";
19954 		else
19955 			testStr		+= "%cmp2      = OpSLessThan %bool %inval8 %c50i8\n";
19956 
19957 		if (cases[caseNdx][2] == 32)
19958 			testStr		+= "%cmp3      = OpFOrdLessThan %bool %inval32 %c10f32\n";
19959 		else if (cases[caseNdx][2] == 16)
19960 			testStr		+= "%cmp3      = OpFOrdLessThan %bool %inval16 %c10f16\n";
19961 		else
19962 			testStr		+= "%cmp3      = OpSLessThan %bool %inval8 %c10i8\n";
19963 
19964 		if (cases[caseNdx][3] == 32)
19965 			testStr		+= "%cmp4      = OpFOrdGreaterThanEqual %bool %inval32 %c90f32\n";
19966 		else if (cases[caseNdx][3] == 16)
19967 			testStr		+= "%cmp4      = OpFOrdGreaterThanEqual %bool %inval16 %c90f16\n";
19968 		else
19969 			testStr		+= "%cmp4      = OpSGreaterThanEqual %bool %inval8 %c90i8\n";
19970 
19971 		testStr			+= "%and1      = OpLogicalAnd %bool %cmp1 %cmp2\n";
19972 		testStr			+= "%or1       = OpLogicalOr %bool %cmp3 %cmp4\n";
19973 		testStr			+= "%or2       = OpLogicalOr %bool %and1 %or1\n";
19974 		testStr			+= "%not1      = OpLogicalNot %bool %or2\n";
19975 		testStr			+= "%res       = OpSelect %f32 %not1 %c1f32 %c0f32\n";
19976 
19977 		specializations["CAPS"]		= capsStr;
19978 		specializations["CONST"]	= constStr;
19979 		specializations["TEST"]		= testStr;
19980 
19981 		// Compute expected result by evaluating the boolean expression computed in the shader for each input value
19982 		for (size_t ndx = 0; ndx < numElements; ++ndx)
19983 			outputData[ndx] = !((inputData[ndx] >= 25.0f && inputData[ndx] < 50.0f) || (inputData[ndx] < 10.0f || inputData[ndx] >= 90.0f));
19984 
19985 		spec.assembly = shaderTemplate.specialize(specializations);
19986 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputData)));
19987 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputData)));
19988 		spec.numWorkGroups = IVec3(numElements, 1, 1);
19989 		if (hasFloat16)
19990 			spec.requestedVulkanFeatures.extFloat16Int8.shaderFloat16 = true;
19991 		if (hasInt8)
19992 			spec.requestedVulkanFeatures.extFloat16Int8.shaderInt8 = true;
19993 		spec.extensions.push_back("VK_KHR_shader_float16_int8");
19994 
19995 		string testName = "b" + de::toString(cases[caseNdx][0]) + "b" + de::toString(cases[caseNdx][1]) + "b" + de::toString(cases[caseNdx][2]) + "b" + de::toString(cases[caseNdx][3]);
19996 		group->addChild(new SpvAsmComputeShaderCase(testCtx, testName.c_str(), "", spec));
19997 	}
19998 
19999 	return group.release();
20000 }
20001 
createBoolGroup(tcu::TestContext & testCtx)20002 tcu::TestCaseGroup* createBoolGroup (tcu::TestContext& testCtx)
20003 {
20004 	de::MovePtr<tcu::TestCaseGroup>		testGroup			(new tcu::TestCaseGroup(testCtx, "bool", "Boolean tests"));
20005 
20006 	testGroup->addChild(createBoolMixedBitSizeGroup(testCtx));
20007 
20008 	return testGroup.release();
20009 }
20010 
createOpNameAbuseTests(tcu::TestContext & testCtx)20011 tcu::TestCaseGroup* createOpNameAbuseTests (tcu::TestContext& testCtx)
20012 {
20013 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "opname_abuse", "OpName abuse tests"));
20014 	vector<CaseParameter>			abuseCases;
20015 	RGBA							defaultColors[4];
20016 	map<string, string>				opNameFragments;
20017 
20018 	getOpNameAbuseCases(abuseCases);
20019 	getDefaultColors(defaultColors);
20020 
20021 	opNameFragments["testfun"] =
20022 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20023 		"%param1     = OpFunctionParameter %v4f32\n"
20024 		"%label_func = OpLabel\n"
20025 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20026 		"%b          = OpFAdd %f32 %a %a\n"
20027 		"%c          = OpFSub %f32 %b %a\n"
20028 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
20029 		"OpReturnValue %ret\n"
20030 		"OpFunctionEnd\n";
20031 
20032 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20033 	{
20034 		string casename;
20035 		casename = string("main") + abuseCases[i].name;
20036 
20037 		opNameFragments["debug"] =
20038 			"OpName %BP_main \"" + abuseCases[i].param + "\"";
20039 
20040 		createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20041 	}
20042 
20043 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20044 	{
20045 		string casename;
20046 		casename = string("b") + abuseCases[i].name;
20047 
20048 		opNameFragments["debug"] =
20049 			"OpName %b \"" + abuseCases[i].param + "\"";
20050 
20051 		createTestsForAllStages(casename, defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20052 	}
20053 
20054 	{
20055 		opNameFragments["debug"] =
20056 			"OpName %test_code \"name1\"\n"
20057 			"OpName %param1    \"name2\"\n"
20058 			"OpName %a         \"name3\"\n"
20059 			"OpName %b         \"name4\"\n"
20060 			"OpName %c         \"name5\"\n"
20061 			"OpName %ret       \"name6\"\n";
20062 
20063 		createTestsForAllStages("everything_named", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20064 	}
20065 
20066 	{
20067 		opNameFragments["debug"] =
20068 			"OpName %test_code \"the_same\"\n"
20069 			"OpName %param1    \"the_same\"\n"
20070 			"OpName %a         \"the_same\"\n"
20071 			"OpName %b         \"the_same\"\n"
20072 			"OpName %c         \"the_same\"\n"
20073 			"OpName %ret       \"the_same\"\n";
20074 
20075 		createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20076 	}
20077 
20078 	{
20079 		opNameFragments["debug"] =
20080 			"OpName %BP_main \"to_be\"\n"
20081 			"OpName %BP_main \"or_not\"\n"
20082 			"OpName %BP_main \"to_be\"\n";
20083 
20084 		createTestsForAllStages("main_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20085 	}
20086 
20087 	{
20088 		opNameFragments["debug"] =
20089 			"OpName %b \"to_be\"\n"
20090 			"OpName %b \"or_not\"\n"
20091 			"OpName %b \"to_be\"\n";
20092 
20093 		createTestsForAllStages("b_has_multiple_names", defaultColors, defaultColors, opNameFragments, abuseGroup.get());
20094 	}
20095 
20096 	return abuseGroup.release();
20097 }
20098 
20099 
createOpMemberNameAbuseTests(tcu::TestContext & testCtx)20100 tcu::TestCaseGroup* createOpMemberNameAbuseTests (tcu::TestContext& testCtx)
20101 {
20102 	de::MovePtr<tcu::TestCaseGroup>	abuseGroup(new tcu::TestCaseGroup(testCtx, "opmembername_abuse", "OpName abuse tests"));
20103 	vector<CaseParameter>			abuseCases;
20104 	RGBA							defaultColors[4];
20105 	map<string, string>				opMemberNameFragments;
20106 
20107 	getOpNameAbuseCases(abuseCases);
20108 	getDefaultColors(defaultColors);
20109 
20110 	opMemberNameFragments["pre_main"] =
20111 		"%f3str = OpTypeStruct %f32 %f32 %f32\n";
20112 
20113 	opMemberNameFragments["testfun"] =
20114 		"%test_code  = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20115 		"%param1     = OpFunctionParameter %v4f32\n"
20116 		"%label_func = OpLabel\n"
20117 		"%a          = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
20118 		"%b          = OpFAdd %f32 %a %a\n"
20119 		"%c          = OpFSub %f32 %b %a\n"
20120 		"%cstr       = OpCompositeConstruct %f3str %c %c %c\n"
20121 		"%d          = OpCompositeExtract %f32 %cstr 0\n"
20122 		"%ret        = OpVectorInsertDynamic %v4f32 %param1 %d %c_i32_0\n"
20123 		"OpReturnValue %ret\n"
20124 		"OpFunctionEnd\n";
20125 
20126 	for (unsigned int i = 0; i < abuseCases.size(); i++)
20127 	{
20128 		string casename;
20129 		casename = string("f3str_x") + abuseCases[i].name;
20130 
20131 		opMemberNameFragments["debug"] =
20132 			"OpMemberName %f3str 0 \"" + abuseCases[i].param + "\"";
20133 
20134 		createTestsForAllStages(casename, defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20135 	}
20136 
20137 	{
20138 		opMemberNameFragments["debug"] =
20139 			"OpMemberName %f3str 0 \"name1\"\n"
20140 			"OpMemberName %f3str 1 \"name2\"\n"
20141 			"OpMemberName %f3str 2 \"name3\"\n";
20142 
20143 		createTestsForAllStages("everything_named", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20144 	}
20145 
20146 	{
20147 		opMemberNameFragments["debug"] =
20148 			"OpMemberName %f3str 0 \"the_same\"\n"
20149 			"OpMemberName %f3str 1 \"the_same\"\n"
20150 			"OpMemberName %f3str 2 \"the_same\"\n";
20151 
20152 		createTestsForAllStages("everything_named_the_same", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20153 	}
20154 
20155 	{
20156 		opMemberNameFragments["debug"] =
20157 			"OpMemberName %f3str 0 \"to_be\"\n"
20158 			"OpMemberName %f3str 1 \"or_not\"\n"
20159 			"OpMemberName %f3str 0 \"to_be\"\n"
20160 			"OpMemberName %f3str 2 \"makes_no\"\n"
20161 			"OpMemberName %f3str 0 \"difference\"\n"
20162 			"OpMemberName %f3str 0 \"to_me\"\n";
20163 
20164 
20165 		createTestsForAllStages("f3str_x_has_multiple_names", defaultColors, defaultColors, opMemberNameFragments, abuseGroup.get());
20166 	}
20167 
20168 	return abuseGroup.release();
20169 }
20170 
getSparseIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20171 vector<deUint32> getSparseIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20172 {
20173 	vector<deUint32>	result;
20174 	de::Random			rnd		(seed);
20175 
20176 	result.reserve(numDataPoints);
20177 
20178 	for (deUint32 dataPointNdx = 0; dataPointNdx < numDataPoints; ++dataPointNdx)
20179 		result.push_back(rnd.getUint32());
20180 
20181 	return result;
20182 }
20183 
// Returns the element-wise sum (unsigned 32-bit arithmetic) of the two inputs.
// The result has as many elements as inData1; inData2 is indexed with the same
// indices, so it must be at least as long as inData1.
vector<deUint32> getSparseIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2)
{
	const size_t		numValues	= inData1.size();
	vector<deUint32>	sums		(numValues);

	for (size_t ndx = 0; ndx < numValues; ++ndx)
		sums[ndx] = inData1[ndx] + inData2[ndx];

	return sums;
}
20195 
20196 template<class SpecResource>
createSparseIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20197 void createSparseIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20198 {
20199 	const deUint32			numDataPoints	= 16;
20200 	const std::string		testName		("sparse_ids");
20201 	const deUint32			seed			(deStringHash(testName.c_str()));
20202 	const vector<deUint32>	inData1			(getSparseIdsAbuseData(numDataPoints, seed + 1));
20203 	const vector<deUint32>	inData2			(getSparseIdsAbuseData(numDataPoints, seed + 2));
20204 	const vector<deUint32>	outData			(getSparseIdsAbuseResults(inData1, inData2));
20205 	const StringTemplate	preMain
20206 	(
20207 		"%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20208 		"   %up_u32 = OpTypePointer Uniform %u32\n"
20209 		"   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20210 		"   %SSBO32 = OpTypeStruct %ra_u32\n"
20211 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20212 		"%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20213 		"%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20214 		" %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20215 	);
20216 	const StringTemplate	decoration
20217 	(
20218 		"OpDecorate %ra_u32 ArrayStride 4\n"
20219 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
20220 		"OpDecorate %SSBO32 BufferBlock\n"
20221 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
20222 		"OpDecorate %ssbo_src0 Binding 0\n"
20223 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
20224 		"OpDecorate %ssbo_src1 Binding 1\n"
20225 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
20226 		"OpDecorate %ssbo_dst Binding 2\n"
20227 	);
20228 	const StringTemplate	testFun
20229 	(
20230 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20231 		"    %param = OpFunctionParameter %v4f32\n"
20232 
20233 		"    %entry = OpLabel\n"
20234 		"        %i = OpVariable %fp_i32 Function\n"
20235 		"             OpStore %i %c_i32_0\n"
20236 		"             OpBranch %loop\n"
20237 
20238 		"     %loop = OpLabel\n"
20239 		"    %i_cmp = OpLoad %i32 %i\n"
20240 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20241 		"             OpLoopMerge %merge %next None\n"
20242 		"             OpBranchConditional %lt %write %merge\n"
20243 
20244 		"    %write = OpLabel\n"
20245 		"      %ndx = OpLoad %i32 %i\n"
20246 
20247 		"      %127 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20248 		"      %128 = OpLoad %u32 %127\n"
20249 
20250 		// The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20251 		"  %4194000 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20252 		"  %4194001 = OpLoad %u32 %4194000\n"
20253 
20254 		"  %2097151 = OpIAdd %u32 %128 %4194001\n"
20255 		"  %2097152 = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20256 		"             OpStore %2097152 %2097151\n"
20257 		"             OpBranch %next\n"
20258 
20259 		"     %next = OpLabel\n"
20260 		"    %i_cur = OpLoad %i32 %i\n"
20261 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20262 		"             OpStore %i %i_new\n"
20263 		"             OpBranch %loop\n"
20264 
20265 		"    %merge = OpLabel\n"
20266 		"             OpReturnValue %param\n"
20267 
20268 		"             OpFunctionEnd\n"
20269 	);
20270 	SpecResource			specResource;
20271 	map<string, string>		specs;
20272 	VulkanFeatures			features;
20273 	map<string, string>		fragments;
20274 	vector<string>			extensions;
20275 
20276 	specs["num_data_points"]	= de::toString(numDataPoints);
20277 
20278 	fragments["decoration"]		= decoration.specialize(specs);
20279 	fragments["pre_main"]		= preMain.specialize(specs);
20280 	fragments["testfun"]		= testFun.specialize(specs);
20281 
20282 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20283 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20284 	specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20285 
20286 	if (std::is_base_of<GraphicsResources, SpecResource>::value)
20287 	{
20288 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
20289 		features.coreFeatures.fragmentStoresAndAtomics			= true;
20290 	}
20291 
20292 	finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20293 }
20294 
getLotsIdsAbuseData(const deUint32 numDataPoints,const deUint32 seed)20295 vector<deUint32> getLotsIdsAbuseData (const deUint32 numDataPoints, const deUint32 seed)
20296 {
20297 	vector<deUint32>	result;
20298 	de::Random			rnd		(seed);
20299 
20300 	result.reserve(numDataPoints);
20301 
20302 	// Fixed value
20303 	result.push_back(1u);
20304 
20305 	// Random values
20306 	for (deUint32 dataPointNdx = 1; dataPointNdx < numDataPoints; ++dataPointNdx)
20307 		result.push_back(rnd.getUint8());
20308 
20309 	return result;
20310 }
20311 
// Returns, for every index, inData1[i] + count * inData2[i] evaluated in
// unsigned 32-bit arithmetic. The result has as many elements as inData1;
// inData2 is indexed with the same indices, so it must be at least as long
// as inData1.
vector<deUint32> getLotsIdsAbuseResults (const vector<deUint32>& inData1, const vector<deUint32>& inData2, const deUint32 count)
{
	const size_t		numValues	= inData1.size();
	vector<deUint32>	expected	(numValues);

	for (size_t ndx = 0; ndx < numValues; ++ndx)
		expected[ndx] = inData1[ndx] + count * inData2[ndx];

	return expected;
}
20323 
20324 template<class SpecResource>
createLotsIdsAbuseTest(tcu::TestContext & testCtx,de::MovePtr<tcu::TestCaseGroup> & testGroup)20325 void createLotsIdsAbuseTest (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup>& testGroup)
20326 {
20327 	const deUint32			numDataPoints	= 16;
20328 	const deUint32			firstNdx		= 100u;
20329 	const deUint32			sequenceCount	= 10000u;
20330 	const std::string		testName		("lots_ids");
20331 	const deUint32			seed			(deStringHash(testName.c_str()));
20332 	const vector<deUint32>	inData1			(getLotsIdsAbuseData(numDataPoints, seed + 1));
20333 	const vector<deUint32>	inData2			(getLotsIdsAbuseData(numDataPoints, seed + 2));
20334 	const vector<deUint32>	outData			(getLotsIdsAbuseResults(inData1, inData2, sequenceCount));
20335 	const StringTemplate preMain
20336 	(
20337 		"%c_i32_ndp = OpConstant %i32 ${num_data_points}\n"
20338 		"   %up_u32 = OpTypePointer Uniform %u32\n"
20339 		"   %ra_u32 = OpTypeArray %u32 %c_i32_ndp\n"
20340 		"   %SSBO32 = OpTypeStruct %ra_u32\n"
20341 		"%up_SSBO32 = OpTypePointer Uniform %SSBO32\n"
20342 		"%ssbo_src0 = OpVariable %up_SSBO32 Uniform\n"
20343 		"%ssbo_src1 = OpVariable %up_SSBO32 Uniform\n"
20344 		" %ssbo_dst = OpVariable %up_SSBO32 Uniform\n"
20345 	);
20346 	const StringTemplate decoration
20347 	(
20348 		"OpDecorate %ra_u32 ArrayStride 4\n"
20349 		"OpMemberDecorate %SSBO32 0 Offset 0\n"
20350 		"OpDecorate %SSBO32 BufferBlock\n"
20351 		"OpDecorate %ssbo_src0 DescriptorSet 0\n"
20352 		"OpDecorate %ssbo_src0 Binding 0\n"
20353 		"OpDecorate %ssbo_src1 DescriptorSet 0\n"
20354 		"OpDecorate %ssbo_src1 Binding 1\n"
20355 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
20356 		"OpDecorate %ssbo_dst Binding 2\n"
20357 	);
20358 	const StringTemplate testFun
20359 	(
20360 		"%test_code = OpFunction %v4f32 None %v4f32_v4f32_function\n"
20361 		"    %param = OpFunctionParameter %v4f32\n"
20362 
20363 		"    %entry = OpLabel\n"
20364 		"        %i = OpVariable %fp_i32 Function\n"
20365 		"             OpStore %i %c_i32_0\n"
20366 		"             OpBranch %loop\n"
20367 
20368 		"     %loop = OpLabel\n"
20369 		"    %i_cmp = OpLoad %i32 %i\n"
20370 		"       %lt = OpSLessThan %bool %i_cmp %c_i32_ndp\n"
20371 		"             OpLoopMerge %merge %next None\n"
20372 		"             OpBranchConditional %lt %write %merge\n"
20373 
20374 		"    %write = OpLabel\n"
20375 		"      %ndx = OpLoad %i32 %i\n"
20376 
20377 		"       %90 = OpAccessChain %up_u32 %ssbo_src1 %c_i32_0 %ndx\n"
20378 		"       %91 = OpLoad %u32 %90\n"
20379 
20380 		"       %98 = OpAccessChain %up_u32 %ssbo_src0 %c_i32_0 %ndx\n"
20381 		"       %${zeroth_id} = OpLoad %u32 %98\n"
20382 
20383 		"${seq}\n"
20384 
20385 		// The test relies on SPIR-V compiler option SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS set in assembleSpirV()
20386 		"      %dst = OpAccessChain %up_u32 %ssbo_dst %c_i32_0 %ndx\n"
20387 		"             OpStore %dst %${last_id}\n"
20388 		"             OpBranch %next\n"
20389 
20390 		"     %next = OpLabel\n"
20391 		"    %i_cur = OpLoad %i32 %i\n"
20392 		"    %i_new = OpIAdd %i32 %i_cur %c_i32_1\n"
20393 		"             OpStore %i %i_new\n"
20394 		"             OpBranch %loop\n"
20395 
20396 		"    %merge = OpLabel\n"
20397 		"             OpReturnValue %param\n"
20398 
20399 		"             OpFunctionEnd\n"
20400 	);
20401 	deUint32				lastId			= firstNdx;
20402 	SpecResource			specResource;
20403 	map<string, string>		specs;
20404 	VulkanFeatures			features;
20405 	map<string, string>		fragments;
20406 	vector<string>			extensions;
20407 	std::string				sequence;
20408 
20409 	for (deUint32 sequenceNdx = 0; sequenceNdx < sequenceCount; ++sequenceNdx)
20410 	{
20411 		const deUint32		sequenceId		= sequenceNdx + firstNdx;
20412 		const std::string	sequenceIdStr	= de::toString(sequenceId);
20413 
20414 		sequence += "%" + sequenceIdStr + " = OpIAdd %u32 %91 %" + de::toString(sequenceId - 1) + "\n";
20415 		lastId = sequenceId;
20416 
20417 		if (sequenceNdx == 0)
20418 			sequence.reserve((10 + sequence.length()) * sequenceCount);
20419 	}
20420 
20421 	specs["num_data_points"]	= de::toString(numDataPoints);
20422 	specs["zeroth_id"]			= de::toString(firstNdx - 1);
20423 	specs["last_id"]			= de::toString(lastId);
20424 	specs["seq"]				= sequence;
20425 
20426 	fragments["decoration"]		= decoration.specialize(specs);
20427 	fragments["pre_main"]		= preMain.specialize(specs);
20428 	fragments["testfun"]		= testFun.specialize(specs);
20429 
20430 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData1)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20431 	specResource.inputs.push_back(Resource(BufferSp(new Uint32Buffer(inData2)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20432 	specResource.outputs.push_back(Resource(BufferSp(new Uint32Buffer(outData)), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER));
20433 
20434 	if (std::is_base_of<GraphicsResources, SpecResource>::value)
20435 	{
20436 		features.coreFeatures.vertexPipelineStoresAndAtomics	= true;
20437 		features.coreFeatures.fragmentStoresAndAtomics			= true;
20438 	}
20439 
20440 	finalizeTestsCreation(specResource, fragments, testCtx, *testGroup.get(), testName, features, extensions, IVec3(1, 1, 1));
20441 }
20442 
createSpirvIdsAbuseTests(tcu::TestContext & testCtx)20443 tcu::TestCaseGroup* createSpirvIdsAbuseTests (tcu::TestContext& testCtx)
20444 {
20445 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20446 
20447 	createSparseIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20448 	createLotsIdsAbuseTest<GraphicsResources>(testCtx, testGroup);
20449 
20450 	return testGroup.release();
20451 }
20452 
createSpirvIdsAbuseGroup(tcu::TestContext & testCtx)20453 tcu::TestCaseGroup* createSpirvIdsAbuseGroup (tcu::TestContext& testCtx)
20454 {
20455 	de::MovePtr<tcu::TestCaseGroup>	testGroup	(new tcu::TestCaseGroup(testCtx, "spirv_ids_abuse", "SPIR-V abuse tests"));
20456 
20457 	createSparseIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20458 	createLotsIdsAbuseTest<ComputeShaderSpec>(testCtx, testGroup);
20459 
20460 	return testGroup.release();
20461 }
20462 
createFunctionParamsGroup(tcu::TestContext & testCtx)20463 tcu::TestCaseGroup* createFunctionParamsGroup (tcu::TestContext& testCtx)
20464 {
20465 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "function_params", "Function parameter tests"));
20466 
20467 	static const char data_dir[] = "spirv_assembly/instruction/function_params";
20468 
20469 	static const struct
20470 	{
20471 		const std::string name;
20472 		const std::string desc;
20473 	} cases[] =
20474 	{
20475 		{ "sampler_param", "Test combined image sampler as function parameter" },
20476 	};
20477 
20478 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20479 	{
20480 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20481 																			cases[i].name.c_str(),
20482 																			cases[i].desc.c_str(),
20483 																			data_dir,
20484 																			cases[i].name + ".amber");
20485 		testGroup->addChild(testCase);
20486 	}
20487 
20488 	return testGroup.release();
20489 }
20490 
createEarlyFragmentTests(tcu::TestContext & testCtx)20491 tcu::TestCaseGroup* createEarlyFragmentTests(tcu::TestContext& testCtx)
20492 {
20493 	de::MovePtr<tcu::TestCaseGroup> earlyFragTests (new tcu::TestCaseGroup(testCtx, "early_fragment", "Early Fragment Tests"));
20494 
20495 	static const char dataDir[] = "spirv_assembly/instruction/graphics/early_fragment";
20496 
20497 	static const struct Case
20498 	{
20499 		const string name;
20500 		const string desc;
20501 	}
20502 	cases[] =
20503 	{
20504 		// Overwriting the gl_FragDepth should be ignored, when Early Fragment Test Mode is enabled.
20505 		{ "depth_less",				"gl_FragDepth > CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	},
20506 		{ "depth_greater",			"gl_FragDepth < CLEAR_DEPTH. Polygon depth > CLEAR_DEPTH."	},
20507 		{ "depth_less_or_equal",	"gl_FragDepth > CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20508 		{ "depth_greater_or_equal",	"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20509 		{ "depth_equal",			"gl_FragDepth < CLEAR_DEPTH. Polygon depth == CLEAR_DEPTH."	},
20510 		{ "depth_not_equal",		"gl_FragDepth == CLEAR_DEPTH. Polygon depth < CLEAR_DEPTH."	}
20511 	};
20512 
20513 	for (const auto& tCase : cases)
20514 	{
20515 		cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx,
20516 			tCase.name.c_str(),
20517 			tCase.desc.c_str(),
20518 			dataDir,
20519 			tCase.name + ".amber");
20520 
20521 		earlyFragTests->addChild(testCase);
20522 	}
20523 
20524 	return earlyFragTests.release();
20525 }
20526 
createOpExecutionModeTests(tcu::TestContext & testCtx)20527 tcu::TestCaseGroup* createOpExecutionModeTests (tcu::TestContext& testCtx)
20528 {
20529 	de::MovePtr<tcu::TestCaseGroup> testGroup (new tcu::TestCaseGroup(testCtx, "execution_mode", "Execution mode tests"));
20530 
20531 	static const char dataDir[] = "spirv_assembly/instruction/graphics/execution_mode";
20532 
20533 	static const struct Case
20534 	{
20535 		const string name;
20536 		const string desc;
20537 	} cases[] =
20538 	{
20539 		{ "depthless_0",		"FragDepth < Polygon depth: depth test should pass." },
20540 		{ "depthless_1",		"FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, but the depth test should pass." },
20541 		{ "depthless_2",		"FragDepth < Polygon depth: depth test should fail." },
20542 		{ "depthless_3",		"FragDepth > Polygon depth: violates the promise that FragDepth is less than the implicit depth, the depth test should fail." },
20543 		{ "depthless_4",		"FragDepth < Polygon depth: depth test should pass." },
20544 		{ "depthgreater_0",		"FragDepth > Polygon depth: depth test should pass." },
20545 		{ "depthgreater_1",		"FragDepth < Polygon depth: violates the promise that FragDepth is greater than the implicit depth, but the depth test should pass." },
20546 		{ "depthgreater_2",		"FragDepth > Polygon depth: depth test should fail." },
20547 		{ "depthgreater_3",		"FragDepth > Polygon depth: violates the promise that FragDepth is greater than the implicit depth, the depth test should fail." },
20548 		{ "depthgreater_4",		"FragDepth > Polygon depth: depth test should pass." },
20549 		{ "depthunchanged_0",	"FragDepth == Polygon depth: depth test should pass." },
20550 		{ "depthunchanged_1",	"FragDepth == Polygon depth: depth test should fail." },
20551 		{ "depthunchanged_2",	"FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should pass." },
20552 		{ "depthunchanged_3",	"FragDepth != Polygon depth: violates the promise that FragDepth is equal to the implicit depth, the depth test should fail." },
20553 	};
20554 
20555 	for (const auto& case_ : cases)
20556 	{
20557 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20558 																			case_.name.c_str(),
20559 																			case_.desc.c_str(),
20560 																			dataDir,
20561 																			case_.name + ".amber");
20562 		testGroup->addChild(testCase);
20563 	}
20564 
20565 	return testGroup.release();
20566 }
20567 
createQueryGroup(tcu::TestContext & testCtx)20568 tcu::TestCaseGroup* createQueryGroup (tcu::TestContext& testCtx)
20569 {
20570 	de::MovePtr<tcu::TestCaseGroup>	testGroup (new tcu::TestCaseGroup(testCtx, "image_query", "image query tests"));
20571 
20572 	static const char data_dir[] = "spirv_assembly/instruction/image_query";
20573 
20574 	static const struct
20575 	{
20576 		const std::string name;
20577 		const std::string desc;
20578 	} cases[] =
20579 	{
20580 		{ "samples_storage", "Test samples query can be used on storage images" },
20581 	};
20582 
20583 	vector<string> requirements(1, "Features.shaderStorageImageMultisample");
20584 
20585 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(cases); ++i)
20586 	{
20587 		cts_amber::AmberTestCase *testCase = cts_amber::createAmberTestCase(testCtx,
20588 																			cases[i].name.c_str(),
20589 																			cases[i].desc.c_str(),
20590 																			data_dir,
20591 																			cases[i].name + ".amber",
20592 																			requirements);
20593 		testGroup->addChild(testCase);
20594 	}
20595 
20596 	return testGroup.release();
20597 }
20598 
createInstructionTests(tcu::TestContext & testCtx)20599 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
20600 {
20601 	const bool testComputePipeline = true;
20602 
20603 	de::MovePtr<tcu::TestCaseGroup> instructionTests	(new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
20604 	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
20605 	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
20606 
20607 	computeTests->addChild(createSpivVersionCheckTests(testCtx, testComputePipeline));
20608 	computeTests->addChild(createLocalSizeGroup(testCtx, false));
20609 	computeTests->addChild(createLocalSizeGroup(testCtx, true));
20610 	computeTests->addChild(createNonSemanticInfoGroup(testCtx));
20611 	computeTests->addChild(createOpNopGroup(testCtx));
20612 	computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITHOUT_NAN));
20613 	computeTests->addChild(createOpFUnordGroup(testCtx, TEST_WITH_NAN));
20614 	computeTests->addChild(createOpAtomicGroup(testCtx, false));
20615 	computeTests->addChild(createOpAtomicGroup(testCtx, true));					// Using new StorageBuffer decoration
20616 	computeTests->addChild(createOpAtomicGroup(testCtx, false, 1024, true));	// Return value validation
20617 	computeTests->addChild(createOpAtomicGroup(testCtx, true, 65535, false, true));	// volatile atomics
20618 	computeTests->addChild(createOpLineGroup(testCtx));
20619 	computeTests->addChild(createOpModuleProcessedGroup(testCtx));
20620 	computeTests->addChild(createOpNoLineGroup(testCtx));
20621 	computeTests->addChild(createOpConstantNullGroup(testCtx));
20622 	computeTests->addChild(createOpConstantCompositeGroup(testCtx));
20623 	computeTests->addChild(createOpConstantUsageGroup(testCtx));
20624 	computeTests->addChild(createSpecConstantGroup(testCtx));
20625 	computeTests->addChild(createOpSourceGroup(testCtx));
20626 	computeTests->addChild(createOpSourceExtensionGroup(testCtx));
20627 	computeTests->addChild(createDecorationGroupGroup(testCtx));
20628 	computeTests->addChild(createOpPhiGroup(testCtx));
20629 	computeTests->addChild(createLoopControlGroup(testCtx));
20630 	computeTests->addChild(createFunctionControlGroup(testCtx));
20631 	computeTests->addChild(createSelectionControlGroup(testCtx));
20632 	computeTests->addChild(createBlockOrderGroup(testCtx));
20633 	computeTests->addChild(createMultipleShaderGroup(testCtx));
20634 	computeTests->addChild(createMemoryAccessGroup(testCtx));
20635 	computeTests->addChild(createOpCopyMemoryGroup(testCtx));
20636 	computeTests->addChild(createOpCopyObjectGroup(testCtx));
20637 	computeTests->addChild(createNoContractionGroup(testCtx));
20638 	computeTests->addChild(createOpUndefGroup(testCtx));
20639 	computeTests->addChild(createOpUnreachableGroup(testCtx));
20640 	computeTests->addChild(createOpQuantizeToF16Group(testCtx));
20641 	computeTests->addChild(createOpFRemGroup(testCtx));
20642 	computeTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20643 	computeTests->addChild(createOpSRemComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20644 	computeTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_PASS));
20645 	computeTests->addChild(createOpSModComputeGroup64(testCtx, QP_TEST_RESULT_PASS));
20646 	computeTests->addChild(createOpSDotKHRComputeGroup(testCtx));
20647 	computeTests->addChild(createOpUDotKHRComputeGroup(testCtx));
20648 	computeTests->addChild(createOpSUDotKHRComputeGroup(testCtx));
20649 	computeTests->addChild(createOpSDotAccSatKHRComputeGroup(testCtx));
20650 	computeTests->addChild(createOpUDotAccSatKHRComputeGroup(testCtx));
20651 	computeTests->addChild(createOpSUDotAccSatKHRComputeGroup(testCtx));
20652 	computeTests->addChild(createConvertComputeTests(testCtx, "OpSConvert", "sconvert"));
20653 	computeTests->addChild(createConvertComputeTests(testCtx, "OpUConvert", "uconvert"));
20654 	computeTests->addChild(createConvertComputeTests(testCtx, "OpFConvert", "fconvert"));
20655 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertSToF", "convertstof"));
20656 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToS", "convertftos"));
20657 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertUToF", "convertutof"));
20658 	computeTests->addChild(createConvertComputeTests(testCtx, "OpConvertFToU", "convertftou"));
20659 	computeTests->addChild(createOpCompositeInsertGroup(testCtx));
20660 	computeTests->addChild(createOpInBoundsAccessChainGroup(testCtx));
20661 	computeTests->addChild(createShaderDefaultOutputGroup(testCtx));
20662 	computeTests->addChild(createOpNMinGroup(testCtx));
20663 	computeTests->addChild(createOpNMaxGroup(testCtx));
20664 	computeTests->addChild(createOpNClampGroup(testCtx));
20665 	computeTests->addChild(createFloatControlsExtensionlessGroup(testCtx));
20666 	{
20667 		de::MovePtr<tcu::TestCaseGroup>	computeAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20668 
20669 		computeAndroidTests->addChild(createOpSRemComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20670 		computeAndroidTests->addChild(createOpSModComputeGroup(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20671 
20672 		computeTests->addChild(computeAndroidTests.release());
20673 	}
20674 
20675 	computeTests->addChild(create8BitStorageComputeGroup(testCtx));
20676 	computeTests->addChild(create16BitStorageComputeGroup(testCtx));
20677 	computeTests->addChild(createFloatControlsComputeGroup(testCtx));
20678 	computeTests->addChild(createUboMatrixPaddingComputeGroup(testCtx));
20679 	computeTests->addChild(createCompositeInsertComputeGroup(testCtx));
20680 	computeTests->addChild(createVariableInitComputeGroup(testCtx));
20681 	computeTests->addChild(createConditionalBranchComputeGroup(testCtx));
20682 	computeTests->addChild(createIndexingComputeGroup(testCtx));
20683 	computeTests->addChild(createVariablePointersComputeGroup(testCtx));
20684 	computeTests->addChild(createPhysicalPointersComputeGroup(testCtx));
20685 	computeTests->addChild(createImageSamplerComputeGroup(testCtx));
20686 	computeTests->addChild(createOpNameGroup(testCtx));
20687 	computeTests->addChild(createOpMemberNameGroup(testCtx));
20688 	computeTests->addChild(createPointerParameterComputeGroup(testCtx));
20689 	computeTests->addChild(createFloat16Group(testCtx));
20690 	computeTests->addChild(createFloat32Group(testCtx));
20691 	computeTests->addChild(createBoolGroup(testCtx));
20692 	computeTests->addChild(createWorkgroupMemoryComputeGroup(testCtx));
20693 	computeTests->addChild(createSpirvIdsAbuseGroup(testCtx));
20694 	computeTests->addChild(createSignedIntCompareGroup(testCtx));
20695 	computeTests->addChild(createSignedOpTestsGroup(testCtx));
20696 	computeTests->addChild(createUnusedVariableComputeTests(testCtx));
20697 	computeTests->addChild(createPtrAccessChainGroup(testCtx));
20698 	computeTests->addChild(createVectorShuffleGroup(testCtx));
20699 	computeTests->addChild(createHlslComputeGroup(testCtx));
20700 	computeTests->addChild(createEmptyStructComputeGroup(testCtx));
20701 	computeTests->addChild(create64bitCompareComputeGroup(testCtx));
20702 	computeTests->addChild(createOpArrayLengthComputeGroup(testCtx));
20703 	computeTests->addChild(createPhysicalStorageBufferTestGroup(testCtx));
20704 
20705 	graphicsTests->addChild(createCrossStageInterfaceTests(testCtx));
20706 	graphicsTests->addChild(createSpivVersionCheckTests(testCtx, !testComputePipeline));
20707 	graphicsTests->addChild(createOpNopTests(testCtx));
20708 	graphicsTests->addChild(createOpSourceTests(testCtx));
20709 	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
20710 	graphicsTests->addChild(createOpModuleProcessedTests(testCtx));
20711 	graphicsTests->addChild(createOpLineTests(testCtx));
20712 	graphicsTests->addChild(createOpNoLineTests(testCtx));
20713 	graphicsTests->addChild(createOpConstantNullTests(testCtx));
20714 	graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
20715 	graphicsTests->addChild(createMemoryAccessTests(testCtx));
20716 	graphicsTests->addChild(createOpUndefTests(testCtx));
20717 	graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
20718 	graphicsTests->addChild(createModuleTests(testCtx));
20719 	graphicsTests->addChild(createUnusedVariableTests(testCtx));
20720 	graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
20721 	graphicsTests->addChild(createOpPhiTests(testCtx));
20722 	graphicsTests->addChild(createNoContractionTests(testCtx));
20723 	graphicsTests->addChild(createOpQuantizeTests(testCtx));
20724 	graphicsTests->addChild(createLoopTests(testCtx));
20725 	graphicsTests->addChild(createSpecConstantTests(testCtx));
20726 	graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
20727 	graphicsTests->addChild(createBarrierTests(testCtx));
20728 	graphicsTests->addChild(createDecorationGroupTests(testCtx));
20729 	graphicsTests->addChild(createFRemTests(testCtx));
20730 	graphicsTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20731 	graphicsTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_PASS));
20732 
20733 	{
20734 		de::MovePtr<tcu::TestCaseGroup>	graphicsAndroidTests	(new tcu::TestCaseGroup(testCtx, "android", "Android CTS Tests"));
20735 
20736 		graphicsAndroidTests->addChild(createOpSRemGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20737 		graphicsAndroidTests->addChild(createOpSModGraphicsTests(testCtx, QP_TEST_RESULT_QUALITY_WARNING));
20738 
20739 		graphicsTests->addChild(graphicsAndroidTests.release());
20740 	}
20741 
20742 	graphicsTests->addChild(createOpNameTests(testCtx));
20743 	graphicsTests->addChild(createOpNameAbuseTests(testCtx));
20744 	graphicsTests->addChild(createOpMemberNameAbuseTests(testCtx));
20745 
20746 	graphicsTests->addChild(create8BitStorageGraphicsGroup(testCtx));
20747 	graphicsTests->addChild(create16BitStorageGraphicsGroup(testCtx));
20748 	graphicsTests->addChild(createFloatControlsGraphicsGroup(testCtx));
20749 	graphicsTests->addChild(createUboMatrixPaddingGraphicsGroup(testCtx));
20750 	graphicsTests->addChild(createCompositeInsertGraphicsGroup(testCtx));
20751 	graphicsTests->addChild(createVariableInitGraphicsGroup(testCtx));
20752 	graphicsTests->addChild(createConditionalBranchGraphicsGroup(testCtx));
20753 	graphicsTests->addChild(createIndexingGraphicsGroup(testCtx));
20754 	graphicsTests->addChild(createVariablePointersGraphicsGroup(testCtx));
20755 	graphicsTests->addChild(createImageSamplerGraphicsGroup(testCtx));
20756 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpSConvert", "sconvert"));
20757 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpUConvert", "uconvert"));
20758 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpFConvert", "fconvert"));
20759 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertSToF", "convertstof"));
20760 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToS", "convertftos"));
20761 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertUToF", "convertutof"));
20762 	graphicsTests->addChild(createConvertGraphicsTests(testCtx, "OpConvertFToU", "convertftou"));
20763 	graphicsTests->addChild(createPointerParameterGraphicsGroup(testCtx));
20764 	graphicsTests->addChild(createVaryingNameGraphicsGroup(testCtx));
20765 	graphicsTests->addChild(createFloat16Tests(testCtx));
20766 	graphicsTests->addChild(createFloat32Tests(testCtx));
20767 	graphicsTests->addChild(createSpirvIdsAbuseTests(testCtx));
20768 	graphicsTests->addChild(create64bitCompareGraphicsGroup(testCtx));
20769 	graphicsTests->addChild(createEarlyFragmentTests(testCtx));
20770 	graphicsTests->addChild(createOpExecutionModeTests(testCtx));
20771 
20772 	instructionTests->addChild(computeTests.release());
20773 	instructionTests->addChild(graphicsTests.release());
20774 	instructionTests->addChild(createSpirvVersion1p4Group(testCtx));
20775 	instructionTests->addChild(createFunctionParamsGroup(testCtx));
20776 	instructionTests->addChild(createQueryGroup(testCtx));
20777 	instructionTests->addChild(createTrinaryMinMaxGroup(testCtx));
20778 	instructionTests->addChild(createTerminateInvocationGroup(testCtx));
20779 
20780 	return instructionTests.release();
20781 }
20782 
20783 } // SpirVAssembly
20784 } // vkt
20785