• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Google LLC.
7  * Copyright (c) 2023 LunarG, Inc.
8  * Copyright (c) 2023 Nintendo
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  *//*!
23  * \file
24  * \brief VK_KHR_zero_initialize_workgroup_memory tests
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
28 #include "vktTestCase.hpp"
29 #include "vktTestCaseUtil.hpp"
30 #include "vktTestGroupUtil.hpp"
31 #include "vktAmberTestCase.hpp"
32 
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkObjUtil.hpp"
40 #include "vkDefs.hpp"
41 #include "vkRef.hpp"
42 
43 #include "tcuCommandLine.hpp"
44 #include "tcuTestLog.hpp"
45 
46 #include "deRandom.hpp"
47 #include "deStringUtil.hpp"
48 #include "deUniquePtr.hpp"
49 
50 #include <algorithm>
51 #include <vector>
52 
53 using namespace vk;
54 
55 namespace vkt
56 {
57 namespace compute
58 {
59 namespace
60 {
61 
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,vk::ComputePipelineConstructionType m_computePipelineConstructionType,const std::vector<deUint32> specValues={},deUint32 increment=0)62 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
63 							deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
64 							vk::ComputePipelineConstructionType m_computePipelineConstructionType,
65 							const std::vector<deUint32> specValues = {},
66 							deUint32 increment = 0)
67 {
68 	const DeviceInterface&	vk			= context.getDeviceInterface();
69 	const VkDevice			device		= context.getDevice();
70 	Allocator&				allocator	= context.getDefaultAllocator();
71 	tcu::TestLog&			log			= context.getTestContext().getLog();
72 
73 	de::MovePtr<BufferWithMemory> buffer;
74 	VkDescriptorBufferInfo bufferDescriptor;
75 
76 	VkDeviceSize size = bufferSize;
77 	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
78 		vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
79 		MemoryRequirement::HostVisible));
80 	bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
81 
82 	deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
83 	deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
84 
85 	DescriptorSetLayoutBuilder layoutBuilder;
86 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
87 
88 	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
89 	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
90 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
91 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
92 	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
93 
94 	std::vector<VkSpecializationMapEntry> entries(specValues.size());
95 	if (!specValues.empty())
96 	{
97 		for (deUint32 i = 0; i < specValues.size(); ++i)
98 		{
99 			entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
100 		}
101 	}
102 	const VkSpecializationInfo specInfo =
103 	{
104 		(deUint32)specValues.size(),
105 		entries.data(),
106 		specValues.size() * sizeof(deUint32),
107 		specValues.data(),
108 	};
109 	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
110 	flushAlloc(vk, device, buffer->getAllocation());
111 
112 	ComputePipelineWrapper			pipeline(vk, device, m_computePipelineConstructionType, context.getBinaryCollection().get("comp"));
113 	pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
114 	pipeline.setSpecializationInfo(specInfo);
115 	pipeline.buildPipeline();
116 
117 	const VkQueue queue = context.getUniversalQueue();
118 	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
119 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
120 		context.getUniversalQueueFamilyIndex());
121 	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
122 
123 	DescriptorSetUpdateBuilder setUpdateBuilder;
124 	setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
125 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
126 	setUpdateBuilder.update(vk, device);
127 
128 	beginCommandBuffer(vk, *cmdBuffer, 0);
129 
130 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
131 	pipeline.bind(*cmdBuffer);
132 
133 	vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
134 
135 	endCommandBuffer(vk, *cmdBuffer);
136 
137 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
138 
139 	invalidateAlloc(vk, device, buffer->getAllocation());
140 
141 	for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
142 	{
143 		deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
144 		if (ptr[i] != expected)
145 		{
146 			log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
147 			return tcu::TestStatus::fail("compute failed");
148 		}
149 	}
150 
151 	return tcu::TestStatus::pass("compute succeeded");
152 }
153 
// Instance for MaxWorkgroupMemoryTest: dispatches m_numWorkgroups workgroups
// of a shader that verifies zero-initialization of a maximum-size shared
// memory array (see MaxWorkgroupMemoryTest::initPrograms).
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
	MaxWorkgroupMemoryInstance	(Context& context, deUint32 numWorkgroups, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance						(context)
		, m_numWorkgroups					(numWorkgroups)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate		(void);

private:
	// Number of workgroups dispatched along X (Y and Z are always 1).
	deUint32							m_numWorkgroups;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
169 
// Test case exercising zero-initialization of a shared memory array sized to
// the device's maxComputeSharedMemorySize limit.
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
	MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
							const std::string& name,
							deUint32 numWorkgroups,
							const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name),
		m_numWorkgroups						(numWorkgroups)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Generates the GLSL compute shader ("comp") checking shared memory contents.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
	}
	// Requires VK_KHR_zero_initialize_workgroup_memory (and shader objects if requested).
	virtual void checkSupport(Context& context) const;

private:
	deUint32							m_numWorkgroups;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
194 
// Skip the test unless the zero-initialize-workgroup-memory extension is
// available, and unless the requested pipeline construction mode is usable.
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}
200 
initPrograms(SourceCollections & sourceCollections) const201 void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
202 {
203 	std::ostringstream src;
204 	src << "#version 450\n";
205 	src << "#extension GL_EXT_null_initializer : enable\n";
206 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
207 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
208 	src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
209 	src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
210 	src << "shared uvec4 wg_mem[num_elems] = {};\n";
211 	src << "void main() {\n";
212 	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
213 	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
214 	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
215 	src << "  uint idx = idx_x + idx_y + idx_z;\n";
216 	src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
217 	src << "  for (uint i = 0; i < num_elems; ++i) {\n";
218 	src << "    for (uint j = 0; j < 4; ++j) {\n";
219 	src << "      uint shared_idx = 4*i + j;\n";
220 	src << "      uint wg_val = wg_mem[i][j];\n";
221 	src << "      if (idx == shared_idx) {\n";
222 	src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
223 	src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
224 	src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
225 	src << "      }\n";
226 	src << "    }\n";
227 	src << "  }\n";
228 	src << "}\n";
229 
230 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
231 }
232 
tcu::TestStatus MaxWorkgroupMemoryInstance::iterate (void)
{
	VkPhysicalDeviceProperties properties;
	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
	// Shared memory array is sized to the device's full shared memory limit.
	const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;

	// Derive a workgroup size capped at 247 invocations, rounding each
	// dimension down to a multiple of 13 (presumably to exercise
	// non-power-of-two sizes — NOTE(review): intent inferred, confirm).
	const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
	deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
	deUint32 wgy = 1;
	deUint32 wgz = 1;
	// Grow into Y and then Z only if X (then X*Y) alone cannot reach maxWG.
	if (wgx < maxWG)
	{
		wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
	}
	if ((wgx * wgy) < maxWG)
	{
		wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
	}
	const deUint32 size = maxMemSize;
	const deUint32 numElems = maxMemSize / 16;	// shared array element is a uvec4 (16 bytes)

	// Spec constants: ids 0..2 = workgroup size, id 3 = shared element count.
	// increment=1: every workgroup adds 1 per zero component, so each output
	// element must equal m_numWorkgroups.
	return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}
256 
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)257 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
258 {
259 	std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
260 	for (deUint32 i = 0; i < workgroups.size(); ++i) {
261 		deUint32 numWG = workgroups[i];
262 		group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
263 			de::toString(numWG), numWG, computePipelineConstructionType));
264 	}
265 }
266 
// Parameters for a single type zero-initialization case.
struct TypeCaseDef
{
	std::string	typeName;		// GLSL type name of the shared variable(s)
	deUint32	typeSize;		// size in bytes of one scalar component
	deUint32	numElements;	// components per column (1 for scalars)
	deUint32	numRows;		// matrix columns (1 for scalars/vectors)
	deUint32	numVariables;	// number of shared variables (filled in by AddTypeTests)
};
275 
// Instance for TypeTest: runs a single-workgroup dispatch that reads back
// whether every component of every shared variable was zero-initialized.
class TypeTestInstance : public vkt::TestInstance
{
public:
	TypeTestInstance(Context& context, const TypeCaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance						(context)
		, m_caseDef							(caseDef)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate (void);

private:
	TypeCaseDef m_caseDef;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
291 
// Test case checking zero-initialization of shared variables of one GLSL type
// (scalar, vector or matrix; various component widths).
class TypeTest : public vkt::TestCase
{
public:
	TypeTest(tcu::TestContext& testCtx,
			const std::string& name,
			const TypeCaseDef& caseDef,
			const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name)
		, m_caseDef							(caseDef)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Generates the GLSL compute shader ("comp") for the case's type.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
	}
	// Requires the extension plus any shader arithmetic-type feature the type needs.
	virtual void checkSupport(Context& context) const;

private:
	TypeCaseDef							m_caseDef;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
316 
checkSupport(Context & context) const317 void TypeTest::checkSupport(Context& context) const
318 {
319 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
320 	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
321 
322 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
323 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
324 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
325 	f16_i8_features.pNext = DE_NULL;
326 
327 	VkPhysicalDeviceFeatures2 features2;
328 	deMemset(&features2, 0, sizeof(features2));
329 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
330 	features2.pNext = &f16_i8_features;
331 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
332 
333 	if (m_caseDef.typeName == "float16_t" ||
334 		m_caseDef.typeName == "f16vec2" ||
335 		m_caseDef.typeName == "f16vec3" ||
336 		m_caseDef.typeName == "f16vec4" ||
337 		m_caseDef.typeName == "f16mat2x2" ||
338 		m_caseDef.typeName == "f16mat2x3" ||
339 		m_caseDef.typeName == "f16mat2x4" ||
340 		m_caseDef.typeName == "f16mat3x2" ||
341 		m_caseDef.typeName == "f16mat3x3" ||
342 		m_caseDef.typeName == "f16mat3x4" ||
343 		m_caseDef.typeName == "f16mat4x2" ||
344 		m_caseDef.typeName == "f16mat4x3" ||
345 		m_caseDef.typeName == "f16mat4x4")
346 	{
347 		if (f16_i8_features.shaderFloat16 != VK_TRUE)
348 			TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
349 	}
350 
351 	if (m_caseDef.typeName == "float64_t" ||
352 		m_caseDef.typeName == "f64vec2" ||
353 		m_caseDef.typeName == "f64vec3" ||
354 		m_caseDef.typeName == "f64vec4"||
355 		m_caseDef.typeName == "f64mat2x2" ||
356 		m_caseDef.typeName == "f64mat2x3" ||
357 		m_caseDef.typeName == "f64mat2x4" ||
358 		m_caseDef.typeName == "f64mat3x2" ||
359 		m_caseDef.typeName == "f64mat3x3" ||
360 		m_caseDef.typeName == "f64mat3x4" ||
361 		m_caseDef.typeName == "f64mat4x2" ||
362 		m_caseDef.typeName == "f64mat4x3" ||
363 		m_caseDef.typeName == "f64mat4x4")
364 	{
365 		if (features2.features.shaderFloat64 != VK_TRUE)
366 			TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
367 	}
368 
369 	if (m_caseDef.typeName == "int8_t" ||
370 		m_caseDef.typeName == "i8vec2" ||
371 		m_caseDef.typeName == "i8vec3" ||
372 		m_caseDef.typeName == "i8vec4" ||
373 		m_caseDef.typeName == "uint8_t" ||
374 		m_caseDef.typeName == "u8vec2" ||
375 		m_caseDef.typeName == "u8vec3" ||
376 		m_caseDef.typeName == "u8vec4")
377 	{
378 		if (f16_i8_features.shaderInt8 != VK_TRUE)
379 			TCU_THROW(NotSupportedError, "shaderInt8 not supported");
380 	}
381 
382 	if (m_caseDef.typeName == "int16_t" ||
383 		m_caseDef.typeName == "i16vec2" ||
384 		m_caseDef.typeName == "i16vec3" ||
385 		m_caseDef.typeName == "i16vec4" ||
386 		m_caseDef.typeName == "uint16_t" ||
387 		m_caseDef.typeName == "u16vec2" ||
388 		m_caseDef.typeName == "u16vec3" ||
389 		m_caseDef.typeName == "u16vec4")
390 	{
391 		if (features2.features.shaderInt16 != VK_TRUE)
392 			TCU_THROW(NotSupportedError, "shaderInt16 not supported");
393 	}
394 
395 	if (m_caseDef.typeName == "int64_t" ||
396 		m_caseDef.typeName == "i64vec2" ||
397 		m_caseDef.typeName == "i64vec3" ||
398 		m_caseDef.typeName == "i64vec4" ||
399 		m_caseDef.typeName == "uint64_t" ||
400 		m_caseDef.typeName == "u64vec2" ||
401 		m_caseDef.typeName == "u64vec3" ||
402 		m_caseDef.typeName == "u64vec4")
403 	{
404 		if (features2.features.shaderInt64 != VK_TRUE)
405 			TCU_THROW(NotSupportedError, "shaderInt64 not supported");
406 	}
407 }
408 
// Emit the "comp" shader: numVariables zero-initialized shared variables of
// the case's type; one invocation per component writes 0 to the output buffer
// when the component read back zero, 1 otherwise.
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	// One invocation per component of a single variable.
	src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A  { uint a[]; } a;\n";
	for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
		src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
	}
	src << "void main() {\n";
	if (m_caseDef.numRows > 1)
	{
		// Matrix cases: map the 1D invocation index to [row][col].
		src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
		src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
	}
	// 64-bit components must be compared through an int64_t conversion.
	std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
	for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
	{
		if (m_caseDef.numElements == 1)
		{
			// Scalars.
			src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") ==  0) ? 0 : 1;\n";
		}
		else if (m_caseDef.numRows == 1)
		{
			// Vectors.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) ==  0) ? 0 : 1;\n";
		}
		else
		{
			// Matrices.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) ==  0) ? 0 : 1;\n";
		}
	}
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
449 
iterate(void)450 tcu::TestStatus TypeTestInstance::iterate (void)
451 {
452 	const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
453 	return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
454 }
455 
// Register one TypeTest per GLSL type. Table columns:
// { typeName, component size in bytes, components per column, columns, numVariables }.
// numVariables is a placeholder (0) here; it is overwritten below with a
// seeded-random count in [1, 16] so runs are reproducible.
void AddTypeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
	deRandom rnd;
	deRandom_init(&rnd, 0);
	std::vector<TypeCaseDef> cases =
	{
		{"bool",		1,	1,	1,	0},
		{"bvec2",		1,	2,	1,	0},
		{"bvec3",		1,	3,	1,	0},
		{"bvec4",		1,	4,	1,	0},
		{"uint32_t",	4,	1,	1,	0},
		{"uvec2",		4,	2,	1,	0},
		{"uvec3",		4,	3,	1,	0},
		{"uvec4",		4,	4,	1,	0},
		{"int32_t",		4,	1,	1,	0},
		{"ivec2",		4,	2,	1,	0},
		{"ivec3",		4,	3,	1,	0},
		{"ivec4",		4,	4,	1,	0},
		{"uint8_t",		1,	1,	1,	0},
		{"u8vec2",		1,	2,	1,	0},
		{"u8vec3",		1,	3,	1,	0},
		{"u8vec4",		1,	4,	1,	0},
		{"int8_t",		1,	1,	1,	0},
		{"i8vec2",		1,	2,	1,	0},
		{"i8vec3",		1,	3,	1,	0},
		{"i8vec4",		1,	4,	1,	0},
		{"uint16_t",	2,	1,	1,	0},
		{"u16vec2",		2,	2,	1,	0},
		{"u16vec3",		2,	3,	1,	0},
		{"u16vec4",		2,	4,	1,	0},
		{"int16_t",		2,	1,	1,	0},
		{"i16vec2",		2,	2,	1,	0},
		{"i16vec3",		2,	3,	1,	0},
		{"i16vec4",		2,	4,	1,	0},
		{"uint64_t",	8,	1,	1,	0},
		{"u64vec2",		8,	2,	1,	0},
		{"u64vec3",		8,	3,	1,	0},
		{"u64vec4",		8,	4,	1,	0},
		{"int64_t",		8,	1,	1,	0},
		{"i64vec2",		8,	2,	1,	0},
		{"i64vec3",		8,	3,	1,	0},
		{"i64vec4",		8,	4,	1,	0},
		{"float32_t",	4,	1,	1,	0},
		{"f32vec2",		4,	2,	1,	0},
		{"f32vec3",		4,	3,	1,	0},
		{"f32vec4",		4,	4,	1,	0},
		{"f32mat2x2",	4,	2,	2,	0},
		{"f32mat2x3",	4,	3,	2,	0},
		{"f32mat2x4",	4,	4,	2,	0},
		{"f32mat3x2",	4,	2,	3,	0},
		{"f32mat3x3",	4,	3,	3,	0},
		{"f32mat3x4",	4,	4,	3,	0},
		{"f32mat4x2",	4,	2,	4,	0},
		{"f32mat4x3",	4,	3,	4,	0},
		{"f32mat4x4",	4,	4,	4,	0},
		{"float16_t",	2,	1,	1,	0},
		{"f16vec2",		2,	2,	1,	0},
		{"f16vec3",		2,	3,	1,	0},
		{"f16vec4",		2,	4,	1,	0},
		{"f16mat2x2",	2,	2,	2,	0},
		{"f16mat2x3",	2,	3,	2,	0},
		{"f16mat2x4",	2,	4,	2,	0},
		{"f16mat3x2",	2,	2,	3,	0},
		{"f16mat3x3",	2,	3,	3,	0},
		{"f16mat3x4",	2,	4,	3,	0},
		{"f16mat4x2",	2,	2,	4,	0},
		{"f16mat4x3",	2,	3,	4,	0},
		{"f16mat4x4",	2,	4,	4,	0},
		{"float64_t",	8,	1,	1,	0},
		{"f64vec2",		8,	2,	1,	0},
		{"f64vec3",		8,	3,	1,	0},
		{"f64vec4",		8,	4,	1,	0},
		{"f64mat2x2",	8,	2,	2,	0},
		{"f64mat2x3",	8,	3,	2,	0},
		{"f64mat2x4",	8,	4,	2,	0},
		{"f64mat3x2",	8,	2,	3,	0},
		{"f64mat3x3",	8,	3,	3,	0},
		{"f64mat3x4",	8,	4,	3,	0},
		{"f64mat4x2",	8,	2,	4,	0},
		{"f64mat4x3",	8,	3,	4,	0},
		{"f64mat4x4",	8,	4,	4,	0},
	};

	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		// Between 1 and 16 shared variables per case, from a fixed seed.
		cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
		group->addChild(
			new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i], computePipelineConstructionType));
	}
}
546 
// Parameters for a composite-type (struct/array) zero-initialization case.
struct CompositeCaseDef
{
	deUint32				index;			// bitmask of required features: 1=float16, 2=float64, 4=int8, 8=int16, 16=int64
	std::string				typeDefinition;	// GLSL declaring the zero-initialized shared variable(s)
	std::string				assignment;		// GLSL main() body writing one uint per checked element
	deUint32				elements;		// number of uints written to the output buffer
	std::vector<deUint32>	specValues;		// specialization constants specId0..N-1

	CompositeCaseDef (uint32_t index_, const std::string& typeDefinition_, const std::string& assignment_, uint32_t elements_, const std::vector<uint32_t>& specValues_)
		: index				(index_)
		, typeDefinition	(typeDefinition_)
		, assignment		(assignment_)
		, elements			(elements_)
		, specValues		(specValues_)
		{}
};
563 
// Instance for CompositeTest: runs a single-invocation dispatch checking that
// the case's composite shared variable reads back all zeros.
class CompositeTestInstance : public vkt::TestInstance
{
public:
	CompositeTestInstance (Context& context, const CompositeCaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance						(context)
		, m_caseDef							(caseDef)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate (void);
private:
	CompositeCaseDef					m_caseDef;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
578 
// Test case checking zero-initialization of composite (struct/array) shared
// variables described by a CompositeCaseDef.
class CompositeTest : public vkt::TestCase
{
public:
	CompositeTest(tcu::TestContext& testCtx,
				  const std::string& name,
				  const CompositeCaseDef& caseDef,
				  const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name)
		, m_caseDef							(caseDef)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Assembles the GLSL shader ("comp") from the case's type definition and body.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
	}
	// Requires the extension plus the features encoded in m_caseDef.index.
	virtual void checkSupport(Context& context) const;
private:
	CompositeCaseDef					m_caseDef;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
602 
checkSupport(Context & context) const603 void CompositeTest::checkSupport(Context& context) const
604 {
605 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
606 	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
607 
608 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
609 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
610 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
611 	f16_i8_features.pNext = DE_NULL;
612 
613 	VkPhysicalDeviceFeatures2 features2;
614 	deMemset(&features2, 0, sizeof(features2));
615 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
616 	features2.pNext = &f16_i8_features;
617 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
618 
619 	bool needsFloat16	= (m_caseDef.index & 0x1) != 0;
620 	bool needsFloat64	= (m_caseDef.index & 0x2) != 0;
621 	bool needsInt8		= (m_caseDef.index & 0x4) != 0;
622 	bool needsInt16		= (m_caseDef.index & 0x8) != 0;
623 	bool needsInt64		= (m_caseDef.index & 0x10) != 0;
624 
625 	if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
626 		TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
627 	if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
628 		TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
629 	if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
630 		TCU_THROW(NotSupportedError, "shaderInt8 not supported");
631 	if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
632 		TCU_THROW(NotSupportedError, "shaderInt16 not supported");
633 	if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
634 		TCU_THROW(NotSupportedError, "shaderInt64 not supported");
635 }
636 
initPrograms(SourceCollections & sourceCollections) const637 void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
638 {
639 	std::ostringstream src;
640 	src << "#version 450\n";
641 	src << "#extension GL_EXT_null_initializer : enable\n";
642 	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
643 	src << "\n";
644 	for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
645 		src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
646 	}
647 	src << "\n";
648 	src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
649 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
650 	src << "\n";
651 	src << m_caseDef.typeDefinition;
652 	src << "\n";
653 	src << "void main() {\n";
654 	src << m_caseDef.assignment;
655 	src << "}\n";
656 
657 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
658 }
659 
iterate(void)660 tcu::TestStatus CompositeTestInstance::iterate (void)
661 {
662 	const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
663 	return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
664 }
665 
AddCompositeTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)666 void AddCompositeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
667 {
668 	const std::vector<CompositeCaseDef> cases
669 	{
670 		{0,
671 		"shared uint wg_mem[specId0] = {};\n",
672 
673 		"for (uint i = 0; i < specId0; ++i) {\n"
674 		"  a.a[i] = wg_mem[i];\n"
675 		"}\n",
676 		16,
677 		{16},
678 		},
679 
680 		{0,
681 		"shared float wg_mem[specId0][specId1] = {};\n",
682 
683 		"for (uint i = 0; i < specId0; ++i) {\n"
684 		"  for (uint j = 0; j < specId1; ++j) {\n"
685 		"    uint idx = i * specId1 + j;\n"
686 		"    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
687 		"  }\n"
688 		"}\n",
689 		32,
690 		{4, 8},
691 		},
692 
693 		{0,
694 		"struct Sa {\n"
695 		"  uint a;\n"
696 		"  uvec2 b;\n"
697 		"  uvec3 c;\n"
698 		"  uvec4 d;\n"
699 		"  float e;\n"
700 		"  vec2 f;\n"
701 		"  vec3 g;\n"
702 		"  vec4 h;\n"
703 		"  bool i;\n"
704 		"  bvec2 j;\n"
705 		"  bvec3 k;\n"
706 		"  bvec4 l;\n"
707 		"};\n"
708 		"shared Sa wg_mem = {};\n",
709 
710 		"uint i = 0;\n"
711 		"a.a[i++] = wg_mem.a;\n"
712 		"a.a[i++] = wg_mem.b.x;\n"
713 		"a.a[i++] = wg_mem.b.y;\n"
714 		"a.a[i++] = wg_mem.c.x;\n"
715 		"a.a[i++] = wg_mem.c.y;\n"
716 		"a.a[i++] = wg_mem.c.z;\n"
717 		"a.a[i++] = wg_mem.d.x;\n"
718 		"a.a[i++] = wg_mem.d.y;\n"
719 		"a.a[i++] = wg_mem.d.z;\n"
720 		"a.a[i++] = wg_mem.d.w;\n"
721 		"a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
722 		"a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
723 		"a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
724 		"a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
725 		"a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
726 		"a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
727 		"a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
728 		"a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
729 		"a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
730 		"a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
731 		"a.a[i++] = wg_mem.i ? 1 : 0;\n"
732 		"a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
733 		"a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
734 		"a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
735 		"a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
736 		"a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
737 		"a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
738 		"a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
739 		"a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
740 		"a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
741 		30,
742 		{},
743 		},
744 
745 		{0,
746 		"struct Sa {\n"
747 		"  uint a;\n"
748 		"};\n"
749 		"struct Sb {\n"
750 		"  uvec2 a;\n"
751 		"};\n"
752 		"struct Sc {\n"
753 		"  Sa a[specId0];\n"
754 		"  Sb b[specId1];\n"
755 		"};\n"
756 		"shared Sc wg_mem[specId2] = {};\n",
757 
758 		"uint idx = 0;\n"
759 		"for (uint i = 0; i < specId2; ++i) {\n"
760 		"  for (uint j = 0; j < specId0; ++j) {\n"
761 		"    a.a[idx++] = wg_mem[i].a[j].a;\n"
762 		"  }\n"
763 		"  for (uint j = 0; j < specId1; ++j) {\n"
764 		"    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
765 		"    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
766 		"  }\n"
767 		"}\n",
768 		32,
769 		{2,3,4},
770 		},
771 
772 		{1,
773 		"struct Sa {\n"
774 		"  f16vec2 a;\n"
775 		"  float16_t b[specId0];\n"
776 		"};\n"
777 		"shared Sa wg_mem = {};\n",
778 
779 		"uint idx = 0;\n"
780 		"a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
781 		"a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
782 		"for (uint i = 0; i < specId0; ++i) {\n"
783 		"  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
784 		"}\n",
785 		18,
786 		{16},
787 		},
788 
789 		{2,
790 		"struct Sa {\n"
791 		"  f64vec2 a;\n"
792 		"  float64_t b[specId0];\n"
793 		"};\n"
794 		"shared Sa wg_mem = {};\n",
795 
796 		"uint idx = 0;\n"
797 		"a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
798 		"a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
799 		"for (uint i = 0; i < specId0; ++i) {\n"
800 		"  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
801 		"}\n",
802 		7,
803 		{5},
804 		},
805 
806 		{4,
807 		"struct Sa {\n"
808 		"  i8vec2 a;\n"
809 		"  int8_t b[specId0];\n"
810 		"};\n"
811 		"shared Sa wg_mem = {};\n",
812 
813 		"uint idx = 0;\n"
814 		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
815 		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
816 		"for (uint i = 0; i < specId0; ++i) {\n"
817 		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
818 		"}\n",
819 		34,
820 		{32},
821 		},
822 
823 		{8,
824 		"struct Sa {\n"
825 		"  i16vec2 a;\n"
826 		"  int16_t b[specId0];\n"
827 		"};\n"
828 		"shared Sa wg_mem = {};\n",
829 
830 		"uint idx = 0;\n"
831 		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
832 		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
833 		"for (uint i = 0; i < specId0; ++i) {\n"
834 		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
835 		"}\n",
836 		122,
837 		{120},
838 		},
839 
840 		{16,
841 		"struct Sa {\n"
842 		"  i64vec2 a;\n"
843 		"  int64_t b[specId0];\n"
844 		"};\n"
845 		"shared Sa wg_mem = {};\n",
846 
847 		"uint idx = 0;\n"
848 		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
849 		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
850 		"for (uint i = 0; i < specId0; ++i) {\n"
851 		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
852 		"}\n",
853 		63,
854 		{61},
855 		},
856 
857 		{0x1f,
858 		"struct Sa {\n"
859 		"  float16_t a;\n"
860 		"  float b;\n"
861 		"  int8_t c;\n"
862 		"  int16_t d;\n"
863 		"  int e;\n"
864 		"  int64_t f;\n"
865 		"  float64_t g;\n"
866 		"};\n"
867 		"shared Sa wg_mem = {};\n",
868 
869 		"uint idx = 0;\n"
870 		"a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
871 		"a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
872 		"a.a[idx++] = uint(wg_mem.c);\n"
873 		"a.a[idx++] = uint(wg_mem.d);\n"
874 		"a.a[idx++] = uint(wg_mem.e);\n"
875 		"a.a[idx++] = uint(wg_mem.f);\n"
876 		"a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
877 		7,
878 		{},
879 		},
880 
881 		{0,
882 		"struct Sa {\n"
883 		"  uint a;\n"
884 		"};\n"
885 		"struct Sb {\n"
886 		"  Sa a[specId0];\n"
887 		"  uint b;\n"
888 		"};\n"
889 		"struct Sc {\n"
890 		"  Sb b[specId1];\n"
891 		"  uint c;\n"
892 		"};\n"
893 		"struct Sd {\n"
894 		"  Sc c[specId2];\n"
895 		"  uint d;\n"
896 		"};\n"
897 		"struct Se {\n"
898 		"  Sd d[specId3];\n"
899 		"  uint e;\n"
900 		"};\n"
901 		"shared Se wg_mem[specId4] = {};\n",
902 
903 		"uint idx = 0;\n"
904 		"for (uint i1 = 0; i1 < specId4; ++i1) {\n"
905 		"  a.a[idx++] = wg_mem[i1].e;\n"
906 		"  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
907 		"    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
908 		"    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
909 		"      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
910 		"      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
911 		"        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
912 		"        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
913 		"          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
914 		"        }\n"
915 		"      }\n"
916 		"    }\n"
917 		"  }\n"
918 		"}\n",
919 		872,
920 		{6,5,4,3,2},
921 		},
922 	};
923 
924 	for (deUint32 i = 0; i < cases.size(); ++i)
925 	{
926 		group->addChild(
927 			new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
928 	}
929 }
930 
// Selects which dispatch dimension the max-workgroups tests drive to its
// maximum workgroup count; consumed by MaxWorkgroupsTest/MaxWorkgroupsInstance.
enum Dim {
	DimX,
	DimY,
	DimZ,
};
936 
// Executes the max-workgroups test: dispatches a large workgroup count along
// one dimension (m_dim) and checks, via the shader's output buffer, that the
// zero-initialized shared array reads back as zero.
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
	MaxWorkgroupsInstance(Context &context, Dim dim, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance						(context)
		, m_dim								(dim)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}
	// Sizes the workgroup from device limits, dispatches, and verifies results.
	tcu::TestStatus iterate (void);
private:
	Dim									m_dim;	// dimension that receives the maximal workgroup count
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;	// monolithic pipeline vs. shader objects
};
951 
// Test-case wrapper for the max-workgroups tests: declares the compute shader
// (initPrograms), the feature requirements (checkSupport), and creates a
// MaxWorkgroupsInstance to run the test.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
	MaxWorkgroupsTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  Dim dim,
					  const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name)
		, m_dim								(dim)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Adds the GLSL compute shader ("comp") to the source collection.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
	}
	// Throws NotSupportedError when the required extension/features are missing.
	virtual void checkSupport(Context& context) const;
private:
	Dim									m_dim;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
975 
// Requires the zero-initialize-workgroup-memory extension and, for
// shader-object based construction, the corresponding shader-object features.
void MaxWorkgroupsTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}
981 
initPrograms(SourceCollections & sourceCollections) const982 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
983 {
984 	std::ostringstream src;
985 	src << "#version 450\n";
986 	src << "#extension GL_EXT_null_initializer : enable\n";
987 	src << "\n";
988 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
989 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
990 	src << "shared uint wg_mem[2] = {};\n";
991 	std::string dim;
992 	switch (m_dim) {
993 		case DimX:
994 			dim = "x";
995 			break;
996 		case DimY:
997 			dim = "y";
998 			break;
999 		case DimZ:
1000 			dim = "z";
1001 			break;
1002 	}
1003 	src << "\n";
1004 	src << "void main() {\n";
1005 	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1006 	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1007 	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
1008 	src << "  uint idx = idx_x + idx_y + idx_z;\n";
1009 	src << "  if (gl_LocalInvocationID.x == 0) {\n";
1010 	src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1011 	src << "  }\n";
1012 	src << "  barrier();\n";
1013 	src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1014 	src << "}\n";
1015 
1016 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1017 }
1018 
iterate(void)1019 tcu::TestStatus MaxWorkgroupsInstance::iterate (void)
1020 {
1021 	VkPhysicalDeviceProperties properties;
1022 	deMemset(&properties, 0, sizeof(properties));
1023 	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1024 
1025 	const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1026 	deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1027 	deUint32 wgy = 1;
1028 	deUint32 wgz = 1;
1029 	if (wgx < maxWG)
1030 	{
1031 		wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1032 	}
1033 	if ((wgx * wgy) < maxWG)
1034 	{
1035 		wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1036 	}
1037 	deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1038 
1039 	deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1040 	deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1041 	deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1042 
1043 	return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz}, /*increment*/ 1);
1044 }
1045 
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1046 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1047 {
1048 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
1049 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
1050 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
1051 }
1052 
// Executes the specialize-workgroup test: dispatches a single workgroup whose
// x/y/z dimensions are supplied as specialization constants, reading back the
// zero-initialized shared array into the output buffer.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
	SpecializeWorkgroupInstance (Context &context, deUint32 x, deUint32 y, deUint32 z, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance						(context)
		, m_x								(x)
		, m_y								(y)
		, m_z								(z)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}
	// Runs a single dispatch sized by (m_x, m_y, m_z) and verifies the output.
	tcu::TestStatus iterate (void);
private:
	deUint32							m_x;	// workgroup size in x (spec constant 0)
	deUint32							m_y;	// workgroup size in y (spec constant 1)
	deUint32							m_z;	// workgroup size in z (spec constant 2)
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1071 
// Test-case wrapper for the specialize-workgroup tests: the workgroup size is
// fixed only at pipeline creation time through specialization constants.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
	SpecializeWorkgroupTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  deUint32 x, deUint32 y, deUint32 z,
					  const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name)
		, m_x								(x)
		, m_y								(y)
		, m_z								(z)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Adds the GLSL compute shader ("comp") using spec-constant-sized shared memory.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
	}
	// Checks extension support and that (x*y*z) fits device invocation limits.
	virtual void checkSupport(Context& context) const;
private:
	deUint32							m_x;
	deUint32							m_y;
	deUint32							m_z;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1099 
// Requires the zero-initialize-workgroup-memory extension (plus shader-object
// features where applicable) and skips workgroup sizes whose total invocation
// count exceeds the device limit.
void SpecializeWorkgroupTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);

	VkPhysicalDeviceProperties properties;
	deMemset(&properties, 0, sizeof(properties));
	context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
	// Only the total invocation count is checked; per-dimension limits are not.
	// Sizes used by AddSpecializeWorkgroupTests are <= 8, well under the
	// guaranteed per-dimension minimums, so that is sufficient here.
	if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
		TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}
1111 
initPrograms(SourceCollections & sourceCollections) const1112 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1113 {
1114 	std::ostringstream src;
1115 	src << "#version 450\n";
1116 	src << "#extension GL_EXT_null_initializer : enable\n";
1117 	src << "\n";
1118 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1119 	src << "layout(constant_id = 1) const uint WGY = 1;\n";
1120 	src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1121 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1122 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1123 	src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1124 	src << "\n";
1125 	src << "void main() {\n";
1126 	src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1127 	src << "}\n";
1128 
1129 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1130 }
1131 
iterate(void)1132 tcu::TestStatus SpecializeWorkgroupInstance::iterate (void)
1133 {
1134 	const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1135 	return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
1136 }
1137 
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1138 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1139 {
1140 	for (deUint32 z = 1; z <= 8; ++z)
1141 	{
1142 		for (deUint32 y = 1; y <= 8; ++y)
1143 		{
1144 			for (deUint32 x = 1; x <= 8; ++x)
1145 			{
1146 				group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1147 					de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1148 					x, y, z,
1149 					computePipelineConstructionType));
1150 			}
1151 		}
1152 	}
1153 }
1154 
// Executes the repeat-pipeline test: records one command buffer and submits it
// m_repeat times, verifying the output between submissions (see iterate()).
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
	RepeatedPipelineInstance (Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestInstance						(context)
		, m_xSize							(xSize)
		, m_repeat							(repeat)
		, m_odd								(odd)
	{
	}
	// Builds the pipeline/buffers, then submits and checks m_repeat times.
	tcu::TestStatus iterate (void);
private:
	deUint32							m_xSize;	// workgroup x size (spec constant WGX); y size is fixed at 2
	deUint32							m_repeat;	// number of submissions of the same command buffer
	deUint32							m_odd;		// which y row (0 or 1) the shader fills from buffer B
};
1171 
// Test-case wrapper for the repeat-pipeline tests.
// NOTE: the constructor parameter order is (xSize, repeat, odd) — callers must
// pass the repeat count before the odd/even selector.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
	RepeatedPipelineTest(tcu::TestContext& testCtx,
						const std::string& name,
						deUint32 xSize, deUint32 repeat, deUint32 odd,
						const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase							(testCtx, name)
		, m_xSize							(xSize)
		, m_repeat							(repeat)
		, m_odd								(odd)
		, m_computePipelineConstructionType	(computePipelineConstructionType)
	{
	}

	// Adds the GLSL compute shader ("comp"); m_odd is baked into the source.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		// The construction type is not forwarded: it is only consulted in
		// checkSupport(); the instance builds a classic compute pipeline itself.
		return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
	}
	virtual void checkSupport(Context& context) const;
private:
	deUint32							m_xSize;
	deUint32							m_repeat;
	deUint32							m_odd;
	vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};
1199 
// Requires the zero-initialize-workgroup-memory extension and, for
// shader-object based construction, the corresponding shader-object features.
void RepeatedPipelineTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}
1205 
initPrograms(SourceCollections & sourceCollections) const1206 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1207 {
1208 	std::ostringstream src;
1209 	src << "#version 450\n";
1210 	src << "#extension GL_EXT_null_initializer : enable\n";
1211 	src << "\n";
1212 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1213 	src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1214 	src << "\n";
1215 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1216 	src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1217 	src << "\n";
1218 	src << "shared uint wg_mem[WGX][2] = {};\n";
1219 	src << "void main() {\n";
1220 	src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1221 	src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1222 	src << "  }\n";
1223 	src << "  barrier();\n";
1224 	src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1225 	src << "}\n";
1226 
1227 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1228 }
1229 
// Records a single compute dispatch into one command buffer and submits it
// m_repeat times. After each submission it checks that output row m_odd holds
// the data copied from buffer B and the other row holds zeros (coming from the
// zero-initialized shared array), then re-poisons the output for the next round.
tcu::TestStatus RepeatedPipelineInstance::iterate (void)
{
	Context& context					= m_context;
	// One uint per invocation: workgroup is m_xSize x 2 (y size fixed in shader).
	const deUint32 bufferSize			= m_xSize * 2 * (deUint32)sizeof(deUint32);
	// buffers[0] = output A (binding 0), buffers[1] = input B (binding 1).
	const deUint32 numBuffers			= 2;

	const DeviceInterface&	vk			= context.getDeviceInterface();
	const VkDevice			device		= context.getDevice();
	Allocator&				allocator	= context.getDefaultAllocator();
	tcu::TestLog&			log			= context.getTestContext().getLog();

	de::MovePtr<BufferWithMemory> buffers[numBuffers];
	VkDescriptorBufferInfo bufferDescriptors[numBuffers];

	// Create two host-visible storage buffers and descriptor infos covering them.
	VkDeviceSize size = bufferSize;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
			MemoryRequirement::HostVisible | MemoryRequirement::Cached));
		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
	}

	// Fill input buffer B with its own indices; poison output buffer A with 0xff
	// so stale data cannot be mistaken for a correct result.
	deUint32* ptrs[numBuffers];
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
	}
	for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
	{
		ptrs[1][i] = i;
	}
	deMemset(ptrs[0], 0xff, (size_t)size);

	// One storage-buffer binding per buffer in a single descriptor set.
	DescriptorSetLayoutBuilder layoutBuilder;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	}

	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	// Specialization constant 0 supplies WGX (the workgroup's x size).
	const deUint32 specData[1] =
	{
		m_xSize,
	};
	const vk::VkSpecializationMapEntry entries[1] =
	{
		{0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
	};
	const vk::VkSpecializationInfo specInfo =
	{
		1,
		entries,
		sizeof(specData),
		specData
	};

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkPipelineLayoutCreateFlags)0,
		1,
		&descriptorSetLayout.get(),
		0u,
		DE_NULL,
	};
	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;

	// Make the host writes above visible to the device before execution.
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		flushAlloc(vk, device, buffers[i]->getAllocation());
	}

	// Build a classic (monolithic) compute pipeline from the "comp" shader.
	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
	const VkPipelineShaderStageCreateInfo shaderInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		DE_NULL,
		0,
		VK_SHADER_STAGE_COMPUTE_BIT,
		*shader,
		"main",
		&specInfo,
	};

	const VkComputePipelineCreateInfo pipelineInfo =
	{
		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		DE_NULL,
		0u,
		shaderInfo,
		*pipelineLayout,
		(VkPipeline)0,
		0u,
	};
	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

	const VkQueue queue = context.getUniversalQueue();
	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
		context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	DescriptorSetUpdateBuilder setUpdateBuilder;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
	}
	setUpdateBuilder.update(vk, device);

	// Record the command buffer once; it is resubmitted as-is in the loop below.
	beginCommandBuffer(vk, *cmdBuffer, 0);

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

	endCommandBuffer(vk, *cmdBuffer);

	for (deUint32 r = 0; r < m_repeat; ++r)
	{
		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

		invalidateAlloc(vk, device, buffers[0]->getAllocation());

		// Row i/m_xSize == m_odd should hold the values copied from B (i.e. i);
		// the other row should hold zeros from the zero-initialized shared array.
		for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
		{
			deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
			if (ptrs[0][i] != expected)
			{
				log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
				return tcu::TestStatus::fail("compute failed");
			}
		}

		// Re-poison the output and rewrite its descriptor for the next submission.
		deMemset(ptrs[0], 0xff, (size_t)size);
		flushAlloc(vk, device, buffers[0]->getAllocation());
		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
		setUpdateBuilder.update(vk, device);
	}

	return tcu::TestStatus::pass("compute succeeded");
}
1382 
AddRepeatedPipelineTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1383 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1384 {
1385 	std::vector<deUint32> xSizes = {4, 16, 32, 64};
1386 	std::vector<deUint32> odds = {0, 1};
1387 	std::vector<deUint32> repeats = {2, 4, 8, 16};
1388 	for (deUint32 i = 0; i < xSizes.size(); ++i)
1389 	{
1390 		deUint32 x = xSizes[i];
1391 		for (deUint32 j = 0; j < odds.size(); ++j)
1392 		{
1393 			deUint32 odd = odds[j];
1394 			for (deUint32 k = 0; k < repeats.size(); ++k)
1395 			{
1396 				deUint32 repeat = repeats[k];
1397 				group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1398 					std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1399 					x, odd, repeat, computePipelineConstructionType));
1400 			}
1401 		}
1402 	}
1403 }
1404 #ifndef CTS_USES_VULKANSC
AddSharedMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType)1405 void AddSharedMemoryTests (tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType)
1406 {
1407 	tcu::TestContext&			testCtx		= group->getTestContext();
1408 	std::string					filePath	= "compute/zero_initialize_workgroup_memory";
1409 	std::vector<std::string>	requirements;
1410 
1411 	std::string					testNames[]	=
1412 	{
1413 		"workgroup_size_128",
1414 		"workgroup_size_8x8x2",
1415 		"workgroup_size_8x2x8",
1416 		"workgroup_size_2x8x8",
1417 		"workgroup_size_8x4x4",
1418 		"workgroup_size_4x8x4",
1419 		"workgroup_size_4x4x8"
1420 	};
1421 
1422 	requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");
1423 
1424 	for (const auto& testName : testNames)
1425 	{
1426 		group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(), testName + ".amber", requirements));
1427 	}
1428 }
1429 #endif // CTS_USES_VULKANSC
1430 
1431 } // anonymous
1432 
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx,vk::ComputePipelineConstructionType computePipelineConstructionType)1433 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
1434 {
1435 	de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));
1436 
1437 	tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1438 		// Read initialization of max workgroup memory
1439 		new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
1440 	AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
1441 	tests->addChild(maxWorkgroupMemoryGroup);
1442 
1443 	tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types");
1444 	AddTypeTests(typeGroup, computePipelineConstructionType);
1445 	tests->addChild(typeGroup);
1446 
1447 	tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
1448 	AddCompositeTests(compositeGroup, computePipelineConstructionType);
1449 	tests->addChild(compositeGroup);
1450 
1451 	tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
1452 	AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
1453 	tests->addChild(maxWorkgroupsGroup);
1454 
1455 	tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
1456 	AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
1457 	tests->addChild(specializeWorkgroupGroup);
1458 
1459 	tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
1460 	AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
1461 	tests->addChild(repeatPipelineGroup);
1462 
1463 #ifndef CTS_USES_VULKANSC
1464 	tcu::TestCaseGroup* subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
1465 	AddSharedMemoryTests(subgroupInvocationGroup, computePipelineConstructionType);
1466 	tests->addChild(subgroupInvocationGroup);
1467 #endif // CTS_USES_VULKANSC
1468 
1469 	return tests.release();
1470 }
1471 
1472 } // compute
1473 } // vkt
1474