• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Google LLC.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief VK_KHR_zero_initialize_workgroup_memory tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "vktAmberTestCase.hpp"
30 
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageWithMemory.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDefs.hpp"
39 #include "vkRef.hpp"
40 
41 #include "tcuCommandLine.hpp"
42 #include "tcuTestLog.hpp"
43 
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47 
48 #include <algorithm>
49 #include <vector>
50 
51 using namespace vk;
52 
53 namespace vkt
54 {
55 namespace compute
56 {
57 namespace
58 {
59 
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,const std::vector<deUint32> specValues={},deUint32 increment=0)60 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
61 							deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
62 							const std::vector<deUint32> specValues = {},
63 							deUint32 increment = 0)
64 {
65 	const DeviceInterface&	vk			= context.getDeviceInterface();
66 	const VkDevice			device		= context.getDevice();
67 	Allocator&				allocator	= context.getDefaultAllocator();
68 	tcu::TestLog&			log			= context.getTestContext().getLog();
69 
70 	de::MovePtr<BufferWithMemory> buffer;
71 	VkDescriptorBufferInfo bufferDescriptor;
72 
73 	VkDeviceSize size = bufferSize;
74 	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
75 		vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
76 		MemoryRequirement::HostVisible));
77 	bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
78 
79 	deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
80 	deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
81 
82 	DescriptorSetLayoutBuilder layoutBuilder;
83 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
84 
85 	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
86 	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
87 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
88 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
89 	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
90 
91 	std::vector<VkSpecializationMapEntry> entries(specValues.size());
92 	if (!specValues.empty())
93 	{
94 		for (deUint32 i = 0; i < specValues.size(); ++i)
95 		{
96 			entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
97 		}
98 	}
99 	const VkSpecializationInfo specInfo =
100 	{
101 		(deUint32)specValues.size(),
102 		entries.data(),
103 		specValues.size() * sizeof(deUint32),
104 		specValues.data(),
105 	};
106 
107 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
108 	{
109 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
110 		DE_NULL,
111 		(VkPipelineLayoutCreateFlags)0,
112 		1,
113 		&descriptorSetLayout.get(),
114 		0u,
115 		DE_NULL,
116 	};
117 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
118 	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
119 	flushAlloc(vk, device, buffer->getAllocation());
120 
121 	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
122 	const VkPipelineShaderStageCreateInfo shaderInfo =
123 	{
124 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
125 		DE_NULL,
126 		0,
127 		VK_SHADER_STAGE_COMPUTE_BIT,
128 		*shader,
129 		"main",
130 		specValues.empty() ? DE_NULL : &specInfo,
131 	};
132 
133 	const VkComputePipelineCreateInfo pipelineInfo =
134 	{
135 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
136 		DE_NULL,
137 		0u,
138 		shaderInfo,
139 		*pipelineLayout,
140 		(VkPipeline)0,
141 		0u,
142 	};
143 	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
144 
145 	const VkQueue queue = context.getUniversalQueue();
146 	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
147 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
148 		context.getUniversalQueueFamilyIndex());
149 	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
150 
151 	DescriptorSetUpdateBuilder setUpdateBuilder;
152 	setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
153 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
154 	setUpdateBuilder.update(vk, device);
155 
156 	beginCommandBuffer(vk, *cmdBuffer, 0);
157 
158 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
159 	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
160 
161 	vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
162 
163 	endCommandBuffer(vk, *cmdBuffer);
164 
165 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
166 
167 	invalidateAlloc(vk, device, buffer->getAllocation());
168 
169 	for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
170 	{
171 		deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
172 		if (ptr[i] != expected)
173 		{
174 			log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
175 			return tcu::TestStatus::fail("compute failed");
176 		}
177 	}
178 
179 	return tcu::TestStatus::pass("compute succeeded");
180 }
181 
// Instance that exercises zero-initialization of the device's maximum amount
// of workgroup (shared) memory; see iterate() below for the dispatch setup.
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
	MaxWorkgroupMemoryInstance(Context& context, deUint32 numWorkgroups)
		: TestInstance(context),
		m_numWorkgroups(numWorkgroups)
	{
	}
	tcu::TestStatus iterate(void);

private:
	// Number of workgroups dispatched along X by iterate().
	deUint32 m_numWorkgroups;
};
195 
// Test case checking that a maximally sized zero-initialized shared array
// reads back as all zeroes, verified by numWorkgroups concurrent workgroups.
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
	MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
							const std::string& name,
							const std::string& description,
							deUint32 numWorkgroups)
		: TestCase(testCtx, name, description),
		m_numWorkgroups(numWorkgroups)
	{
	}

	// Generates the GLSL compute shader registered as "comp".
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups);
	}
	// Throws NotSupportedError when the required extension is missing.
	virtual void checkSupport(Context& context) const;

private:
	deUint32 m_numWorkgroups;
};
218 
// Requires VK_KHR_zero_initialize_workgroup_memory; throws NotSupportedError
// (via requireDeviceFunctionality) when the device does not expose it.
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
223 
// Builds a compute shader with a zero-initialized shared uvec4 array whose
// element count comes from spec constant 3 (default 1024 = 16384/16 bytes)
// and whose local size comes from spec constants 0..2. Each invocation
// checks the shared scalar matching its flat index is zero and atomically
// bumps the corresponding output word; invocation 0 additionally covers all
// shared scalars beyond the workgroup size, so with correct initialization
// every output word is incremented exactly once per workgroup.
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
	src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
	// The zero-initializer ({}) under test.
	src << "shared uvec4 wg_mem[num_elems] = {};\n";
	src << "void main() {\n";
	// Flatten the 3D local invocation ID into a single index.
	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
	src << "  uint idx = idx_x + idx_y + idx_z;\n";
	src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
	src << "  for (uint i = 0; i < num_elems; ++i) {\n";
	src << "    for (uint j = 0; j < 4; ++j) {\n";
	src << "      uint shared_idx = 4*i + j;\n";
	src << "      uint wg_val = wg_mem[i][j];\n";
	src << "      if (idx == shared_idx) {\n";
	src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
	src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
	src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
	src << "      }\n";
	src << "    }\n";
	src << "  }\n";
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
255 
// Dispatches m_numWorkgroups workgroups over a buffer sized to the device's
// full maxComputeSharedMemorySize (one shared uvec4 per 16 bytes). Workgroup
// dimensions are chosen as the largest multiples of 13 allowed by the device
// limits, with the total invocation target capped at 247 (= 13*19) --
// presumably to exercise non-power-of-two sizes that do not divide the shared
// element count evenly; TODO confirm intent with test author. Runs runCompute
// in "increment" mode, so every output word must equal the workgroup count.
tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
{
	VkPhysicalDeviceProperties properties;
	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
	const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;

	// Target invocation count: largest multiple of 13, capped at 247.
	const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
	deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
	deUint32 wgy = 1;
	deUint32 wgz = 1;
	// Extend into Y and then Z only when X alone falls short of the target.
	if (wgx < maxWG)
	{
		wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
	}
	if ((wgx * wgy) < maxWG)
	{
		wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
	}
	// One output uint per shared scalar: maxMemSize bytes of uvec4 elements.
	const deUint32 size = maxMemSize;
	const deUint32 numElems = maxMemSize / 16;

	return runCompute(m_context, size, m_numWorkgroups, 1, 1, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}
279 
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group)280 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group)
281 {
282 	std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
283 	for (deUint32 i = 0; i < workgroups.size(); ++i) {
284 		deUint32 numWG = workgroups[i];
285 		group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
286 			de::toString(numWG), de::toString(numWG) + " workgroups", numWG));
287 	}
288 }
289 
// Parameters for a single per-type zero-initialization case.
struct TypeCaseDef
{
	std::string	typeName;		// GLSL type name, e.g. "uvec2" or "f16mat2x3".
	deUint32	typeSize;		// Scalar component size in bytes (1/2/4/8).
	deUint32	numElements;	// Components per vector / per matrix column; 1 for scalars.
	deUint32	numRows;		// Matrix column count (first GLSL subscript); 1 otherwise.
								// NOTE(review): field names look swapped vs GLSL matCxR
								// terminology -- confirm against initPrograms' indexing.
	deUint32	numVariables;	// Number of shared variables; assigned in AddTypeTests.
};
298 
// Instance running the per-type shader; every component of every shared
// variable must read back zero.
class TypeTestInstance : public vkt::TestInstance
{
public:
	TypeTestInstance(Context& context, const TypeCaseDef& caseDef)
		: TestInstance(context),
		m_caseDef(caseDef)
	{
	}
	tcu::TestStatus iterate(void);

private:
	TypeCaseDef m_caseDef;
};
312 
// Test case verifying zero-initialization of shared variables of one GLSL
// scalar/vector/matrix type (described by TypeCaseDef).
class TypeTest : public vkt::TestCase
{
public:
	TypeTest(tcu::TestContext& testCtx,
			const std::string& name,
			const std::string& description,
			const TypeCaseDef& caseDef)
		: TestCase(testCtx, name, description),
		m_caseDef(caseDef)
	{
	}

	// Generates the GLSL compute shader registered as "comp".
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new TypeTestInstance(context, m_caseDef);
	}
	// Throws NotSupportedError when the extension or the type's required
	// shader arithmetic-type feature is unsupported.
	virtual void checkSupport(Context& context) const;

private:
	TypeCaseDef m_caseDef;
};
335 
checkSupport(Context & context) const336 void TypeTest::checkSupport(Context& context) const
337 {
338 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
339 
340 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
341 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
342 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
343 	f16_i8_features.pNext = DE_NULL;
344 
345 	VkPhysicalDeviceFeatures2 features2;
346 	deMemset(&features2, 0, sizeof(features2));
347 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
348 	features2.pNext = &f16_i8_features;
349 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
350 
351 	if (m_caseDef.typeName == "float16_t" ||
352 		m_caseDef.typeName == "f16vec2" ||
353 		m_caseDef.typeName == "f16vec3" ||
354 		m_caseDef.typeName == "f16vec4" ||
355 		m_caseDef.typeName == "f16mat2x2" ||
356 		m_caseDef.typeName == "f16mat2x3" ||
357 		m_caseDef.typeName == "f16mat2x4" ||
358 		m_caseDef.typeName == "f16mat3x2" ||
359 		m_caseDef.typeName == "f16mat3x3" ||
360 		m_caseDef.typeName == "f16mat3x4" ||
361 		m_caseDef.typeName == "f16mat4x2" ||
362 		m_caseDef.typeName == "f16mat4x3" ||
363 		m_caseDef.typeName == "f16mat4x4")
364 	{
365 		if (f16_i8_features.shaderFloat16 != VK_TRUE)
366 			TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
367 	}
368 
369 	if (m_caseDef.typeName == "float64_t" ||
370 		m_caseDef.typeName == "f64vec2" ||
371 		m_caseDef.typeName == "f64vec3" ||
372 		m_caseDef.typeName == "f64vec4"||
373 		m_caseDef.typeName == "f64mat2x2" ||
374 		m_caseDef.typeName == "f64mat2x3" ||
375 		m_caseDef.typeName == "f64mat2x4" ||
376 		m_caseDef.typeName == "f64mat3x2" ||
377 		m_caseDef.typeName == "f64mat3x3" ||
378 		m_caseDef.typeName == "f64mat3x4" ||
379 		m_caseDef.typeName == "f64mat4x2" ||
380 		m_caseDef.typeName == "f64mat4x3" ||
381 		m_caseDef.typeName == "f64mat4x4")
382 	{
383 		if (features2.features.shaderFloat64 != VK_TRUE)
384 			TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
385 	}
386 
387 	if (m_caseDef.typeName == "int8_t" ||
388 		m_caseDef.typeName == "i8vec2" ||
389 		m_caseDef.typeName == "i8vec3" ||
390 		m_caseDef.typeName == "i8vec4" ||
391 		m_caseDef.typeName == "uint8_t" ||
392 		m_caseDef.typeName == "u8vec2" ||
393 		m_caseDef.typeName == "u8vec3" ||
394 		m_caseDef.typeName == "u8vec4")
395 	{
396 		if (f16_i8_features.shaderInt8 != VK_TRUE)
397 			TCU_THROW(NotSupportedError, "shaderInt8 not supported");
398 	}
399 
400 	if (m_caseDef.typeName == "int16_t" ||
401 		m_caseDef.typeName == "i16vec2" ||
402 		m_caseDef.typeName == "i16vec3" ||
403 		m_caseDef.typeName == "i16vec4" ||
404 		m_caseDef.typeName == "uint16_t" ||
405 		m_caseDef.typeName == "u16vec2" ||
406 		m_caseDef.typeName == "u16vec3" ||
407 		m_caseDef.typeName == "u16vec4")
408 	{
409 		if (features2.features.shaderInt16 != VK_TRUE)
410 			TCU_THROW(NotSupportedError, "shaderInt16 not supported");
411 	}
412 
413 	if (m_caseDef.typeName == "int64_t" ||
414 		m_caseDef.typeName == "i64vec2" ||
415 		m_caseDef.typeName == "i64vec3" ||
416 		m_caseDef.typeName == "i64vec4" ||
417 		m_caseDef.typeName == "uint64_t" ||
418 		m_caseDef.typeName == "u64vec2" ||
419 		m_caseDef.typeName == "u64vec3" ||
420 		m_caseDef.typeName == "u64vec4")
421 	{
422 		if (features2.features.shaderInt64 != VK_TRUE)
423 			TCU_THROW(NotSupportedError, "shaderInt64 not supported");
424 	}
425 }
426 
// Builds a compute shader declaring m_caseDef.numVariables zero-initialized
// shared variables of the tested type. Each invocation inspects one scalar
// component per variable and writes 0 to the output buffer when it is zero,
// 1 otherwise, so runCompute's all-zero verification catches any component
// that was not initialized.
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	// One invocation per scalar component of a single variable.
	src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A  { uint a[]; } a;\n";
	for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
		// The zero-initializer ({}) under test.
		src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
	}
	src << "void main() {\n";
	if (m_caseDef.numRows > 1)
	{
		// Matrix addressing: the first subscript cycles fastest across
		// invocations, the second advances every numRows invocations.
		src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
		src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
	}
	// 64-bit components compare through int64_t -- presumably so upper bits
	// are not truncated before the zero comparison; TODO confirm.
	std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
	for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
	{
		if (m_caseDef.numElements == 1)
		{
			// Scalars.
			src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") ==  0) ? 0 : 1;\n";
		}
		else if (m_caseDef.numRows == 1)
		{
			// Vectors.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) ==  0) ? 0 : 1;\n";
		}
		else
		{
			// Matrices.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) ==  0) ? 0 : 1;\n";
		}
	}
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
467 
// The output buffer holds one 32-bit word per scalar component of each shared
// variable (regardless of the component's own size); a single invocation-per-
// component workgroup is dispatched and all words must read back zero.
tcu::TestStatus TypeTestInstance::iterate(void)
{
	const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
	return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1);
}
473 
AddTypeTests(tcu::TestCaseGroup * group)474 void AddTypeTests(tcu::TestCaseGroup* group)
475 {
476 	deRandom rnd;
477 	deRandom_init(&rnd, 0);
478 	std::vector<TypeCaseDef> cases =
479 	{
480 		{"bool",		1,	1,	1,	0},
481 		{"bvec2",		1,	2,	1,	0},
482 		{"bvec3",		1,	3,	1,	0},
483 		{"bvec4",		1,	4,	1,	0},
484 		{"uint32_t",	4,	1,	1,	0},
485 		{"uvec2",		4,	2,	1,	0},
486 		{"uvec3",		4,	3,	1,	0},
487 		{"uvec4",		4,	4,	1,	0},
488 		{"int32_t",		4,	1,	1,	0},
489 		{"ivec2",		4,	2,	1,	0},
490 		{"ivec3",		4,	3,	1,	0},
491 		{"ivec4",		4,	4,	1,	0},
492 		{"uint8_t",		1,	1,	1,	0},
493 		{"u8vec2",		1,	2,	1,	0},
494 		{"u8vec3",		1,	3,	1,	0},
495 		{"u8vec4",		1,	4,	1,	0},
496 		{"int8_t",		1,	1,	1,	0},
497 		{"i8vec2",		1,	2,	1,	0},
498 		{"i8vec3",		1,	3,	1,	0},
499 		{"i8vec4",		1,	4,	1,	0},
500 		{"uint16_t",	2,	1,	1,	0},
501 		{"u16vec2",		2,	2,	1,	0},
502 		{"u16vec3",		2,	3,	1,	0},
503 		{"u16vec4",		2,	4,	1,	0},
504 		{"int16_t",		2,	1,	1,	0},
505 		{"i16vec2",		2,	2,	1,	0},
506 		{"i16vec3",		2,	3,	1,	0},
507 		{"i16vec4",		2,	4,	1,	0},
508 		{"uint64_t",	8,	1,	1,	0},
509 		{"u64vec2",		8,	2,	1,	0},
510 		{"u64vec3",		8,	3,	1,	0},
511 		{"u64vec4",		8,	4,	1,	0},
512 		{"int64_t",		8,	1,	1,	0},
513 		{"i64vec2",		8,	2,	1,	0},
514 		{"i64vec3",		8,	3,	1,	0},
515 		{"i64vec4",		8,	4,	1,	0},
516 		{"float32_t",	4,	1,	1,	0},
517 		{"f32vec2",		4,	2,	1,	0},
518 		{"f32vec3",		4,	3,	1,	0},
519 		{"f32vec4",		4,	4,	1,	0},
520 		{"f32mat2x2",	4,	2,	2,	0},
521 		{"f32mat2x3",	4,	3,	2,	0},
522 		{"f32mat2x4",	4,	4,	2,	0},
523 		{"f32mat3x2",	4,	2,	3,	0},
524 		{"f32mat3x3",	4,	3,	3,	0},
525 		{"f32mat3x4",	4,	4,	3,	0},
526 		{"f32mat4x2",	4,	2,	4,	0},
527 		{"f32mat4x3",	4,	3,	4,	0},
528 		{"f32mat4x4",	4,	4,	4,	0},
529 		{"float16_t",	2,	1,	1,	0},
530 		{"f16vec2",		2,	2,	1,	0},
531 		{"f16vec3",		2,	3,	1,	0},
532 		{"f16vec4",		2,	4,	1,	0},
533 		{"f16mat2x2",	2,	2,	2,	0},
534 		{"f16mat2x3",	2,	3,	2,	0},
535 		{"f16mat2x4",	2,	4,	2,	0},
536 		{"f16mat3x2",	2,	2,	3,	0},
537 		{"f16mat3x3",	2,	3,	3,	0},
538 		{"f16mat3x4",	2,	4,	3,	0},
539 		{"f16mat4x2",	2,	2,	4,	0},
540 		{"f16mat4x3",	2,	3,	4,	0},
541 		{"f16mat4x4",	2,	4,	4,	0},
542 		{"float64_t",	8,	1,	1,	0},
543 		{"f64vec2",		8,	2,	1,	0},
544 		{"f64vec3",		8,	3,	1,	0},
545 		{"f64vec4",		8,	4,	1,	0},
546 		{"f64mat2x2",	8,	2,	2,	0},
547 		{"f64mat2x3",	8,	3,	2,	0},
548 		{"f64mat2x4",	8,	4,	2,	0},
549 		{"f64mat3x2",	8,	2,	3,	0},
550 		{"f64mat3x3",	8,	3,	3,	0},
551 		{"f64mat3x4",	8,	4,	3,	0},
552 		{"f64mat4x2",	8,	2,	4,	0},
553 		{"f64mat4x3",	8,	3,	4,	0},
554 		{"f64mat4x4",	8,	4,	4,	0},
555 	};
556 
557 	for (deUint32 i = 0; i < cases.size(); ++i)
558 	{
559 		cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
560 		group->addChild(
561 			new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i].typeName.c_str(), cases[i]));
562 	}
563 }
564 
// Parameters for one composite-type (array/struct) zero-initialization case.
struct CompositeCaseDef
{
	deUint32				index;			// Bitmask of required shader features:
											// 0x1 float16, 0x2 float64, 0x4 int8,
											// 0x8 int16, 0x10 int64 (see CompositeTest::checkSupport).
	std::string				typeDefinition;	// GLSL declarations incl. the zero-initialized shared variable(s).
	std::string				assignment;		// GLSL main() body writing one check value per output word.
	deUint32				elements;		// Number of 32-bit output words written by 'assignment'.
	std::vector<deUint32>	specValues;		// Values for specialization constants specId0..N-1.

	CompositeCaseDef (uint32_t index_, const std::string& typeDefinition_, const std::string& assignment_, uint32_t elements_, const std::vector<uint32_t>& specValues_)
		: index				(index_)
		, typeDefinition	(typeDefinition_)
		, assignment		(assignment_)
		, elements			(elements_)
		, specValues		(specValues_)
		{}
};
581 
// Instance running one composite case; all output words must read back zero.
class CompositeTestInstance : public vkt::TestInstance
{
public:
	CompositeTestInstance(Context& context, const CompositeCaseDef& caseDef)
		: TestInstance(context),
		m_caseDef(caseDef)
	{
	}
	tcu::TestStatus iterate(void);
private:
	CompositeCaseDef m_caseDef;
};
594 
// Test case verifying zero-initialization of a composite shared type
// (arrays, structs, nested combinations) described by CompositeCaseDef.
class CompositeTest : public vkt::TestCase
{
public:
	CompositeTest(tcu::TestContext& testCtx,
				  const std::string& name,
				  const std::string& description,
				  const CompositeCaseDef& caseDef)
		: TestCase(testCtx, name, description),
		m_caseDef(caseDef)
	{
	}

	// Assembles the GLSL compute shader ("comp") from the case definition.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new CompositeTestInstance(context, m_caseDef);
	}
	// Throws NotSupportedError when the extension or a feature from the
	// case's 'index' bitmask is unsupported.
	virtual void checkSupport(Context& context) const;
private:
	CompositeCaseDef m_caseDef;
};
616 
checkSupport(Context & context) const617 void CompositeTest::checkSupport(Context& context) const
618 {
619 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
620 
621 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
622 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
623 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
624 	f16_i8_features.pNext = DE_NULL;
625 
626 	VkPhysicalDeviceFeatures2 features2;
627 	deMemset(&features2, 0, sizeof(features2));
628 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
629 	features2.pNext = &f16_i8_features;
630 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
631 
632 	bool needsFloat16	= (m_caseDef.index & 0x1) != 0;
633 	bool needsFloat64	= (m_caseDef.index & 0x2) != 0;
634 	bool needsInt8		= (m_caseDef.index & 0x4) != 0;
635 	bool needsInt16		= (m_caseDef.index & 0x8) != 0;
636 	bool needsInt64		= (m_caseDef.index & 0x10) != 0;
637 
638 	if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
639 		TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
640 	if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
641 		TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
642 	if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
643 		TCU_THROW(NotSupportedError, "shaderInt8 not supported");
644 	if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
645 		TCU_THROW(NotSupportedError, "shaderInt16 not supported");
646 	if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
647 		TCU_THROW(NotSupportedError, "shaderInt64 not supported");
648 }
649 
// Assembles the case's shader: specialization constants specId0..N-1
// (default 1) for the case's spec values, a 1x1x1 local size, the output
// buffer at set 0 / binding 0, then the case's type definitions (with the
// zero-initialized shared variables) and its main() body.
void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	src << "\n";
	for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
		src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
	}
	src << "\n";
	src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	src << "\n";
	src << m_caseDef.typeDefinition;
	src << "\n";
	src << "void main() {\n";
	src << m_caseDef.assignment;
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
672 
// One 32-bit output word per value checked by the case's shader body; the
// shader writes 0 for correctly zero-initialized data, so runCompute's
// all-zero verification (increment == 0) must pass.
tcu::TestStatus CompositeTestInstance::iterate(void)
{
	const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
	return runCompute(m_context, bufferSize, 1, 1, 1, m_caseDef.specValues);
}
678 
// Registers the composite-type cases, named "0".."N-1" in table order. Each
// entry's 'elements' must equal the number of output words its assignment
// body writes (e.g. the nested case: 2*(1 + 3*(1 + 4*(1 + 5*(1 + 6)))) = 872).
void AddCompositeTests(tcu::TestCaseGroup* group)
{
	const std::vector<CompositeCaseDef> cases
	{
		// Case 0: 1D array sized by a specialization constant.
		{0,
		"shared uint wg_mem[specId0] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[i] = wg_mem[i];\n"
		"}\n",
		16,
		{16},
		},

		// Case 1: 2D float array sized by two specialization constants.
		{0,
		"shared float wg_mem[specId0][specId1] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		"  for (uint j = 0; j < specId1; ++j) {\n"
		"    uint idx = i * specId1 + j;\n"
		"    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
		"  }\n"
		"}\n",
		32,
		{4, 8},
		},

		// Case 2: struct mixing uint/float/bool scalars and vectors
		// (30 components total).
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"  uvec2 b;\n"
		"  uvec3 c;\n"
		"  uvec4 d;\n"
		"  float e;\n"
		"  vec2 f;\n"
		"  vec3 g;\n"
		"  vec4 h;\n"
		"  bool i;\n"
		"  bvec2 j;\n"
		"  bvec3 k;\n"
		"  bvec4 l;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint i = 0;\n"
		"a.a[i++] = wg_mem.a;\n"
		"a.a[i++] = wg_mem.b.x;\n"
		"a.a[i++] = wg_mem.b.y;\n"
		"a.a[i++] = wg_mem.c.x;\n"
		"a.a[i++] = wg_mem.c.y;\n"
		"a.a[i++] = wg_mem.c.z;\n"
		"a.a[i++] = wg_mem.d.x;\n"
		"a.a[i++] = wg_mem.d.y;\n"
		"a.a[i++] = wg_mem.d.z;\n"
		"a.a[i++] = wg_mem.d.w;\n"
		"a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.i ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
		30,
		{},
		},

		// Case 3: array of structs containing spec-constant-sized arrays of
		// structs (4 * (2 + 3*2) = 32 words).
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"};\n"
		"struct Sb {\n"
		"  uvec2 a;\n"
		"};\n"
		"struct Sc {\n"
		"  Sa a[specId0];\n"
		"  Sb b[specId1];\n"
		"};\n"
		"shared Sc wg_mem[specId2] = {};\n",

		"uint idx = 0;\n"
		"for (uint i = 0; i < specId2; ++i) {\n"
		"  for (uint j = 0; j < specId0; ++j) {\n"
		"    a.a[idx++] = wg_mem[i].a[j].a;\n"
		"  }\n"
		"  for (uint j = 0; j < specId1; ++j) {\n"
		"    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
		"    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
		"  }\n"
		"}\n",
		32,
		{2,3,4},
		},

		// Case 4: float16 struct members (requires shaderFloat16, index 0x1).
		{1,
		"struct Sa {\n"
		"  f16vec2 a;\n"
		"  float16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
		"}\n",
		18,
		{16},
		},

		// Case 5: float64 struct members (requires shaderFloat64, index 0x2).
		{2,
		"struct Sa {\n"
		"  f64vec2 a;\n"
		"  float64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
		"}\n",
		7,
		{5},
		},

		// Case 6: int8 struct members (requires shaderInt8, index 0x4).
		{4,
		"struct Sa {\n"
		"  i8vec2 a;\n"
		"  int8_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		34,
		{32},
		},

		// Case 7: int16 struct members (requires shaderInt16, index 0x8).
		{8,
		"struct Sa {\n"
		"  i16vec2 a;\n"
		"  int16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		122,
		{120},
		},

		// Case 8: int64 struct members (requires shaderInt64, index 0x10).
		{16,
		"struct Sa {\n"
		"  i64vec2 a;\n"
		"  int64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		63,
		{61},
		},

		// Case 9: struct mixing every optional scalar type (all five feature
		// bits set, index 0x1f).
		{0x1f,
		"struct Sa {\n"
		"  float16_t a;\n"
		"  float b;\n"
		"  int8_t c;\n"
		"  int16_t d;\n"
		"  int e;\n"
		"  int64_t f;\n"
		"  float64_t g;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
		"a.a[idx++] = uint(wg_mem.c);\n"
		"a.a[idx++] = uint(wg_mem.d);\n"
		"a.a[idx++] = uint(wg_mem.e);\n"
		"a.a[idx++] = uint(wg_mem.f);\n"
		"a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
		7,
		{},
		},

		// Case 10: five levels of nested spec-constant-sized struct arrays
		// (872 output words in total, see function comment).
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"};\n"
		"struct Sb {\n"
		"  Sa a[specId0];\n"
		"  uint b;\n"
		"};\n"
		"struct Sc {\n"
		"  Sb b[specId1];\n"
		"  uint c;\n"
		"};\n"
		"struct Sd {\n"
		"  Sc c[specId2];\n"
		"  uint d;\n"
		"};\n"
		"struct Se {\n"
		"  Sd d[specId3];\n"
		"  uint e;\n"
		"};\n"
		"shared Se wg_mem[specId4] = {};\n",

		"uint idx = 0;\n"
		"for (uint i1 = 0; i1 < specId4; ++i1) {\n"
		"  a.a[idx++] = wg_mem[i1].e;\n"
		"  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
		"    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
		"    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
		"      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
		"      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
		"        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
		"        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
		"          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
		"        }\n"
		"      }\n"
		"    }\n"
		"  }\n"
		"}\n",
		872,
		{6,5,4,3,2},
		},
	};

	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		group->addChild(
			new CompositeTest(group->getTestContext(), de::toString(i), de::toString(i), cases[i]));
	}
}
943 
// Axis selector: which dispatch dimension a test stresses (x, y or z).
enum Dim {
	DimX,
	DimY,
	DimZ,
};
949 
// Instance that dispatches the maximum workgroup count (65535) along one
// axis and verifies zero-initialized shared memory in every workgroup.
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
	MaxWorkgroupsInstance(Context &context, Dim dim)
		: TestInstance(context),
		m_dim(dim)
	{
	}
	tcu::TestStatus iterate(void);
private:
	Dim m_dim;	// axis along which 65535 workgroups are dispatched
};
962 
// Test case wrapper: builds the shader (initPrograms), checks extension
// support, and creates a MaxWorkgroupsInstance for the chosen axis.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
	MaxWorkgroupsTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  const std::string& description,
					  Dim dim)
		: TestCase(testCtx, name, description),
		m_dim(dim)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupsInstance(context, m_dim);
	}
	virtual void checkSupport(Context& context) const;
private:
	Dim m_dim;	// axis along which the instance dispatches
};
984 
checkSupport(Context & context) const985 void MaxWorkgroupsTest::checkSupport(Context& context) const
986 {
987 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
988 }
989 
initPrograms(SourceCollections & sourceCollections) const990 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
991 {
992 	std::ostringstream src;
993 	src << "#version 450\n";
994 	src << "#extension GL_EXT_null_initializer : enable\n";
995 	src << "\n";
996 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
997 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
998 	src << "shared uint wg_mem[2] = {};\n";
999 	std::string dim;
1000 	switch (m_dim) {
1001 		case DimX:
1002 			dim = "x";
1003 			break;
1004 		case DimY:
1005 			dim = "y";
1006 			break;
1007 		case DimZ:
1008 			dim = "z";
1009 			break;
1010 	}
1011 	src << "\n";
1012 	src << "void main() {\n";
1013 	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1014 	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1015 	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
1016 	src << "  uint idx = idx_x + idx_y + idx_z;\n";
1017 	src << "  if (gl_LocalInvocationID.x == 0) {\n";
1018 	src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1019 	src << "  }\n";
1020 	src << "  barrier();\n";
1021 	src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1022 	src << "}\n";
1023 
1024 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1025 }
1026 
iterate(void)1027 tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
1028 {
1029 	VkPhysicalDeviceProperties properties;
1030 	deMemset(&properties, 0, sizeof(properties));
1031 	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1032 
1033 	const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1034 	deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1035 	deUint32 wgy = 1;
1036 	deUint32 wgz = 1;
1037 	if (wgx < maxWG)
1038 	{
1039 		wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1040 	}
1041 	if ((wgx * wgy) < maxWG)
1042 	{
1043 		wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1044 	}
1045 	deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1046 
1047 	deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1048 	deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1049 	deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1050 
1051 	return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, {wgx, wgy, wgz}, /*increment*/ 1);
1052 }
1053 
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group)1054 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group)
1055 {
1056 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", "max x dim workgroups", DimX));
1057 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", "max y dim workgroups", DimY));
1058 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", "max z dim workgroups", DimZ));
1059 }
1060 
// Instance that runs a single workgroup of the given (x, y, z) size and
// reads back every element of the zero-initialized shared array.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
	SpecializeWorkgroupInstance(Context &context, deUint32 x, deUint32 y, deUint32 z)
		: TestInstance(context),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}
	tcu::TestStatus iterate(void);
private:
	// Workgroup dimensions, fed to the shader via spec constants 0..2.
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1077 
// Test case: shared array dimensions come from specialization constants;
// checks zero initialization still works when the size is specialized.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
	SpecializeWorkgroupTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  const std::string& description,
					  deUint32 x, deUint32 y, deUint32 z)
		: TestCase(testCtx, name, description),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z);
	}
	virtual void checkSupport(Context& context) const;
private:
	// Requested workgroup dimensions.
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1103 
checkSupport(Context & context) const1104 void SpecializeWorkgroupTest::checkSupport(Context& context) const
1105 {
1106 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1107 
1108 	VkPhysicalDeviceProperties properties;
1109 	deMemset(&properties, 0, sizeof(properties));
1110 	context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
1111 	if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
1112 		TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
1113 }
1114 
initPrograms(SourceCollections & sourceCollections) const1115 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1116 {
1117 	std::ostringstream src;
1118 	src << "#version 450\n";
1119 	src << "#extension GL_EXT_null_initializer : enable\n";
1120 	src << "\n";
1121 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1122 	src << "layout(constant_id = 1) const uint WGY = 1;\n";
1123 	src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1124 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1125 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1126 	src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1127 	src << "\n";
1128 	src << "void main() {\n";
1129 	src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1130 	src << "}\n";
1131 
1132 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1133 }
1134 
iterate(void)1135 tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
1136 {
1137 	const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1138 	return runCompute(m_context, size, 1, 1, 1, {m_x, m_y, m_z});
1139 }
1140 
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group)1141 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group)
1142 {
1143 	for (deUint32 z = 1; z <= 8; ++z)
1144 	{
1145 		for (deUint32 y = 1; y <= 8; ++y)
1146 		{
1147 			for (deUint32 x = 1; x <= 8; ++x)
1148 			{
1149 				group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1150 					de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1151 					de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1152 					x, y, z));
1153 			}
1154 		}
1155 	}
1156 }
1157 
// Instance that records one dispatch and submits it m_repeat times,
// verifying the output buffer after each submission.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
	RepeatedPipelineInstance(Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestInstance(context),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_xSize;	// workgroup size in x (spec constant WGX)
	deUint32 m_repeat;	// number of submit/verify iterations
	deUint32 m_odd;		// which y-row (0 or 1) is loaded from the input buffer
};
1174 
// Test case for repeated pipeline execution: the shader writes one row of
// shared memory from a buffer and relies on zero initialization of the other.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
	RepeatedPipelineTest(tcu::TestContext& testCtx,
						const std::string& name,
						const std::string& description,
						deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestCase(testCtx, name, description),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
	}
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_xSize;	// workgroup size in x (spec constant WGX)
	deUint32 m_repeat;	// number of submit/verify iterations
	deUint32 m_odd;		// which y-row (0 or 1) is loaded; baked into the shader
};
1200 
checkSupport(Context & context) const1201 void RepeatedPipelineTest::checkSupport(Context& context) const
1202 {
1203 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1204 }
1205 
initPrograms(SourceCollections & sourceCollections) const1206 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1207 {
1208 	std::ostringstream src;
1209 	src << "#version 450\n";
1210 	src << "#extension GL_EXT_null_initializer : enable\n";
1211 	src << "\n";
1212 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1213 	src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1214 	src << "\n";
1215 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1216 	src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1217 	src << "\n";
1218 	src << "shared uint wg_mem[WGX][2] = {};\n";
1219 	src << "void main() {\n";
1220 	src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1221 	src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1222 	src << "  }\n";
1223 	src << "  barrier();\n";
1224 	src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1225 	src << "}\n";
1226 
1227 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1228 }
1229 
iterate(void)1230 tcu::TestStatus RepeatedPipelineInstance::iterate(void)
1231 {
1232 	Context& context					= m_context;
1233 	const deUint32 bufferSize			= m_xSize * 2 * (deUint32)sizeof(deUint32);
1234 	const deUint32 numBuffers			= 2;
1235 
1236 	const DeviceInterface&	vk			= context.getDeviceInterface();
1237 	const VkDevice			device		= context.getDevice();
1238 	Allocator&				allocator	= context.getDefaultAllocator();
1239 	tcu::TestLog&			log			= context.getTestContext().getLog();
1240 
1241 	de::MovePtr<BufferWithMemory> buffers[numBuffers];
1242 	VkDescriptorBufferInfo bufferDescriptors[numBuffers];
1243 
1244 	VkDeviceSize size = bufferSize;
1245 	for (deUint32 i = 0; i < numBuffers; ++i)
1246 	{
1247 		buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1248 			vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
1249 			MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1250 		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
1251 	}
1252 
1253 	deUint32* ptrs[numBuffers];
1254 	for (deUint32 i = 0; i < numBuffers; ++i)
1255 	{
1256 		ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
1257 	}
1258 	for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
1259 	{
1260 		ptrs[1][i] = i;
1261 	}
1262 	deMemset(ptrs[0], 0xff, (size_t)size);
1263 
1264 	DescriptorSetLayoutBuilder layoutBuilder;
1265 	for (deUint32 i = 0; i < numBuffers; ++i)
1266 	{
1267 		layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
1268 	}
1269 
1270 	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1271 	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
1272 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
1273 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1274 	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1275 
1276 	const deUint32 specData[1] =
1277 	{
1278 		m_xSize,
1279 	};
1280 	const vk::VkSpecializationMapEntry entries[1] =
1281 	{
1282 		{0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
1283 	};
1284 	const vk::VkSpecializationInfo specInfo =
1285 	{
1286 		1,
1287 		entries,
1288 		sizeof(specData),
1289 		specData
1290 	};
1291 
1292 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1293 	{
1294 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1295 		DE_NULL,
1296 		(VkPipelineLayoutCreateFlags)0,
1297 		1,
1298 		&descriptorSetLayout.get(),
1299 		0u,
1300 		DE_NULL,
1301 	};
1302 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1303 	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
1304 
1305 	for (deUint32 i = 0; i < numBuffers; ++i)
1306 	{
1307 		flushAlloc(vk, device, buffers[i]->getAllocation());
1308 	}
1309 
1310 	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
1311 	const VkPipelineShaderStageCreateInfo shaderInfo =
1312 	{
1313 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1314 		DE_NULL,
1315 		0,
1316 		VK_SHADER_STAGE_COMPUTE_BIT,
1317 		*shader,
1318 		"main",
1319 		&specInfo,
1320 	};
1321 
1322 	const VkComputePipelineCreateInfo pipelineInfo =
1323 	{
1324 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1325 		DE_NULL,
1326 		0u,
1327 		shaderInfo,
1328 		*pipelineLayout,
1329 		(VkPipeline)0,
1330 		0u,
1331 	};
1332 	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
1333 
1334 	const VkQueue queue = context.getUniversalQueue();
1335 	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
1336 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1337 		context.getUniversalQueueFamilyIndex());
1338 	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1339 
1340 	DescriptorSetUpdateBuilder setUpdateBuilder;
1341 	for (deUint32 i = 0; i < numBuffers; ++i)
1342 	{
1343 		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
1344 									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
1345 	}
1346 	setUpdateBuilder.update(vk, device);
1347 
1348 	beginCommandBuffer(vk, *cmdBuffer, 0);
1349 
1350 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1351 	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1352 
1353 	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
1354 
1355 	endCommandBuffer(vk, *cmdBuffer);
1356 
1357 	for (deUint32 r = 0; r < m_repeat; ++r)
1358 	{
1359 		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1360 
1361 		invalidateAlloc(vk, device, buffers[0]->getAllocation());
1362 
1363 		for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
1364 		{
1365 			deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
1366 			if (ptrs[0][i] != expected)
1367 			{
1368 				log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
1369 				return tcu::TestStatus::fail("compute failed");
1370 			}
1371 		}
1372 
1373 		deMemset(ptrs[0], 0xff, (size_t)size);
1374 		flushAlloc(vk, device, buffers[0]->getAllocation());
1375 		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
1376 									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1377 		setUpdateBuilder.update(vk, device);
1378 	}
1379 
1380 	return tcu::TestStatus::pass("compute succeeded");
1381 }
1382 
AddRepeatedPipelineTests(tcu::TestCaseGroup * group)1383 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group)
1384 {
1385 	std::vector<deUint32> xSizes = {4, 16, 32, 64};
1386 	std::vector<deUint32> odds = {0, 1};
1387 	std::vector<deUint32> repeats = {2, 4, 8, 16};
1388 	for (deUint32 i = 0; i < xSizes.size(); ++i)
1389 	{
1390 		deUint32 x = xSizes[i];
1391 		for (deUint32 j = 0; j < odds.size(); ++j)
1392 		{
1393 			deUint32 odd = odds[j];
1394 			for (deUint32 k = 0; k < repeats.size(); ++k)
1395 			{
1396 				deUint32 repeat = repeats[k];
1397 				group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1398 					std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1399 					std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1400 					x, odd, repeat));
1401 			}
1402 		}
1403 	}
1404 }
1405 #ifndef CTS_USES_VULKANSC
AddSharedMemoryTests(tcu::TestCaseGroup * group)1406 void AddSharedMemoryTests (tcu::TestCaseGroup* group)
1407 {
1408 	tcu::TestContext&			testCtx		= group->getTestContext();
1409 	std::string					filePath	= "compute/zero_initialize_workgroup_memory";
1410 	std::vector<std::string>	requirements;
1411 
1412 	std::string					testNames[]	=
1413 	{
1414 		"workgroup_size_128",
1415 		"workgroup_size_8x8x2",
1416 		"workgroup_size_8x2x8",
1417 		"workgroup_size_2x8x8",
1418 		"workgroup_size_8x4x4",
1419 		"workgroup_size_4x8x4",
1420 		"workgroup_size_4x4x8"
1421 	};
1422 
1423 	requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");
1424 
1425 	for (const auto& testName : testNames)
1426 	{
1427 		group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(), testName + ".amber", requirements));
1428 	}
1429 }
1430 #endif // CTS_USES_VULKANSC
1431 
1432 } // anonymous
1433 
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx)1434 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests(tcu::TestContext& testCtx)
1435 {
1436 	de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory", "VK_KHR_zero_intialize_workgroup_memory tests"));
1437 
1438 	tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1439 		new tcu::TestCaseGroup(testCtx, "max_workgroup_memory", "Read initialization of max workgroup memory");
1440 	AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup);
1441 	tests->addChild(maxWorkgroupMemoryGroup);
1442 
1443 	tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types", "basic type tests");
1444 	AddTypeTests(typeGroup);
1445 	tests->addChild(typeGroup);
1446 
1447 	tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites", "composite type tests");
1448 	AddCompositeTests(compositeGroup);
1449 	tests->addChild(compositeGroup);
1450 
1451 	tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups", "max workgroups");
1452 	AddMaxWorkgroupsTests(maxWorkgroupsGroup);
1453 	tests->addChild(maxWorkgroupsGroup);
1454 
1455 	tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup", "specialize workgroup size");
1456 	AddSpecializeWorkgroupTests(specializeWorkgroupGroup);
1457 	tests->addChild(specializeWorkgroupGroup);
1458 
1459 	tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline", "repeated pipeline run");
1460 	AddRepeatedPipelineTests(repeatPipelineGroup);
1461 	tests->addChild(repeatPipelineGroup);
1462 
1463 #ifndef CTS_USES_VULKANSC
1464 	tcu::TestCaseGroup* subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks", "shared memory tests");
1465 	AddSharedMemoryTests(subgroupInvocationGroup);
1466 	tests->addChild(subgroupInvocationGroup);
1467 #endif // CTS_USES_VULKANSC
1468 
1469 	return tests.release();
1470 }
1471 
1472 } // compute
1473 } // vkt
1474