• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Google LLC.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief VK_KHR_zero_initialize_workgroup_memory tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 
30 #include "vkBufferWithMemory.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkDefs.hpp"
38 #include "vkRef.hpp"
39 
40 #include "tcuCommandLine.hpp"
41 #include "tcuTestLog.hpp"
42 
43 #include "deRandom.hpp"
44 #include "deStringUtil.hpp"
45 #include "deUniquePtr.hpp"
46 
#include <algorithm>
#include <initializer_list>
#include <vector>
49 
50 using namespace vk;
51 
52 namespace vkt
53 {
54 namespace compute
55 {
56 namespace
57 {
58 
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,const std::vector<deUint32> specValues={},deUint32 increment=0)59 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
60 							deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
61 							const std::vector<deUint32> specValues = {},
62 							deUint32 increment = 0)
63 {
64 	const DeviceInterface&	vk			= context.getDeviceInterface();
65 	const VkDevice			device		= context.getDevice();
66 	Allocator&				allocator	= context.getDefaultAllocator();
67 	tcu::TestLog&			log			= context.getTestContext().getLog();
68 
69 	de::MovePtr<BufferWithMemory> buffer;
70 	VkDescriptorBufferInfo bufferDescriptor;
71 
72 	VkDeviceSize size = bufferSize;
73 	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
74 		vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
75 		MemoryRequirement::HostVisible | MemoryRequirement::Cached));
76 	bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
77 
78 	deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
79 	deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
80 
81 	DescriptorSetLayoutBuilder layoutBuilder;
82 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
83 
84 	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
85 	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
86 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
87 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
88 	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
89 
90 	std::vector<VkSpecializationMapEntry> entries(specValues.size());
91 	if (!specValues.empty())
92 	{
93 		for (deUint32 i = 0; i < specValues.size(); ++i)
94 		{
95 			entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
96 		}
97 	}
98 	const VkSpecializationInfo specInfo =
99 	{
100 		(deUint32)specValues.size(),
101 		entries.data(),
102 		specValues.size() * sizeof(deUint32),
103 		specValues.data(),
104 	};
105 
106 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
107 	{
108 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
109 		DE_NULL,
110 		(VkPipelineLayoutCreateFlags)0,
111 		1,
112 		&descriptorSetLayout.get(),
113 		0u,
114 		DE_NULL,
115 	};
116 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
117 	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
118 	flushAlloc(vk, device, buffer->getAllocation());
119 
120 	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
121 	const VkPipelineShaderStageCreateInfo shaderInfo =
122 	{
123 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
124 		DE_NULL,
125 		0,
126 		VK_SHADER_STAGE_COMPUTE_BIT,
127 		*shader,
128 		"main",
129 		specValues.empty() ? DE_NULL : &specInfo,
130 	};
131 
132 	const VkComputePipelineCreateInfo pipelineInfo =
133 	{
134 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
135 		DE_NULL,
136 		0u,
137 		shaderInfo,
138 		*pipelineLayout,
139 		(VkPipeline)0,
140 		0u,
141 	};
142 	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
143 
144 	const VkQueue queue = context.getUniversalQueue();
145 	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
146 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
147 		context.getUniversalQueueFamilyIndex());
148 	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
149 
150 	DescriptorSetUpdateBuilder setUpdateBuilder;
151 	setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
152 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
153 	setUpdateBuilder.update(vk, device);
154 
155 	beginCommandBuffer(vk, *cmdBuffer, 0);
156 
157 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
158 	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
159 
160 	vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
161 
162 	endCommandBuffer(vk, *cmdBuffer);
163 
164 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
165 
166 	invalidateAlloc(vk, device, buffer->getAllocation());
167 
168 	for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
169 	{
170 		deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
171 		if (ptr[i] != expected)
172 		{
173 			log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
174 			return tcu::TestStatus::fail("compute failed");
175 		}
176 	}
177 
178 	return tcu::TestStatus::pass("compute succeeded");
179 }
180 
// Instance side of the max-workgroup-memory test: dispatches
// m_numWorkgroups workgroups (in X) over a shared array sized to the
// device's full shared-memory capacity (see iterate()).
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
	MaxWorkgroupMemoryInstance(Context& context, deUint32 numWorkgroups)
		: TestInstance(context),
		m_numWorkgroups(numWorkgroups)
	{
	}
	tcu::TestStatus iterate(void);

private:
	// Number of workgroups dispatched along X by iterate().
	deUint32 m_numWorkgroups;
};
194 
// Test case that zero-initializes a shared array covering the device's
// maxComputeSharedMemorySize and verifies every element reads back as zero.
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
	MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
							const std::string& name,
							const std::string& description,
							deUint32 numWorkgroups)
		: TestCase(testCtx, name, description),
		m_numWorkgroups(numWorkgroups)
	{
	}

	// Generates the "comp" GLSL compute shader.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups);
	}
	virtual void checkSupport(Context& context) const;

private:
	// Workgroup count forwarded to the instance.
	deUint32 m_numWorkgroups;
};
217 
// The test only needs the zero-initialize-workgroup-memory extension.
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
222 
// Generates a compute shader with a zero-initialized shared uvec4 array.
// Every scalar element of the array is checked by exactly one invocation
// (or by invocation 0 when the element index exceeds the workgroup size),
// and a 1 is atomically added to the result word when the element is zero.
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	// Workgroup size (ids 0-2) and array length (id 3) are specialization
	// constants so the instance can size them to the device limits.
	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
	src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
	// The zero-initialized workgroup memory under test.
	src << "shared uvec4 wg_mem[num_elems] = {};\n";
	src << "void main() {\n";
	// Flatten the 3D local invocation id into a linear index.
	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
	src << "  uint idx = idx_x + idx_y + idx_z;\n";
	src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
	src << "  for (uint i = 0; i < num_elems; ++i) {\n";
	src << "    for (uint j = 0; j < 4; ++j) {\n";
	src << "      uint shared_idx = 4*i + j;\n";
	src << "      uint wg_val = wg_mem[i][j];\n";
	src << "      if (idx == shared_idx) {\n";
	src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
	src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
	// Elements beyond the workgroup size are all checked by invocation 0.
	src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
	src << "      }\n";
	src << "    }\n";
	src << "  }\n";
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
254 
iterate(void)255 tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
256 {
257 	VkPhysicalDeviceProperties properties;
258 	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
259 	const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;
260 
261 	const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
262 	deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
263 	deUint32 wgy = 1;
264 	deUint32 wgz = 1;
265 	if (wgx < maxWG)
266 	{
267 		wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
268 	}
269 	if ((wgx * wgy) < maxWG)
270 	{
271 		wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
272 	}
273 	const deUint32 size = maxMemSize;
274 	const deUint32 numElems = maxMemSize / 16;
275 
276 	return runCompute(m_context, size, m_numWorkgroups, 1, 1, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
277 }
278 
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group)279 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group)
280 {
281 	std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
282 	for (deUint32 i = 0; i < workgroups.size(); ++i) {
283 		deUint32 numWG = workgroups[i];
284 		group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
285 			de::toString(numWG), de::toString(numWG) + " workgroups", numWG));
286 	}
287 }
288 
// Parameters for one scalar/vector/matrix type test.
struct TypeCaseDef
{
	std::string	typeName;		// GLSL type name, e.g. "f16mat2x3".
	deUint32	typeSize;		// Size of one scalar component in bytes.
	deUint32	numElements;	// Components per vector / per matrix column.
	deUint32	numRows;		// Second matrix dimension (1 for scalars and
								// vectors); used as the first wg_mem index in
								// the generated shader — note this maps to
								// GLSL matrix columns, not rows (TODO confirm).
	deUint32	numVariables;	// Number of shared variables; filled in
								// pseudo-randomly by AddTypeTests.
};
297 
// Instance side of the per-type zero-initialization test; runs the
// generated shader and checks one result word per scalar element.
class TypeTestInstance : public vkt::TestInstance
{
public:
	TypeTestInstance(Context& context, const TypeCaseDef& caseDef)
		: TestInstance(context),
		m_caseDef(caseDef)
	{
	}
	tcu::TestStatus iterate(void);

private:
	// Type parameters for this case.
	TypeCaseDef m_caseDef;
};
311 
// Test case checking that a shared variable of a single GLSL type (scalar,
// vector or matrix, possibly replicated numVariables times) is zero
// initialized.
class TypeTest : public vkt::TestCase
{
public:
	TypeTest(tcu::TestContext& testCtx,
			const std::string& name,
			const std::string& description,
			const TypeCaseDef& caseDef)
		: TestCase(testCtx, name, description),
		m_caseDef(caseDef)
	{
	}

	// Generates the "comp" GLSL compute shader.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new TypeTestInstance(context, m_caseDef);
	}
	// Checks the extension plus any 8/16/64-bit type features the case needs.
	virtual void checkSupport(Context& context) const;

private:
	// Type parameters forwarded to the instance.
	TypeCaseDef m_caseDef;
};
334 
checkSupport(Context & context) const335 void TypeTest::checkSupport(Context& context) const
336 {
337 	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
338 
339 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
340 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
341 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
342 	f16_i8_features.pNext = DE_NULL;
343 
344 	VkPhysicalDeviceFeatures2 features2;
345 	deMemset(&features2, 0, sizeof(features2));
346 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
347 	features2.pNext = &f16_i8_features;
348 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
349 
350 	if (m_caseDef.typeName == "float16_t" ||
351 		m_caseDef.typeName == "f16vec2" ||
352 		m_caseDef.typeName == "f16vec3" ||
353 		m_caseDef.typeName == "f16vec4" ||
354 		m_caseDef.typeName == "f16mat2x2" ||
355 		m_caseDef.typeName == "f16mat2x3" ||
356 		m_caseDef.typeName == "f16mat2x4" ||
357 		m_caseDef.typeName == "f16mat3x2" ||
358 		m_caseDef.typeName == "f16mat3x3" ||
359 		m_caseDef.typeName == "f16mat3x4" ||
360 		m_caseDef.typeName == "f16mat4x2" ||
361 		m_caseDef.typeName == "f16mat4x3" ||
362 		m_caseDef.typeName == "f16mat4x4")
363 	{
364 		if (f16_i8_features.shaderFloat16 != VK_TRUE)
365 			TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
366 	}
367 
368 	if (m_caseDef.typeName == "float64_t" ||
369 		m_caseDef.typeName == "f64vec2" ||
370 		m_caseDef.typeName == "f64vec3" ||
371 		m_caseDef.typeName == "f64vec4"||
372 		m_caseDef.typeName == "f64mat2x2" ||
373 		m_caseDef.typeName == "f64mat2x3" ||
374 		m_caseDef.typeName == "f64mat2x4" ||
375 		m_caseDef.typeName == "f64mat3x2" ||
376 		m_caseDef.typeName == "f64mat3x3" ||
377 		m_caseDef.typeName == "f64mat3x4" ||
378 		m_caseDef.typeName == "f64mat4x2" ||
379 		m_caseDef.typeName == "f64mat4x3" ||
380 		m_caseDef.typeName == "f64mat4x4")
381 	{
382 		if (features2.features.shaderFloat64 != VK_TRUE)
383 			TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
384 	}
385 
386 	if (m_caseDef.typeName == "int8_t" ||
387 		m_caseDef.typeName == "i8vec2" ||
388 		m_caseDef.typeName == "i8vec3" ||
389 		m_caseDef.typeName == "i8vec4" ||
390 		m_caseDef.typeName == "uint8_t" ||
391 		m_caseDef.typeName == "u8vec2" ||
392 		m_caseDef.typeName == "u8vec3" ||
393 		m_caseDef.typeName == "u8vec4")
394 	{
395 		if (f16_i8_features.shaderInt8 != VK_TRUE)
396 			TCU_THROW(NotSupportedError, "shaderInt8 not supported");
397 	}
398 
399 	if (m_caseDef.typeName == "int16_t" ||
400 		m_caseDef.typeName == "i16vec2" ||
401 		m_caseDef.typeName == "i16vec3" ||
402 		m_caseDef.typeName == "i16vec4" ||
403 		m_caseDef.typeName == "uint16_t" ||
404 		m_caseDef.typeName == "u16vec2" ||
405 		m_caseDef.typeName == "u16vec3" ||
406 		m_caseDef.typeName == "u16vec4")
407 	{
408 		if (features2.features.shaderInt16 != VK_TRUE)
409 			TCU_THROW(NotSupportedError, "shaderInt16 not supported");
410 	}
411 
412 	if (m_caseDef.typeName == "int64_t" ||
413 		m_caseDef.typeName == "i64vec2" ||
414 		m_caseDef.typeName == "i64vec3" ||
415 		m_caseDef.typeName == "i64vec4" ||
416 		m_caseDef.typeName == "uint64_t" ||
417 		m_caseDef.typeName == "u64vec2" ||
418 		m_caseDef.typeName == "u64vec3" ||
419 		m_caseDef.typeName == "u64vec4")
420 	{
421 		if (features2.features.shaderInt64 != VK_TRUE)
422 			TCU_THROW(NotSupportedError, "shaderInt64 not supported");
423 	}
424 }
425 
// Generates a compute shader that copies each scalar element of every
// zero-initialized shared variable into the result buffer as 0 (element was
// zero) or 1 (non-zero). One invocation handles one element of each
// variable: local_size_x = numElements * numRows.
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A  { uint a[]; } a;\n";
	// One zero-initialized shared variable per numVariables.
	for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
		src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
	}
	src << "void main() {\n";
	// For matrix types, split the invocation index into a 2D element index.
	if (m_caseDef.numRows > 1)
	{
		src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
		src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
	}
	// 64-bit types must be converted through int64_t before the comparison.
	std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
	for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
	{
		if (m_caseDef.numElements == 1)
		{
			// Scalars.
			src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") ==  0) ? 0 : 1;\n";
		}
		else if (m_caseDef.numRows == 1)
		{
			// Vectors.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) ==  0) ? 0 : 1;\n";
		}
		else
		{
			// Matrices.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) ==  0) ? 0 : 1;\n";
		}
	}
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
466 
iterate(void)467 tcu::TestStatus TypeTestInstance::iterate(void)
468 {
469 	const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
470 	return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1);
471 }
472 
AddTypeTests(tcu::TestCaseGroup * group)473 void AddTypeTests(tcu::TestCaseGroup* group)
474 {
475 	deRandom rnd;
476 	deRandom_init(&rnd, 0);
477 	std::vector<TypeCaseDef> cases =
478 	{
479 		{"bool",		1,	1,	1,	0},
480 		{"bvec2",		1,	2,	1,	0},
481 		{"bvec3",		1,	3,	1,	0},
482 		{"bvec4",		1,	4,	1,	0},
483 		{"uint32_t",	4,	1,	1,	0},
484 		{"uvec2",		4,	2,	1,	0},
485 		{"uvec3",		4,	3,	1,	0},
486 		{"uvec4",		4,	4,	1,	0},
487 		{"int32_t",		4,	1,	1,	0},
488 		{"ivec2",		4,	2,	1,	0},
489 		{"ivec3",		4,	3,	1,	0},
490 		{"ivec4",		4,	4,	1,	0},
491 		{"uint8_t",		1,	1,	1,	0},
492 		{"u8vec2",		1,	2,	1,	0},
493 		{"u8vec3",		1,	3,	1,	0},
494 		{"u8vec4",		1,	4,	1,	0},
495 		{"int8_t",		1,	1,	1,	0},
496 		{"i8vec2",		1,	2,	1,	0},
497 		{"i8vec3",		1,	3,	1,	0},
498 		{"i8vec4",		1,	4,	1,	0},
499 		{"uint16_t",	2,	1,	1,	0},
500 		{"u16vec2",		2,	2,	1,	0},
501 		{"u16vec3",		2,	3,	1,	0},
502 		{"u16vec4",		2,	4,	1,	0},
503 		{"int16_t",		2,	1,	1,	0},
504 		{"i16vec2",		2,	2,	1,	0},
505 		{"i16vec3",		2,	3,	1,	0},
506 		{"i16vec4",		2,	4,	1,	0},
507 		{"uint64_t",	8,	1,	1,	0},
508 		{"u64vec2",		8,	2,	1,	0},
509 		{"u64vec3",		8,	3,	1,	0},
510 		{"u64vec4",		8,	4,	1,	0},
511 		{"int64_t",		8,	1,	1,	0},
512 		{"i64vec2",		8,	2,	1,	0},
513 		{"i64vec3",		8,	3,	1,	0},
514 		{"i64vec4",		8,	4,	1,	0},
515 		{"float32_t",	4,	1,	1,	0},
516 		{"f32vec2",		4,	2,	1,	0},
517 		{"f32vec3",		4,	3,	1,	0},
518 		{"f32vec4",		4,	4,	1,	0},
519 		{"f32mat2x2",	4,	2,	2,	0},
520 		{"f32mat2x3",	4,	3,	2,	0},
521 		{"f32mat2x4",	4,	4,	2,	0},
522 		{"f32mat3x2",	4,	2,	3,	0},
523 		{"f32mat3x3",	4,	3,	3,	0},
524 		{"f32mat3x4",	4,	4,	3,	0},
525 		{"f32mat4x2",	4,	2,	4,	0},
526 		{"f32mat4x3",	4,	3,	4,	0},
527 		{"f32mat4x4",	4,	4,	4,	0},
528 		{"float16_t",	2,	1,	1,	0},
529 		{"f16vec2",		2,	2,	1,	0},
530 		{"f16vec3",		2,	3,	1,	0},
531 		{"f16vec4",		2,	4,	1,	0},
532 		{"f16mat2x2",	2,	2,	2,	0},
533 		{"f16mat2x3",	2,	3,	2,	0},
534 		{"f16mat2x4",	2,	4,	2,	0},
535 		{"f16mat3x2",	2,	2,	3,	0},
536 		{"f16mat3x3",	2,	3,	3,	0},
537 		{"f16mat3x4",	2,	4,	3,	0},
538 		{"f16mat4x2",	2,	2,	4,	0},
539 		{"f16mat4x3",	2,	3,	4,	0},
540 		{"f16mat4x4",	2,	4,	4,	0},
541 		{"float64_t",	8,	1,	1,	0},
542 		{"f64vec2",		8,	2,	1,	0},
543 		{"f64vec3",		8,	3,	1,	0},
544 		{"f64vec4",		8,	4,	1,	0},
545 		{"f64mat2x2",	8,	2,	2,	0},
546 		{"f64mat2x3",	8,	3,	2,	0},
547 		{"f64mat2x4",	8,	4,	2,	0},
548 		{"f64mat3x2",	8,	2,	3,	0},
549 		{"f64mat3x3",	8,	3,	3,	0},
550 		{"f64mat3x4",	8,	4,	3,	0},
551 		{"f64mat4x2",	8,	2,	4,	0},
552 		{"f64mat4x3",	8,	3,	4,	0},
553 		{"f64mat4x4",	8,	4,	4,	0},
554 	};
555 
556 	for (deUint32 i = 0; i < cases.size(); ++i)
557 	{
558 		cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
559 		group->addChild(
560 			new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i].typeName.c_str(), cases[i]));
561 	}
562 }
563 
// Parameters for one composite-type test.
struct CompositeCaseDef
{
	deUint32				index;			// Feature bitmask consumed by checkSupport:
											// 0x1 float16, 0x2 float64, 0x4 int8,
											// 0x8 int16, 0x10 int64.
	std::string				typeDefinition;	// GLSL declarations incl. the shared variable.
	std::string				assignment;		// Body of main(): writes results into a.a[].
	deUint32				elements;		// Number of 32-bit result words written.
	std::vector<deUint32>	specValues;		// Specialization constants (array dimensions).
};
572 
// Instance side of the composite-type test; runs the generated shader with
// the case's specialization constants and checks the result buffer.
class CompositeTestInstance : public vkt::TestInstance
{
public:
	CompositeTestInstance(Context& context, const CompositeCaseDef& caseDef)
		: TestInstance(context),
		m_caseDef(caseDef)
	{
	}
	tcu::TestStatus iterate(void);
private:
	// Case parameters (shader fragments, result size, spec constants).
	CompositeCaseDef m_caseDef;
};
585 
// Test case checking zero initialization of composite shared-memory types
// (arrays, structs, nested structs), built from per-case GLSL fragments.
class CompositeTest : public vkt::TestCase
{
public:
	CompositeTest(tcu::TestContext& testCtx,
				  const std::string& name,
				  const std::string& description,
				  const CompositeCaseDef& caseDef)
		: TestCase(testCtx, name, description),
		m_caseDef(caseDef)
	{
	}

	// Assembles the "comp" shader from the case's GLSL fragments.
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new CompositeTestInstance(context, m_caseDef);
	}
	// Checks the extension plus the features named by m_caseDef.index.
	virtual void checkSupport(Context& context) const;
private:
	// Case parameters forwarded to the instance.
	CompositeCaseDef m_caseDef;
};
607 
// Throws NotSupportedError unless the device supports the extension and
// every shader type feature flagged in m_caseDef.index.
void CompositeTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");

	// shaderFloat16/shaderInt8 live in VkPhysicalDeviceShaderFloat16Int8Features;
	// shaderFloat64/shaderInt16/shaderInt64 live in the core features struct.
	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
	f16_i8_features.pNext = DE_NULL;

	VkPhysicalDeviceFeatures2 features2;
	deMemset(&features2, 0, sizeof(features2));
	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
	features2.pNext = &f16_i8_features;
	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

	// m_caseDef.index is a bitmask of required features, one bit per type.
	bool needsFloat16	= (m_caseDef.index & 0x1) != 0;
	bool needsFloat64	= (m_caseDef.index & 0x2) != 0;
	bool needsInt8		= (m_caseDef.index & 0x4) != 0;
	bool needsInt16		= (m_caseDef.index & 0x8) != 0;
	bool needsInt64		= (m_caseDef.index & 0x10) != 0;

	if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
		TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
	if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
		TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
	if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
		TCU_THROW(NotSupportedError, "shaderInt8 not supported");
	if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
		TCU_THROW(NotSupportedError, "shaderInt16 not supported");
	if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
		TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}
640 
initPrograms(SourceCollections & sourceCollections) const641 void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
642 {
643 	std::ostringstream src;
644 	src << "#version 450\n";
645 	src << "#extension GL_EXT_null_initializer : enable\n";
646 	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
647 	src << "\n";
648 	for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
649 		src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
650 	}
651 	src << "\n";
652 	src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
653 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
654 	src << "\n";
655 	src << m_caseDef.typeDefinition;
656 	src << "\n";
657 	src << "void main() {\n";
658 	src << m_caseDef.assignment;
659 	src << "}\n";
660 
661 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
662 }
663 
iterate(void)664 tcu::TestStatus CompositeTestInstance::iterate(void)
665 {
666 	const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
667 	return runCompute(m_context, bufferSize, 1, 1, 1, m_caseDef.specValues);
668 }
669 
// Registers the composite-type tests. Each case supplies a shared-memory
// type definition and a main() body that copies every scalar element of the
// zero-initialized variable into the result buffer (0 expected everywhere);
// |elements| is the number of result words and |specValues| the array
// dimensions. Cases are named by their position in the list.
void AddCompositeTests(tcu::TestCaseGroup* group)
{
	std::vector<CompositeCaseDef> cases =
	{
		// 1D array of uint sized by a spec constant.
		{0,
		"shared uint wg_mem[specId0] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[i] = wg_mem[i];\n"
		"}\n",
		16,
		{16},
		},

		// 2D array of float sized by two spec constants.
		{0,
		"shared float wg_mem[specId0][specId1] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		"  for (uint j = 0; j < specId1; ++j) {\n"
		"    uint idx = i * specId1 + j;\n"
		"    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
		"  }\n"
		"}\n",
		32,
		{4, 8},
		},

		// Struct containing all core scalar/vector types (30 scalar elements).
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"  uvec2 b;\n"
		"  uvec3 c;\n"
		"  uvec4 d;\n"
		"  float e;\n"
		"  vec2 f;\n"
		"  vec3 g;\n"
		"  vec4 h;\n"
		"  bool i;\n"
		"  bvec2 j;\n"
		"  bvec3 k;\n"
		"  bvec4 l;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint i = 0;\n"
		"a.a[i++] = wg_mem.a;\n"
		"a.a[i++] = wg_mem.b.x;\n"
		"a.a[i++] = wg_mem.b.y;\n"
		"a.a[i++] = wg_mem.c.x;\n"
		"a.a[i++] = wg_mem.c.y;\n"
		"a.a[i++] = wg_mem.c.z;\n"
		"a.a[i++] = wg_mem.d.x;\n"
		"a.a[i++] = wg_mem.d.y;\n"
		"a.a[i++] = wg_mem.d.z;\n"
		"a.a[i++] = wg_mem.d.w;\n"
		"a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.i ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
		30,
		{},
		},

		// Array of structs containing spec-constant-sized arrays of structs.
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"};\n"
		"struct Sb {\n"
		"  uvec2 a;\n"
		"};\n"
		"struct Sc {\n"
		"  Sa a[specId0];\n"
		"  Sb b[specId1];\n"
		"};\n"
		"shared Sc wg_mem[specId2] = {};\n",

		"uint idx = 0;\n"
		"for (uint i = 0; i < specId2; ++i) {\n"
		"  for (uint j = 0; j < specId0; ++j) {\n"
		"    a.a[idx++] = wg_mem[i].a[j].a;\n"
		"  }\n"
		"  for (uint j = 0; j < specId1; ++j) {\n"
		"    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
		"    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
		"  }\n"
		"}\n",
		32,
		{2,3,4},
		},

		// Struct with 16-bit floats (requires shaderFloat16, bit 0x1).
		{1,
		"struct Sa {\n"
		"  f16vec2 a;\n"
		"  float16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
		"}\n",
		18,
		{16},
		},

		// Struct with 64-bit floats (requires shaderFloat64, bit 0x2).
		{2,
		"struct Sa {\n"
		"  f64vec2 a;\n"
		"  float64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
		"}\n",
		7,
		{5},
		},

		// Struct with 8-bit ints (requires shaderInt8, bit 0x4).
		{4,
		"struct Sa {\n"
		"  i8vec2 a;\n"
		"  int8_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		34,
		{32},
		},

		// Struct with 16-bit ints (requires shaderInt16, bit 0x8).
		{8,
		"struct Sa {\n"
		"  i16vec2 a;\n"
		"  int16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		122,
		{120},
		},

		// Struct with 64-bit ints (requires shaderInt64, bit 0x10).
		{16,
		"struct Sa {\n"
		"  i64vec2 a;\n"
		"  int64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		"  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		63,
		{61},
		},

		// Struct mixing all extended types (requires all five features).
		{0x1f,
		"struct Sa {\n"
		"  float16_t a;\n"
		"  float b;\n"
		"  int8_t c;\n"
		"  int16_t d;\n"
		"  int e;\n"
		"  int64_t f;\n"
		"  float64_t g;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
		"a.a[idx++] = uint(wg_mem.c);\n"
		"a.a[idx++] = uint(wg_mem.d);\n"
		"a.a[idx++] = uint(wg_mem.e);\n"
		"a.a[idx++] = uint(wg_mem.f);\n"
		"a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
		7,
		{},
		},

		// Five levels of nested structs/arrays, each level spec-constant sized.
		{0,
		"struct Sa {\n"
		"  uint a;\n"
		"};\n"
		"struct Sb {\n"
		"  Sa a[specId0];\n"
		"  uint b;\n"
		"};\n"
		"struct Sc {\n"
		"  Sb b[specId1];\n"
		"  uint c;\n"
		"};\n"
		"struct Sd {\n"
		"  Sc c[specId2];\n"
		"  uint d;\n"
		"};\n"
		"struct Se {\n"
		"  Sd d[specId3];\n"
		"  uint e;\n"
		"};\n"
		"shared Se wg_mem[specId4] = {};\n",

		"uint idx = 0;\n"
		"for (uint i1 = 0; i1 < specId4; ++i1) {\n"
		"  a.a[idx++] = wg_mem[i1].e;\n"
		"  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
		"    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
		"    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
		"      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
		"      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
		"        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
		"        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
		"          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
		"        }\n"
		"      }\n"
		"    }\n"
		"  }\n"
		"}\n",
		872,
		{6,5,4,3,2},
		},
	};

	// Each case is named/described by its index in the list above.
	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		group->addChild(
			new CompositeTest(group->getTestContext(), de::toString(i), de::toString(i), cases[i]));
	}
}
934 
// Dispatch axis exercised by the max-workgroups tests: selects which of the
// vkCmdDispatch group-count dimensions is driven to its maximum (65535).
enum Dim {
	DimX,
	DimY,
	DimZ,
};
940 
// Test instance that dispatches 65535 workgroups along one axis and checks
// (in iterate(), defined below) that shared memory reads back as zero in
// every workgroup.
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
	// dim selects the dispatch axis that receives the maximum group count.
	MaxWorkgroupsInstance(Context &context, Dim dim)
		: TestInstance(context),
		m_dim(dim)
	{
	}
	tcu::TestStatus iterate(void);
private:
	Dim m_dim;	// axis along which 65535 workgroups are dispatched
};
953 
// Test case wrapper: builds the shader (initPrograms), checks extension
// support, and creates a MaxWorkgroupsInstance for the chosen axis.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
	MaxWorkgroupsTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  const std::string& description,
					  Dim dim)
		: TestCase(testCtx, name, description),
		m_dim(dim)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupsInstance(context, m_dim);
	}
	virtual void checkSupport(Context& context) const;
private:
	Dim m_dim;	// dispatch axis forwarded to the instance
};
975 
// The generated shader relies on GL_EXT_null_initializer ("shared ... = {}"),
// whose Vulkan-side guarantee comes from this extension.
void MaxWorkgroupsTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
980 
initPrograms(SourceCollections & sourceCollections) const981 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
982 {
983 	std::ostringstream src;
984 	src << "#version 450\n";
985 	src << "#extension GL_EXT_null_initializer : enable\n";
986 	src << "\n";
987 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
988 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
989 	src << "shared uint wg_mem[2] = {};\n";
990 	std::string dim;
991 	switch (m_dim) {
992 		case DimX:
993 			dim = "x";
994 			break;
995 		case DimY:
996 			dim = "y";
997 			break;
998 		case DimZ:
999 			dim = "z";
1000 			break;
1001 	}
1002 	src << "\n";
1003 	src << "void main() {\n";
1004 	src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1005 	src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1006 	src << "  uint idx_x = gl_LocalInvocationID.x;\n";
1007 	src << "  uint idx = idx_x + idx_y + idx_z;\n";
1008 	src << "  if (gl_LocalInvocationID.x == 0) {\n";
1009 	src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1010 	src << "  }\n";
1011 	src << "  barrier();\n";
1012 	src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1013 	src << "}\n";
1014 
1015 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1016 }
1017 
iterate(void)1018 tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
1019 {
1020 	VkPhysicalDeviceProperties properties;
1021 	deMemset(&properties, 0, sizeof(properties));
1022 	m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1023 
1024 	const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1025 	deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1026 	deUint32 wgy = 1;
1027 	deUint32 wgz = 1;
1028 	if (wgx < maxWG)
1029 	{
1030 		wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1031 	}
1032 	if ((wgx * wgy) < maxWG)
1033 	{
1034 		wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1035 	}
1036 	deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1037 
1038 	deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1039 	deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1040 	deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1041 
1042 	return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, {wgx, wgy, wgz}, /*increment*/ 1);
1043 }
1044 
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group)1045 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group)
1046 {
1047 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", "max x dim workgroups", DimX));
1048 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", "max y dim workgroups", DimY));
1049 	group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", "max z dim workgroups", DimZ));
1050 }
1051 
// Test instance that reads back a specialization-sized shared array
// (wg_mem[WGX][WGY][WGZ]) and expects every element to be zero.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
	// x/y/z: workgroup size, passed as specialization constants in iterate().
	SpecializeWorkgroupInstance(Context &context, deUint32 x, deUint32 y, deUint32 z)
		: TestInstance(context),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1068 
// Test case for zero-initialization of shared arrays whose dimensions are
// specialization constants; one case per (x, y, z) workgroup size.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
	SpecializeWorkgroupTest(tcu::TestContext& testCtx,
					  const std::string& name,
					  const std::string& description,
					  deUint32 x, deUint32 y, deUint32 z)
		: TestCase(testCtx, name, description),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z);
	}
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1094 
// Requires the extension and rejects workgroup sizes that exceed the device's
// maxComputeWorkGroupInvocations limit (sizes here are at most 8*8*8 = 512).
void SpecializeWorkgroupTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");

	VkPhysicalDeviceProperties properties;
	deMemset(&properties, 0, sizeof(properties));
	context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
	if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
		TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}
1105 
initPrograms(SourceCollections & sourceCollections) const1106 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1107 {
1108 	std::ostringstream src;
1109 	src << "#version 450\n";
1110 	src << "#extension GL_EXT_null_initializer : enable\n";
1111 	src << "\n";
1112 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1113 	src << "layout(constant_id = 1) const uint WGY = 1;\n";
1114 	src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1115 	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1116 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1117 	src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1118 	src << "\n";
1119 	src << "void main() {\n";
1120 	src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1121 	src << "}\n";
1122 
1123 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1124 }
1125 
iterate(void)1126 tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
1127 {
1128 	const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1129 	return runCompute(m_context, size, 1, 1, 1, {m_x, m_y, m_z});
1130 }
1131 
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group)1132 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group)
1133 {
1134 	for (deUint32 z = 1; z <= 8; ++z)
1135 	{
1136 		for (deUint32 y = 1; y <= 8; ++y)
1137 		{
1138 			for (deUint32 x = 1; x <= 8; ++x)
1139 			{
1140 				group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1141 					de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1142 					de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1143 					x, y, z));
1144 			}
1145 		}
1146 	}
1147 }
1148 
// Test instance that records one dispatch of the pipeline and submits the
// same command buffer m_repeat times, verifying the output after each run
// (iterate() below) — shared memory must be re-zeroed on every execution.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
	// xSize: workgroup X size; repeat: number of submissions;
	// odd: which Y row (0 or 1) is loaded from the input buffer.
	RepeatedPipelineInstance(Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestInstance(context),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_xSize;
	deUint32 m_repeat;
	deUint32 m_odd;
};
1165 
// Test case wrapper for the repeated-pipeline runs; parameters are forwarded
// unchanged to RepeatedPipelineInstance.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
	// NOTE: trailing parameter order is (xSize, repeat, odd) — call sites
	// must match it.
	RepeatedPipelineTest(tcu::TestContext& testCtx,
						const std::string& name,
						const std::string& description,
						deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestCase(testCtx, name, description),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
	}
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_xSize;
	deUint32 m_repeat;
	deUint32 m_odd;
};
1191 
// Shared-memory zero-initialization ("shared ... = {}") requires this extension.
void RepeatedPipelineTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
1196 
initPrograms(SourceCollections & sourceCollections) const1197 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1198 {
1199 	std::ostringstream src;
1200 	src << "#version 450\n";
1201 	src << "#extension GL_EXT_null_initializer : enable\n";
1202 	src << "\n";
1203 	src << "layout(constant_id = 0) const uint WGX = 1;\n";
1204 	src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1205 	src << "\n";
1206 	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1207 	src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1208 	src << "\n";
1209 	src << "shared uint wg_mem[WGX][2] = {};\n";
1210 	src << "void main() {\n";
1211 	src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1212 	src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1213 	src << "  }\n";
1214 	src << "  barrier();\n";
1215 	src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1216 	src << "}\n";
1217 
1218 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1219 }
1220 
// Records one command buffer dispatching the pipeline once, then submits it
// m_repeat times; after each submission verifies that output row m_odd holds
// the values copied from the input buffer and the other row is zero (i.e.
// shared memory was re-zeroed by the implementation on every execution).
tcu::TestStatus RepeatedPipelineInstance::iterate(void)
{
	Context& context					= m_context;
	// Two rows of m_xSize uints: one loaded from buffer B, one left untouched.
	const deUint32 bufferSize			= m_xSize * 2 * (deUint32)sizeof(deUint32);
	const deUint32 numBuffers			= 2;

	const DeviceInterface&	vk			= context.getDeviceInterface();
	const VkDevice			device		= context.getDevice();
	Allocator&				allocator	= context.getDefaultAllocator();
	tcu::TestLog&			log			= context.getTestContext().getLog();

	// buffers[0] = output (binding 0), buffers[1] = input (binding 1).
	de::MovePtr<BufferWithMemory> buffers[numBuffers];
	VkDescriptorBufferInfo bufferDescriptors[numBuffers];

	VkDeviceSize size = bufferSize;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
			MemoryRequirement::HostVisible | MemoryRequirement::Cached));
		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
	}

	// Seed the input buffer with its element index; poison the output with
	// 0xff so stale data can't accidentally pass verification.
	deUint32* ptrs[numBuffers];
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
	}
	for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
	{
		ptrs[1][i] = i;
	}
	deMemset(ptrs[0], 0xff, (size_t)size);

	// One storage-buffer binding per buffer, compute stage only.
	DescriptorSetLayoutBuilder layoutBuilder;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	}

	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	// Specialization constant 0 = WGX (workgroup X size used by the shader).
	const deUint32 specData[1] =
	{
		m_xSize,
	};
	const vk::VkSpecializationMapEntry entries[1] =
	{
		{0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
	};
	const vk::VkSpecializationInfo specInfo =
	{
		1,
		entries,
		sizeof(specData),
		specData
	};

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		DE_NULL,
		(VkPipelineLayoutCreateFlags)0,
		1,
		&descriptorSetLayout.get(),
		0u,
		DE_NULL,
	};
	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;

	// Make the CPU-written initial contents visible to the device.
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		flushAlloc(vk, device, buffers[i]->getAllocation());
	}

	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
	const VkPipelineShaderStageCreateInfo shaderInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		DE_NULL,
		0,
		VK_SHADER_STAGE_COMPUTE_BIT,
		*shader,
		"main",
		&specInfo,
	};

	const VkComputePipelineCreateInfo pipelineInfo =
	{
		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		DE_NULL,
		0u,
		shaderInfo,
		*pipelineLayout,
		(VkPipeline)0,
		0u,
	};
	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

	const VkQueue queue = context.getUniversalQueue();
	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
		context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	DescriptorSetUpdateBuilder setUpdateBuilder;
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
	}
	setUpdateBuilder.update(vk, device);

	// Record a single dispatch of one workgroup; the same command buffer is
	// re-submitted below without re-recording.
	beginCommandBuffer(vk, *cmdBuffer, 0);

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

	endCommandBuffer(vk, *cmdBuffer);

	for (deUint32 r = 0; r < m_repeat; ++r)
	{
		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

		invalidateAlloc(vk, device, buffers[0]->getAllocation());

		// Row m_odd (i / m_xSize) must echo the input values (== i); the
		// other row comes from untouched shared memory and must be zero.
		for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
		{
			deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
			if (ptrs[0][i] != expected)
			{
				log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
				return tcu::TestStatus::fail("compute failed");
			}
		}

		// Re-poison the output and rewrite the descriptor before the next
		// submission so every iteration starts from the same state.
		deMemset(ptrs[0], 0xff, (size_t)size);
		flushAlloc(vk, device, buffers[0]->getAllocation());
		setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
									 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
		setUpdateBuilder.update(vk, device);
	}

	return tcu::TestStatus::pass("compute succeeded");
}
1373 
AddRepeatedPipelineTests(tcu::TestCaseGroup * group)1374 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group)
1375 {
1376 	std::vector<deUint32> xSizes = {4, 16, 32, 64};
1377 	std::vector<deUint32> odds = {0, 1};
1378 	std::vector<deUint32> repeats = {2, 4, 8, 16};
1379 	for (deUint32 i = 0; i < xSizes.size(); ++i)
1380 	{
1381 		deUint32 x = xSizes[i];
1382 		for (deUint32 j = 0; j < odds.size(); ++j)
1383 		{
1384 			deUint32 odd = odds[j];
1385 			for (deUint32 k = 0; k < repeats.size(); ++k)
1386 			{
1387 				deUint32 repeat = repeats[k];
1388 				group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1389 					std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1390 					std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1391 					x, odd, repeat));
1392 			}
1393 		}
1394 	}
1395 }
1396 
1397 } // anonymous
1398 
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx)1399 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests(tcu::TestContext& testCtx)
1400 {
1401 	de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory", "VK_KHR_zero_intialize_workgroup_memory tests"));
1402 
1403 	tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1404 		new tcu::TestCaseGroup(testCtx, "max_workgroup_memory", "Read initialization of max workgroup memory");
1405 	AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup);
1406 	tests->addChild(maxWorkgroupMemoryGroup);
1407 
1408 	tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types", "basic type tests");
1409 	AddTypeTests(typeGroup);
1410 	tests->addChild(typeGroup);
1411 
1412 	tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites", "composite type tests");
1413 	AddCompositeTests(compositeGroup);
1414 	tests->addChild(compositeGroup);
1415 
1416 	tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups", "max workgroups");
1417 	AddMaxWorkgroupsTests(maxWorkgroupsGroup);
1418 	tests->addChild(maxWorkgroupsGroup);
1419 
1420 	tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup", "specialize workgroup size");
1421 	AddSpecializeWorkgroupTests(specializeWorkgroupGroup);
1422 	tests->addChild(specializeWorkgroupGroup);
1423 
1424 	tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline", "repeated pipeline run");
1425 	AddRepeatedPipelineTests(repeatPipelineGroup);
1426 	tests->addChild(repeatPipelineGroup);
1427 
1428 	return tests.release();
1429 }
1430 
1431 } // compute
1432 } // vkt
1433