• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Intel Corporation
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief VK_KHR_workgroup_memory_explicit_layout tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktComputeWorkgroupMemoryExplicitLayoutTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
29 #include "vktTestGroupUtil.hpp"
30 
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageWithMemory.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDefs.hpp"
39 #include "vkRef.hpp"
40 
41 #include "tcuCommandLine.hpp"
42 #include "tcuTestLog.hpp"
43 
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47 
48 #include <algorithm>
49 #include <vector>
50 
51 using namespace vk;
52 
53 namespace vkt
54 {
55 namespace compute
56 {
57 namespace
58 {
59 
60 struct CheckSupportParams
61 {
62 	bool needsScalar;
63 	bool needsInt8;
64 	bool needsInt16;
65 	bool needsInt64;
66 	bool needsFloat16;
67 	bool needsFloat64;
68 
useTypevkt::compute::__anon5d3a54a00111::CheckSupportParams69 	void useType(glu::DataType dt)
70 	{
71 		using namespace glu;
72 
73 		needsInt8		|= isDataTypeIntOrIVec8Bit(dt) || isDataTypeUintOrUVec8Bit(dt);
74 		needsInt16		|= isDataTypeIntOrIVec16Bit(dt) || isDataTypeUintOrUVec16Bit(dt);
75 		needsFloat16	|= isDataTypeFloat16OrVec(dt);
76 		needsFloat64	|= isDataTypeDoubleOrDVec(dt);
77 	}
78 };
79 
checkSupportWithParams(Context & context,const CheckSupportParams & params)80 void checkSupportWithParams(Context& context, const CheckSupportParams& params)
81 {
82 	context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
83 	context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
84 
85 	VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout_features;
86 	deMemset(&layout_features, 0, sizeof(layout_features));
87 	layout_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
88 	layout_features.pNext = DE_NULL;
89 
90 	VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
91 	deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
92 	f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
93 	f16_i8_features.pNext = &layout_features;
94 
95 	VkPhysicalDeviceFeatures2 features2;
96 	deMemset(&features2, 0, sizeof(features2));
97 	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
98 	features2.pNext = &f16_i8_features;
99 	context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
100 
101 	if (params.needsScalar)
102 	{
103 		if (layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout != VK_TRUE)
104 			TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayoutScalarBlockLayout not supported");
105 	}
106 
107 	if (params.needsInt8)
108 	{
109 		if (f16_i8_features.shaderInt8 != VK_TRUE)
110 			TCU_THROW(NotSupportedError, "shaderInt8 not supported");
111 		if (layout_features.workgroupMemoryExplicitLayout8BitAccess != VK_TRUE)
112 			TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout8BitAccess not supported");
113 	}
114 
115 	if (params.needsInt16)
116 	{
117 		if (features2.features.shaderInt16 != VK_TRUE)
118 			TCU_THROW(NotSupportedError, "shaderInt16 not supported");
119 		if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
120 			TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
121 	}
122 
123 	if (params.needsInt64)
124 	{
125 		if (features2.features.shaderInt64 != VK_TRUE)
126 			TCU_THROW(NotSupportedError, "shaderInt64 not supported");
127 	}
128 
129 	if (params.needsFloat16)
130 	{
131 		if (f16_i8_features.shaderFloat16 != VK_TRUE)
132 			TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
133 		if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
134 			TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
135 	}
136 
137 	if (params.needsFloat64)
138 	{
139 		if (features2.features.shaderFloat64 != VK_TRUE)
140 			TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
141 	}
142 }
143 
runCompute(Context & context,deUint32 workgroupSize)144 tcu::TestStatus runCompute(Context& context, deUint32 workgroupSize)
145 {
146 	const DeviceInterface&	vk			= context.getDeviceInterface();
147 	const VkDevice			device		= context.getDevice();
148 	Allocator&				allocator	= context.getDefaultAllocator();
149 	tcu::TestLog&			log			= context.getTestContext().getLog();
150 
151 	de::MovePtr<BufferWithMemory> buffer;
152 	VkDescriptorBufferInfo bufferDescriptor;
153 
154 	VkDeviceSize size = sizeof(deUint32) * workgroupSize;
155 
156 	buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
157 		vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
158 		MemoryRequirement::HostVisible | MemoryRequirement::Cached));
159 	bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
160 
161 	deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
162 
163 	deMemset(ptr, 0xFF, static_cast<std::size_t>(size));
164 
165 	DescriptorSetLayoutBuilder layoutBuilder;
166 	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
167 
168 	Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
169 	Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
170 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
171 		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
172 	Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
173 
174 	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
175 	{
176 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 		DE_NULL,
178 		(VkPipelineLayoutCreateFlags)0,
179 		1,
180 		&descriptorSetLayout.get(),
181 		0u,
182 		DE_NULL,
183 	};
184 	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
185 	VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
186 	flushAlloc(vk, device, buffer->getAllocation());
187 
188 	const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
189 	const VkPipelineShaderStageCreateInfo shaderInfo =
190 	{
191 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
192 		DE_NULL,
193 		0,
194 		VK_SHADER_STAGE_COMPUTE_BIT,
195 		*shader,
196 		"main",
197 		DE_NULL,
198 	};
199 
200 	const VkComputePipelineCreateInfo pipelineInfo =
201 	{
202 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
203 		DE_NULL,
204 		0u,
205 		shaderInfo,
206 		*pipelineLayout,
207 		(VkPipeline)0,
208 		0u,
209 	};
210 	Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
211 
212 	const VkQueue queue = context.getUniversalQueue();
213 	Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
214 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
215 		context.getUniversalQueueFamilyIndex());
216 	Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
217 
218 	DescriptorSetUpdateBuilder setUpdateBuilder;
219 	setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
220 		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
221 	setUpdateBuilder.update(vk, device);
222 
223 	beginCommandBuffer(vk, *cmdBuffer, 0);
224 
225 	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
226 	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
227 
228 	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
229 
230 	endCommandBuffer(vk, *cmdBuffer);
231 
232 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
233 
234 	invalidateAlloc(vk, device, buffer->getAllocation());
235 	for (deUint32 i = 0; i < workgroupSize; ++i)
236 	{
237 		deUint32 expected = i;
238 		if (ptr[i] != expected)
239 		{
240 			log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
241 			return tcu::TestStatus::fail("compute failed");
242 		}
243 	}
244 
245 	return tcu::TestStatus::pass("compute succeeded");
246 }
247 
248 class AliasTest : public vkt::TestCase
249 {
250 public:
251 	enum Requirements
252 	{
253 		RequirementNone    = 0,
254 		RequirementFloat16 = 1 << 0,
255 		RequirementFloat64 = 1 << 1,
256 		RequirementInt8    = 1 << 2,
257 		RequirementInt16   = 1 << 3,
258 		RequirementInt64   = 1 << 4,
259 	};
260 
261 	enum Flags
262 	{
263 		FlagNone         = 0,
264 		FlagLayoutStd430 = 1 << 0,
265 		FlagLayoutStd140 = 1 << 1,
266 		FlagLayoutScalar = 1 << 2,
267 		FlagFunction     = 1 << 3,
268 		FlagBarrier      = 1 << 4,
269 	};
270 
271 	enum LayoutFlags
272 	{
273 		LayoutNone    = 0,
274 
275 		LayoutDefault = 1 << 0,
276 		LayoutStd140  = 1 << 1,
277 		LayoutStd430  = 1 << 2,
278 		LayoutScalar  = 1 << 3,
279 		LayoutAll     = LayoutDefault | LayoutStd140 | LayoutStd430 | LayoutScalar,
280 
281 		LayoutCount   = 4,
282 	};
283 
284 	enum Function
285 	{
286 		FunctionNone = 0,
287 		FunctionRead,
288 		FunctionWrite,
289 		FunctionReadWrite,
290 		FunctionCount,
291 	};
292 
293 	enum Synchronization
294 	{
295 		SynchronizationNone = 0,
296 		SynchronizationBarrier,
297 		SynchronizationCount,
298 	};
299 
300 	struct CaseDef
301 	{
302 		std::string extraTypes;
303 
304 		std::string writeDesc;
305 		std::string	writeType;
306 		std::string writeValue;
307 
308 		std::string readDesc;
309 		std::string readType;
310 		std::string readValue;
311 
312 		LayoutFlags layout;
313 		Function func;
314 		Synchronization sync;
315 		Requirements requirements;
316 
testNamevkt::compute::__anon5d3a54a00111::AliasTest::CaseDef317 		std::string testName() const
318 		{
319 			std::string name = writeDesc + "_to_" + readDesc;
320 
321 			// In a valid test case, only one flag will be set.
322 			switch (layout)
323 			{
324 			case LayoutDefault:
325 				name += "_default";
326 				break;
327 			case LayoutStd140:
328 				name += "_std140";
329 				break;
330 			case LayoutStd430:
331 				name += "_std430";
332 				break;
333 			case LayoutScalar:
334 				name += "_scalar";
335 				break;
336 			default:
337 				DE_ASSERT(0);
338 				break;
339 			}
340 
341 			switch (func)
342 			{
343 			case FunctionNone:
344 				break;
345 			case FunctionRead:
346 				name += "_func_read";
347 				break;
348 			case FunctionWrite:
349 				name += "_func_write";
350 				break;
351 			case FunctionReadWrite:
352 				name += "_func_read_write";
353 				break;
354 			default:
355 				DE_ASSERT(0);
356 				break;
357 			}
358 
359 			switch (sync)
360 			{
361 			case SynchronizationNone:
362 				break;
363 			case SynchronizationBarrier:
364 				name += "_barrier";
365 				break;
366 			default:
367 				DE_ASSERT(0);
368 				break;
369 			}
370 
371 			return name;
372 		}
373 	};
374 
AliasTest(tcu::TestContext & testCtx,const CaseDef & caseDef)375 	AliasTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
376 		: TestCase(testCtx, caseDef.testName(), caseDef.testName()),
377 		m_caseDef(caseDef)
378 	{
379 	}
380 
381 	virtual void checkSupport(Context& context) const;
382 	void initPrograms(SourceCollections& sourceCollections) const;
383 
384 	class Instance : public vkt::TestInstance
385 	{
386 	public:
Instance(Context & context,const CaseDef & caseDef)387 		Instance(Context& context, const CaseDef& caseDef)
388 			: TestInstance(context),
389 			  m_caseDef(caseDef)
390 		{
391 		}
392 
iterate(void)393 		tcu::TestStatus iterate(void)
394 		{
395 			return runCompute(m_context, 1u);
396 		}
397 
398 	private:
399 		CaseDef m_caseDef;
400 	};
401 
createInstance(Context & context) const402 	TestInstance* createInstance(Context& context) const
403 	{
404 		return new Instance(context, m_caseDef);
405 	}
406 
407 private:
408 	CaseDef m_caseDef;
409 };
410 
checkSupport(Context & context) const411 void AliasTest::checkSupport(Context& context) const
412 {
413 	CheckSupportParams p;
414 	deMemset(&p, 0, sizeof(p));
415 
416 	p.needsScalar	= m_caseDef.layout == LayoutScalar;
417 	p.needsInt8		= m_caseDef.requirements & RequirementInt8;
418 	p.needsInt16	= m_caseDef.requirements & RequirementInt16;
419 	p.needsInt64	= m_caseDef.requirements & RequirementInt64;
420 	p.needsFloat16	= m_caseDef.requirements & RequirementFloat16;
421 	p.needsFloat64	= m_caseDef.requirements & RequirementFloat64;
422 
423 	checkSupportWithParams(context, p);
424 }
425 
initPrograms(SourceCollections & sourceCollections) const426 void AliasTest::initPrograms(SourceCollections& sourceCollections) const
427 {
428 	std::string layout;
429 	switch (m_caseDef.layout)
430 	{
431 	case LayoutStd140:
432 		layout = "layout(std140)";
433 		break;
434 	case LayoutStd430:
435 		layout = "layout(std430)";
436 		break;
437 	case LayoutScalar:
438 		layout = "layout(scalar)";
439 		break;
440 	default:
441 		// No layout specified.
442 		break;
443 	}
444 
445 	std::ostringstream src;
446 
447 	src << "#version 450\n";
448 	src << "#extension GL_EXT_shared_memory_block : enable\n";
449 	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
450 
451 	if (m_caseDef.layout == LayoutScalar)
452 		src << "#extension GL_EXT_scalar_block_layout : enable\n";
453 
454 	src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
455 
456 	if (!m_caseDef.extraTypes.empty())
457 		src << m_caseDef.extraTypes << ";\n";
458 
459 	src << layout << "shared A { " << m_caseDef.writeType << "; } a;\n";
460 	src << layout << "shared B { " << m_caseDef.readType << "; } b;\n";
461 	src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
462 
463 	if (m_caseDef.func == FunctionRead ||
464 		m_caseDef.func == FunctionReadWrite)
465 	{
466 		src << "void read(int index) {\n";
467 		src << "  if (b.v == " << m_caseDef.readValue << ")\n";
468 		src << "    result = index;\n";
469 		src << "}\n";
470 	}
471 
472 	if (m_caseDef.func == FunctionWrite ||
473 		m_caseDef.func == FunctionReadWrite)
474 	{
475 		src << "void write(int index) {\n";
476 		src << "  if (index == 0)\n";
477 		src << "    a.v = " << m_caseDef.writeValue << ";\n";
478 		src << "}\n";
479 	}
480 
481 	src << "void main() {\n";
482 	src << "  int index = int(gl_LocalInvocationIndex);\n";
483 
484 	if (m_caseDef.func == FunctionWrite)
485 		src << "  write(index);\n";
486 	else
487 		src << "  a.v = " << m_caseDef.writeValue << ";\n";
488 
489 	if (m_caseDef.sync == SynchronizationBarrier)
490 		src << "  barrier();\n";
491 
492 	if (m_caseDef.func == FunctionRead ||
493 		m_caseDef.func == FunctionReadWrite)
494 	{
495 		src << "  read(index);\n";
496 	}
497 	else
498 	{
499 		src << "  if (b.v == " << m_caseDef.readValue << ")\n";
500 		src << "    result = index;\n";
501 	}
502 	src << "}\n";
503 
504 	deUint32 buildFlags =
505 		m_caseDef.layout == LayoutScalar ? ShaderBuildOptions::FLAG_ALLOW_WORKGROUP_SCALAR_OFFSETS :
506 		                                   ShaderBuildOptions::Flags(0u);
507 
508 	sourceCollections.glslSources.add("comp")
509 		<< glu::ComputeSource(src.str())
510 		<< vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4, buildFlags);
511 }
512 
makeArray(const std::string & type,const std::vector<deUint64> & values)513 std::string makeArray(const std::string& type, const std::vector<deUint64>& values)
514 {
515 	std::ostringstream s;
516 	s << type << "[](";
517 	for (std::size_t i = 0; i < values.size(); i++)
518 	{
519 		s << type << "(" << std::to_string(values[i]) << ")";
520 		if (i != values.size() - 1)
521 			s << ", ";
522 	};
523 	s << ")";
524 	return s.str();
525 }
526 
makeU8Array(const std::vector<deUint64> & values)527 std::string makeU8Array(const std::vector<deUint64>& values)
528 {
529 	return makeArray("uint8_t", values);
530 }
531 
makeU16Array(const std::vector<deUint64> & values)532 std::string makeU16Array(const std::vector<deUint64>& values)
533 {
534 	return makeArray("uint16_t", values);
535 }
536 
makeU32Array(const std::vector<deUint64> & values)537 std::string makeU32Array(const std::vector<deUint64>& values)
538 {
539 	return makeArray("uint32_t", values);
540 }
541 
AddAliasTests(tcu::TestCaseGroup * group)542 void AddAliasTests(tcu::TestCaseGroup* group)
543 {
544 	const int DEFAULT = AliasTest::LayoutDefault;
545 	const int STD140 = AliasTest::LayoutStd140;
546 	const int STD430 = AliasTest::LayoutStd430;
547 	const int SCALAR = AliasTest::LayoutScalar;
548 	const int ALL = DEFAULT | STD140 | STD430 | SCALAR;
549 
550 	const int FLOAT16 = AliasTest::RequirementFloat16;
551 	const int FLOAT64 = AliasTest::RequirementFloat64;
552 	const int INT8 = AliasTest::RequirementInt8;
553 	const int INT16 = AliasTest::RequirementInt16;
554 	const int INT64 = AliasTest::RequirementInt64;
555 
556 #define CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2)						\
557 	{ E, D1, T1, V1, D2, T2, V2, AliasTest::LayoutFlags(L), AliasTest::FunctionNone, AliasTest::SynchronizationNone, AliasTest::Requirements(R) }
558 
559 #define CASE_EXTRA_WITH_REVERSE(L, R, E, D1, T1, V1, D2, T2, V2)	\
560 	CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2),					\
561 	CASE_EXTRA(L, R, E, D2, T2, V2, D1, T1, V1)
562 
563 #define CASE_WITH_REVERSE(L, R, D1, T1, V1, D2, T2, V2)	CASE_EXTRA_WITH_REVERSE(L, R, "", D1, T1, V1, D2, T2, V2)
564 #define CASE_SAME_TYPE(R, D, T, V)						CASE_EXTRA(ALL, R, "", D, T, V, D, T, V)
565 #define CASE(L, R, D1, T1, V1, D2, T2, V2)				CASE_EXTRA(L, R, "", D1, T1, V1, D2, T2, V2)
566 
567 
568 	std::vector<AliasTest::CaseDef> cases =
569 	{
570 		CASE_SAME_TYPE(0,		"bool_true",	"bool v",		"true"),
571 		CASE_SAME_TYPE(0,		"bool_false",	"bool v",		"false"),
572 		CASE_SAME_TYPE(0,		"bvec2",		"bvec2 v",		"bvec2(false, true)"),
573 		CASE_SAME_TYPE(0,		"bvec3",		"bvec3 v",		"bvec3(false, true, true)"),
574 		CASE_SAME_TYPE(0,		"bvec4",		"bvec4 v",		"bvec4(false, true, true, false)"),
575 		CASE_SAME_TYPE(INT8,	"u8",			"uint8_t v",	"uint8_t(10)"),
576 		CASE_SAME_TYPE(INT8,	"u8vec2",		"u8vec2 v",		"u8vec2(10, 20)"),
577 		CASE_SAME_TYPE(INT8,	"u8vec3",		"u8vec3 v",		"u8vec3(10, 20, 30)"),
578 		CASE_SAME_TYPE(INT8,	"u8vec4",		"u8vec4 v",		"u8vec4(10, 20, 30, 40)"),
579 		CASE_SAME_TYPE(INT8,	"i8",			"int8_t v",		"int8_t(-10)"),
580 		CASE_SAME_TYPE(INT8,	"i8vec2",		"i8vec2 v",		"i8vec2(-10, 20)"),
581 		CASE_SAME_TYPE(INT8,	"i8vec3",		"i8vec3 v",		"i8vec3(-10, 20, -30)"),
582 		CASE_SAME_TYPE(INT8,	"i8vec4",		"i8vec4 v",		"i8vec4(-10, 20, -30, 40)"),
583 		CASE_SAME_TYPE(INT16,	"u16",			"uint16_t v",	"uint16_t(1000)"),
584 		CASE_SAME_TYPE(INT16,	"u16vec2",		"u16vec2 v",	"u16vec2(1000, 2000)"),
585 		CASE_SAME_TYPE(INT16,	"u16vec3",		"u16vec3 v",	"u16vec3(1000, 2000, 3000)"),
586 		CASE_SAME_TYPE(INT16,	"u16vec4",		"u16vec4 v",	"u16vec4(1000, 2000, 3000, 4000)"),
587 		CASE_SAME_TYPE(INT16,	"i16",			"int16_t v",	"int16_t(-1000)"),
588 		CASE_SAME_TYPE(INT16,	"i16vec2",		"i16vec2 v",	"i16vec2(-1000, 2000)"),
589 		CASE_SAME_TYPE(INT16,	"i16vec3",		"i16vec3 v",	"i16vec3(-1000, 2000, -3000)"),
590 		CASE_SAME_TYPE(INT16,	"i16vec4",		"i16vec4 v",	"i16vec4(-1000, 2000, -3000, 4000)"),
591 		CASE_SAME_TYPE(0,		"u32",			"uint32_t v",	"uint32_t(100)"),
592 		CASE_SAME_TYPE(0,		"uvec2",		"uvec2 v",		"uvec2(100, 200)"),
593 		CASE_SAME_TYPE(0,		"uvec3",		"uvec3 v",		"uvec3(100, 200, 300)"),
594 		CASE_SAME_TYPE(0,		"uvec4",		"uvec4 v",		"uvec4(100, 200, 300, 400)"),
595 		CASE_SAME_TYPE(0,		"i32",			"int32_t v",	"int32_t(-100)"),
596 		CASE_SAME_TYPE(0,		"ivec2",		"ivec2 v",		"ivec2(-100, 200)"),
597 		CASE_SAME_TYPE(0,		"ivec3",		"ivec3 v",		"ivec3(-100, 200, -300)"),
598 		CASE_SAME_TYPE(0,		"ivec4",		"ivec4 v",		"ivec4(-100, 200, -300, 400)"),
599 		CASE_SAME_TYPE(INT64,	"u64",			"uint64_t v",	"uint64_t(1000)"),
600 		CASE_SAME_TYPE(INT64,	"u64vec2",		"u64vec2 v",	"u64vec2(1000, 2000)"),
601 		CASE_SAME_TYPE(INT64,	"u64vec3",		"u64vec3 v",	"u64vec3(1000, 2000, 3000)"),
602 		CASE_SAME_TYPE(INT64,	"u64vec4",		"u64vec4 v",	"u64vec4(1000, 2000, 3000, 4000)"),
603 		CASE_SAME_TYPE(INT64,	"i64",			"int64_t v",	"int64_t(-1000)"),
604 		CASE_SAME_TYPE(INT64,	"i64vec2",		"i64vec2 v",	"i64vec2(-1000, 2000)"),
605 		CASE_SAME_TYPE(INT64,	"i64vec3",		"i64vec3 v",	"i64vec3(-1000, 2000, -3000)"),
606 		CASE_SAME_TYPE(INT64,	"i64vec4",		"i64vec4 v",	"i64vec4(-1000, 2000, -3000, 4000)"),
607 		CASE_SAME_TYPE(FLOAT16,	"f16",			"float16_t v",	"float16_t(-100.0)"),
608 		CASE_SAME_TYPE(FLOAT16,	"f16vec2",		"f16vec2 v",	"f16vec2(100.0, -200.0)"),
609 		CASE_SAME_TYPE(FLOAT16,	"f16vec3",		"f16vec3 v",	"f16vec3(100.0, -200.0, 300.0)"),
610 		CASE_SAME_TYPE(FLOAT16,	"f16vec4",		"f16vec4 v",	"f16vec4(100.0, -200.0, 300.0, -400.0)"),
611 		CASE_SAME_TYPE(0,		"f32",			"float32_t v",	"float32_t(-100.0)"),
612 		CASE_SAME_TYPE(0,		"f32vec2",		"f32vec2 v",	"f32vec2(100.0, -200.0)"),
613 		CASE_SAME_TYPE(0,		"f32vec3",		"f32vec3 v",	"f32vec3(100.0, -200.0, 300.0)"),
614 		CASE_SAME_TYPE(0,		"f32vec4",		"f32vec4 v",	"f32vec4(100.0, -200.0, 300.0, -400.0)"),
615 		CASE_SAME_TYPE(FLOAT64,	"f64",			"float64_t v",	"float32_t(-100.0)"),
616 		CASE_SAME_TYPE(FLOAT64,	"f64vec2",		"f64vec2 v",	"f64vec2(100.0, -200.0)"),
617 		CASE_SAME_TYPE(FLOAT64,	"f64vec3",		"f64vec3 v",	"f64vec3(100.0, -200.0, 300.0)"),
618 		CASE_SAME_TYPE(FLOAT64,	"f64vec4",		"f64vec4 v",	"f64vec4(100.0, -200.0, 300.0, -400.0)"),
619 		CASE_SAME_TYPE(FLOAT16,	"f16mat2x2",	"f16mat2x2 v",	"f16mat2x2(1, 2, 3, 4)"),
620 		CASE_SAME_TYPE(FLOAT16,	"f16mat2x3",	"f16mat2x3 v",	"f16mat2x3(1, 2, 3, 4, 5, 6)"),
621 		CASE_SAME_TYPE(FLOAT16,	"f16mat2x4",	"f16mat2x4 v",	"f16mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
622 		CASE_SAME_TYPE(FLOAT16,	"f16mat3x2",	"f16mat3x2 v",	"f16mat3x2(1, 2, 3, 4, 5, 6)"),
623 		CASE_SAME_TYPE(FLOAT16,	"f16mat3x3",	"f16mat3x3 v",	"f16mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
624 		CASE_SAME_TYPE(FLOAT16,	"f16mat3x4",	"f16mat3x4 v",	"f16mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
625 		CASE_SAME_TYPE(FLOAT16,	"f16mat4x2",	"f16mat4x2 v",	"f16mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
626 		CASE_SAME_TYPE(FLOAT16,	"f16mat4x3",	"f16mat4x3 v",	"f16mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
627 		CASE_SAME_TYPE(FLOAT16,	"f16mat4x4",	"f16mat4x4 v",	"f16mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
628 		CASE_SAME_TYPE(0,		"f32mat2x2",	"f32mat2x2 v",	"f32mat2x2(1, 2, 3, 4)"),
629 		CASE_SAME_TYPE(0,		"f32mat2x3",	"f32mat2x3 v",	"f32mat2x3(1, 2, 3, 4, 5, 6)"),
630 		CASE_SAME_TYPE(0,		"f32mat2x4",	"f32mat2x4 v",	"f32mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
631 		CASE_SAME_TYPE(0,		"f32mat3x2",	"f32mat3x2 v",	"f32mat3x2(1, 2, 3, 4, 5, 6)"),
632 		CASE_SAME_TYPE(0,		"f32mat3x3",	"f32mat3x3 v",	"f32mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
633 		CASE_SAME_TYPE(0,		"f32mat3x4",	"f32mat3x4 v",	"f32mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
634 		CASE_SAME_TYPE(0,		"f32mat4x2",	"f32mat4x2 v",	"f32mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
635 		CASE_SAME_TYPE(0,		"f32mat4x3",	"f32mat4x3 v",	"f32mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
636 		CASE_SAME_TYPE(0,		"f32mat4x4",	"f32mat4x4 v",	"f32mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
637 		CASE_SAME_TYPE(FLOAT64,	"f64mat2x2",	"f64mat2x2 v",	"f64mat2x2(1, 2, 3, 4)"),
638 		CASE_SAME_TYPE(FLOAT64,	"f64mat2x3",	"f64mat2x3 v",	"f64mat2x3(1, 2, 3, 4, 5, 6)"),
639 		CASE_SAME_TYPE(FLOAT64,	"f64mat2x4",	"f64mat2x4 v",	"f64mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
640 		CASE_SAME_TYPE(FLOAT64,	"f64mat3x2",	"f64mat3x2 v",	"f64mat3x2(1, 2, 3, 4, 5, 6)"),
641 		CASE_SAME_TYPE(FLOAT64,	"f64mat3x3",	"f64mat3x3 v",	"f64mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
642 		CASE_SAME_TYPE(FLOAT64,	"f64mat3x4",	"f64mat3x4 v",	"f64mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
643 		CASE_SAME_TYPE(FLOAT64,	"f64mat4x2",	"f64mat4x2 v",	"f64mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
644 		CASE_SAME_TYPE(FLOAT64,	"f64mat4x3",	"f64mat4x3 v",	"f64mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
645 		CASE_SAME_TYPE(FLOAT64,	"f64mat4x4",	"f64mat4x4 v",	"f64mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
646 
647 		CASE_WITH_REVERSE(ALL, INT8,
648 			"i8",			"int8_t v",			"int8_t(-2)",
649 			"u8",			"uint8_t v",		"uint8_t(0xFE)"),
650 		CASE_WITH_REVERSE(ALL, INT16,
651 			"i16",			"int16_t v",		"int16_t(-2)",
652 			"u16",			"uint16_t v",		"uint16_t(0xFFFE)"),
653 		CASE_WITH_REVERSE(ALL, 0,
654 			"i32",			"int32_t v",		"int32_t(-2)",
655 			"u32",			"uint32_t v",		"uint32_t(0xFFFFFFFE)"),
656 		CASE_WITH_REVERSE(ALL, INT64,
657 			"i64",			"int64_t v",		"int64_t(-2UL)",
658 			"u64",			"uint64_t v",		"uint64_t(0xFFFFFFFFFFFFFFFEUL)"),
659 		CASE_WITH_REVERSE(ALL, FLOAT16 | INT16,
660 			"f16",			"float16_t v",		"float16_t(1.0)",
661 			"u16",			"uint16_t v",		"uint16_t(0x3C00)"),
662 		CASE_WITH_REVERSE(ALL, 0,
663 			"f32",			"float32_t v",		"float32_t(1.0)",
664 			"u32",			"uint32_t v",		"uint32_t(0x3F800000)"),
665 		CASE_WITH_REVERSE(ALL, FLOAT64 | INT64,
666 			"f64",			"float64_t v",		"float64_t(1.0)",
667 			"u64",			"uint64_t v",		"uint64_t(0x3FF0000000000000UL)"),
668 
669 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
670 			"u16",			"uint16_t v",		"uint16_t(0x1234)",
671 			"u8_array",		"uint8_t v[2]",		makeU8Array({0x34, 0x12})),
672 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
673 			"u32",			"uint32_t v",		"uint32_t(0x12345678)",
674 			"u8_array",		"uint8_t v[4]",		makeU8Array({0x78, 0x56, 0x34, 0x12})),
675 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
676 			"u32",			"uint32_t v",		"uint32_t(0x12345678)",
677 			"u16_array",	"uint16_t v[2]",	makeU16Array({0x5678, 0x1234})),
678 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
679 			"u64",			"uint64_t v",		"uint64_t(0x1234567890ABCDEFUL)",
680 			"u8_array",		"uint8_t v[8]",		makeU8Array({0xEF, 0xCD, 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12})),
681 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
682 			"u64",			"uint64_t v",		"uint64_t(0x1234567890ABCDEFUL)",
683 			"u16_array",	"uint16_t v[4]",	makeU16Array({0xCDEF, 0x90AB, 0x5678, 0x1234})),
684 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
685 			"u64",			"uint64_t v",		"uint64_t(0x1234567890ABCDEFUL)",
686 			"u32_array",	"uint32_t v[2]",	makeU32Array({0x90ABCDEF, 0x12345678})),
687 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
688 			"i16",			"int16_t v",		"int16_t(-2)",
689 			"u8_array",		"uint8_t v[2]",		makeU8Array({0xFE, 0xFF})),
690 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
691 			 "i32",			"int32_t v",		"int32_t(-2)",
692 			 "u8_array",	"uint8_t v[4]",		makeU8Array({0xFE, 0xFF, 0xFF, 0xFF})),
693 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
694 			 "i32",			"int32_t v",		"int32_t(-2)",
695 			 "u16_array",	"uint16_t v[2]",	makeU16Array({0xFFFE, 0xFFFF})),
696 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
697 			 "i64",			"int64_t v",		"int64_t(-2UL)",
698 			 "u8_array",	"uint8_t v[8]",		makeU8Array({0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF})),
699 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
700 			 "i64",			"int64_t v",		"int64_t(-2UL)",
701 			 "u16_array",	"uint16_t v[4]",	makeU16Array({0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF})),
702 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
703 			 "i64",			"int64_t v",		"int64_t(-2UL)",
704 			 "u32_array",	"uint32_t v[2]",	makeU32Array({0xFFFFFFFE, 0xFFFFFFFF})),
705 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT16 | INT8,
706 			 "f16",			"float16_t v",		"float16_t(1.0)",
707 			 "u8_array",	"uint8_t v[2]",		makeU8Array({0x00, 0x3C})),
708 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
709 			 "f32",			"float32_t v",		"float32_t(1.0)",
710 			 "u8_array",	"uint8_t v[4]",		makeU8Array({0x00, 0x00, 0x80, 0x3F})),
711 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
712 			 "f32",			"float32_t v",		"float32_t(1.0)",
713 			 "u16_array",	"uint16_t v[2]",	makeU16Array({0x0000, 0x3F80})),
714 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT8,
715 			 "f64",			"float64_t v",		"float64_t(1.0)",
716 			 "u8_array",	"uint8_t v[8]",		makeU8Array({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F})),
717 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT16,
718 			"f64",			"float64_t v",		"float64_t(1.0)",
719 			"u16_array",	"uint16_t v[4]",	makeU16Array({0x0000, 0x0000, 0x0000, 0x3FF0})),
720 		CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64,
721 			 "f64",			"float64_t v",		"float64_t(1.0)",
722 			 "u32_array",	"uint32_t v[2]",	makeU32Array({0x00000000, 0x3FF00000})),
723 
724 		CASE(DEFAULT | STD430, 0,
725 			 "vec4_array",	"vec4 v[3]",		"vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
726 			 "vec2_array",	"vec2 v[6]",		"vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
727 		CASE(STD140, 0,
728 			 "vec4_array",   "vec4 v[3]",		"vec4[](vec4(1, 1, 999, 999), vec4(2, 2, 999, 999), vec4(3, 3, 999, 999))",
729 			 "vec2_array",	"vec2 v[3]",		"vec2[](vec2(1), vec2(2), vec2(3))"),
730 		CASE(SCALAR, 0,
731 			 "vec4_array",	"vec4 v[3]",		"vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
732 			 "vec2_array",	"vec2 v[6]",		"vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
733 
734 		CASE(DEFAULT | STD430, 0,
735 			 "vec4_array",	"vec4 v[3]",		"vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
736 			 "vec3_array",	"vec3 v[3]",		"vec3[](vec3(1), vec3(2), vec3(3))"),
737 		CASE(STD140, 0,
738 			 "vec4_array",	"vec4 v[3]",		"vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
739 			 "vec3_array",	"vec3 v[3]",		"vec3[](vec3(1), vec3(2), vec3(3))"),
740 		CASE(SCALAR, 0,
741 			 "vec4_array",	"vec4 v[3]",		"vec4[](vec4(1, 1, 1, 2), vec4(2, 2, 3, 3), vec4(3, 4, 4, 4))",
742 			 "vec3_array",	"vec3 v[4]",		"vec3[](vec3(1), vec3(2), vec3(3), vec3(4))"),
743 
744 		CASE_EXTRA(DEFAULT | STD430 | SCALAR, INT8,
745 			"struct s { int a; int b; }",
746 			"u8_array",			"uint8_t v[8]",	makeU8Array({2, 0, 0, 0, 0xFE, 0xFF, 0xFF, 0xFF}),
747 			"struct_int_int",	"s v",			"s(2, -2)"),
748 		CASE_EXTRA(ALL, 0,
749 			"struct s { int a; int b; }",
750 			"uvec2",				"uvec2 v",		"uvec2(2, 0xFFFFFFFE)",
751 			"struct_int_int",	"s v",			"s(2, -2)"),
752 	};
753 
754 #undef CASE_EXTRA
755 #undef CASE_EXTRA_WITH_REVERSE
756 #undef CASE_WITH_REVERSE
757 #undef CASE_SAME_TYPE
758 #undef CASE
759 
760 	for (deUint32 i = 0; i < cases.size(); i++)
761 	{
762 		for (int syncIndex = 0; syncIndex < AliasTest::SynchronizationCount; syncIndex++)
763 		{
764 			const AliasTest::Synchronization sync = AliasTest::Synchronization(syncIndex);
765 
766 			for (int funcIndex = 0; funcIndex < AliasTest::FunctionCount; funcIndex++)
767 			{
768 				const AliasTest::Function func = AliasTest::Function(funcIndex);
769 
770 				for (int layoutIndex = 0; layoutIndex < AliasTest::LayoutCount; layoutIndex++)
771 				{
772 					const AliasTest::LayoutFlags layout = AliasTest::LayoutFlags(1 << layoutIndex);
773 
774 					AliasTest::CaseDef c = cases[i];
775 
776 					if (c.writeDesc == c.readDesc)
777 						continue;
778 
779 					if ((c.layout & layout) == 0)
780 						continue;
781 
782 					c.layout = layout;
783 					c.func = func;
784 					c.sync = sync;
785 
786 					group->addChild(new AliasTest(group->getTestContext(), c));
787 				}
788 			}
789 		}
790 	}
791 }
792 
793 class ZeroTest : public vkt::TestCase
794 {
795 public:
796 	struct CaseDef
797 	{
798 		glu::DataType zeroElementType;
799 		glu::DataType fieldType[2];
800 		deUint32 elements;
801 
testNamevkt::compute::__anon5d3a54a00111::ZeroTest::CaseDef802 		std::string testName() const
803 		{
804 			std::string name = glu::getDataTypeName(zeroElementType);
805 			name += "_array_to";
806 
807 			for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(fieldType); ++i)
808 			{
809 				if (fieldType[i] == glu::TYPE_INVALID)
810 					break;
811 				name += "_";
812 				name += glu::getDataTypeName(fieldType[i]);
813 			}
814 			name += "_array_" + de::toString(elements);
815 			return name;
816 		}
817 	};
818 
ZeroTest(tcu::TestContext & testCtx,const CaseDef & caseDef)819 	ZeroTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
820 		: TestCase(testCtx, caseDef.testName(), caseDef.testName()),
821 		m_caseDef(caseDef)
822 	{
823 	}
824 
825 	virtual void checkSupport(Context& context) const;
826 	void initPrograms(SourceCollections& sourceCollections) const;
827 
828 	class Instance : public vkt::TestInstance
829 	{
830 	public:
Instance(Context & context)831 		Instance(Context& context)
832 			: TestInstance(context)
833 		{
834 		}
835 
iterate(void)836 		tcu::TestStatus iterate(void)
837 		{
838 			return runCompute(m_context, 1u);
839 		}
840 	};
841 
createInstance(Context & context) const842 	TestInstance* createInstance(Context& context) const
843 	{
844 		return new Instance(context);
845 	}
846 
847 private:
848 	CaseDef m_caseDef;
849 };
850 
checkSupport(Context & context) const851 void ZeroTest::checkSupport(Context& context) const
852 {
853 	CheckSupportParams p;
854 	deMemset(&p, 0, sizeof(p));
855 
856 	DE_ASSERT(!glu::isDataTypeFloat16OrVec(m_caseDef.zeroElementType));
857 
858 	p.useType(m_caseDef.zeroElementType);
859 	p.useType(m_caseDef.fieldType[0]);
860 	p.useType(m_caseDef.fieldType[1]);
861 
862 	checkSupportWithParams(context, p);
863 }
864 
getDataTypeLiteral(glu::DataType dt,std::string baseValue)865 std::string getDataTypeLiteral(glu::DataType dt, std::string baseValue)
866 {
867 	using namespace glu;
868 
869 	if (isDataTypeVector(dt))
870 	{
871 		std::string elemValue = getDataTypeLiteral(getDataTypeScalarType(dt), baseValue);
872 
873 		std::ostringstream result;
874 		result << getDataTypeName(dt) << "(";
875 		for (int i = 0; i < getDataTypeScalarSize(dt); ++i)
876 		{
877 			if (i > 0)
878 				result << ", ";
879 			result << elemValue;
880 		}
881 		result << ")";
882 		return result.str();
883 	}
884 	else if (isDataTypeScalar(dt))
885 	{
886 		return getDataTypeName(dt) + std::string("(") + baseValue + std::string(")");
887 	}
888 	else
889 	{
890 		DE_ASSERT(0);
891 		return std::string();
892 	}
893 }
894 
initPrograms(SourceCollections & sourceCollections) const895 void ZeroTest::initPrograms(SourceCollections& sourceCollections) const
896 {
897 	using namespace glu;
898 
899 	std::ostringstream src;
900 
901 	src << "#version 450\n"
902 		<< "#extension GL_EXT_shared_memory_block : enable\n"
903 		<< "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
904 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
905 
906 	// Large enough to cover the largest B block even if just 8-bit elements.
907 	// Small enough to fit in the minimum shared memory size limit even if with uvec4.
908 	src << "shared A { " << getDataTypeName(m_caseDef.zeroElementType) << " arr[256]; } zero;\n";
909 
910 	src << "struct st {\n"
911 		<< "    " << getDataTypeName(m_caseDef.fieldType[0]) << " x;\n";
912 	if (m_caseDef.fieldType[1])
913 		src << "    " << getDataTypeName(m_caseDef.fieldType[1]) << " y;\n";
914 	src << "};\n";
915 
916 
917 	src << "shared B { st arr[4]; };\n"
918 		<< "layout(set = 0, binding = 0) buffer Result { uint result; };\n"
919 		<< "void main() {\n"
920 		<< "for (int i = 0; i < zero.arr.length(); i++) {\n"
921 		<< "    zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "1") << ";\n"
922 		<< "  }\n"
923 		<< "  for (int i = 0; i < zero.arr.length(); i++) {\n"
924 		<< "    zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "0") << ";\n"
925 		<< "  }\n"
926 		<< "  result = (\n";
927 
928 	for (deUint32 i = 0; i < 4; i++)
929 	{
930 		src << "    ";
931 		if (i > 0)
932 			src << "&& ";
933 		src << "(arr[" << de::toString(i) << "].x == " << getDataTypeLiteral(m_caseDef.fieldType[0], "0") << ")\n";
934 		if (m_caseDef.fieldType[1])
935 			src << "    && (arr[" << de::toString(i) << "].y == " << getDataTypeLiteral(m_caseDef.fieldType[1], "0") << ")\n";
936 	}
937 
938 	src << "  ) ? 0 : 0xFF;\n"
939 		<< "}\n";
940 
941 	sourceCollections.glslSources.add("comp")
942 		<< ComputeSource(src.str())
943 		<< vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
944 								  vk::ShaderBuildOptions::Flags(0u));
945 }
946 
isTestedZeroElementType(glu::DataType dt)947 bool isTestedZeroElementType(glu::DataType dt)
948 {
949 	using namespace glu;
950 
951 	// Select only a few interesting types.
952 	switch (dt)
953 	{
954 	case TYPE_UINT:
955 	case TYPE_UINT_VEC4:
956 	case TYPE_UINT8:
957 	case TYPE_UINT8_VEC4:
958 	case TYPE_UINT16:
959 		return true;
960 	default:
961 		return false;
962 	}
963 }
964 
isTestedFieldType(glu::DataType dt)965 bool isTestedFieldType(glu::DataType dt)
966 {
967 	using namespace glu;
968 
969 	// Select only a few interesting types.
970 	switch (dt)
971 	{
972 	case TYPE_UINT:
973 	case TYPE_UINT_VEC3:
974 	case TYPE_UINT8:
975 	case TYPE_UINT16:
976 	case TYPE_FLOAT:
977 	case TYPE_FLOAT_VEC4:
978 	case TYPE_FLOAT16:
979 	case TYPE_DOUBLE:
980 	case TYPE_DOUBLE_VEC4:
981 	case TYPE_BOOL:
982 		return true;
983 
984 	default:
985 		return false;
986 	}
987 }
988 
AddZeroTests(tcu::TestCaseGroup * group)989 void AddZeroTests(tcu::TestCaseGroup* group)
990 {
991 	using namespace glu;
992 
993 	ZeroTest::CaseDef c;
994 
995 	for (deUint32 i = 0; i < TYPE_LAST; ++i)
996 	{
997 		c.zeroElementType = DataType(i);
998 
999 		if (isTestedZeroElementType(c.zeroElementType))
1000 		{
1001 			deUint32 idx[2] = { 0, 0 };
1002 
1003 			while (idx[1] < TYPE_LAST && idx[0] < TYPE_LAST)
1004 			{
1005 				c.fieldType[0] = DataType(idx[0]);
1006 				c.fieldType[1] = DataType(idx[1]);
1007 
1008 				if (isTestedFieldType(c.fieldType[0]) &&
1009 					(c.fieldType[1] == TYPE_INVALID || isTestedFieldType(c.fieldType[1])))
1010 				{
1011 					for (deUint32 elements = 1; elements <= 4; ++elements)
1012 					{
1013 						c.elements = elements;
1014 						group->addChild(new ZeroTest(group->getTestContext(), c));
1015 					}
1016 				}
1017 
1018 				idx[0]++;
1019 				if (idx[0] >= TYPE_LAST)
1020 				{
1021 					idx[1]++;
1022 					idx[0] = 0;
1023 				}
1024 			}
1025 		}
1026 	}
1027 }
1028 
1029 class PaddingTest : public vkt::TestCase
1030 {
1031 public:
1032 	struct CaseDef
1033 	{
1034 		std::vector<glu::DataType> types;
1035 		std::vector<deUint32> offsets;
1036 		std::vector<std::string> values;
1037 		deUint32 expected[32];
1038 
testNamevkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1039 		std::string testName() const
1040 		{
1041 			DE_ASSERT(types.size() > 0);
1042 			DE_ASSERT(types.size() == offsets.size());
1043 			DE_ASSERT(types.size() == values.size());
1044 
1045 			std::string name;
1046 			for (deUint32 i = 0; i < types.size(); ++i)
1047 			{
1048 				if (i > 0)
1049 					name += "_";
1050 				name += glu::getDataTypeName(types[i]);
1051 				name += "_" + de::toString(offsets[i]);
1052 			}
1053 			return name;
1054 		}
1055 
addvkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1056 		void add(glu::DataType dt, deUint32 offset, const std::string& v)
1057 		{
1058 			types.push_back(dt);
1059 			offsets.push_back(offset);
1060 			values.push_back(v);
1061 		}
1062 
needsScalarvkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1063 		bool needsScalar() const
1064 		{
1065 			for (deUint32 i = 0; i < offsets.size(); ++i)
1066 			{
1067 				if (offsets[i] % 4 != 0)
1068 					return true;
1069 			}
1070 			return false;
1071 		}
1072 	};
1073 
PaddingTest(tcu::TestContext & testCtx,const CaseDef & caseDef)1074 	PaddingTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
1075 		: TestCase(testCtx, caseDef.testName(), caseDef.testName()),
1076 		m_caseDef(caseDef)
1077 	{
1078 	}
1079 
1080 	virtual void checkSupport(Context& context) const;
1081 	void initPrograms(SourceCollections& sourceCollections) const;
1082 
1083 	class Instance : public vkt::TestInstance
1084 	{
1085 	public:
Instance(Context & context,const CaseDef & caseDef)1086 		Instance(Context& context, const CaseDef& caseDef)
1087 			: TestInstance(context),
1088 			  m_caseDef(caseDef)
1089 		{
1090 		}
1091 
iterate(void)1092 		tcu::TestStatus iterate(void)
1093 		{
1094 			return runCompute(m_context, 1u);
1095 		}
1096 
1097 	private:
1098 		CaseDef m_caseDef;
1099 	};
1100 
createInstance(Context & context) const1101 	TestInstance* createInstance(Context& context) const
1102 	{
1103 		return new Instance(context, m_caseDef);
1104 	}
1105 
1106 private:
1107 	CaseDef m_caseDef;
1108 };
1109 
checkSupport(Context & context) const1110 void PaddingTest::checkSupport(Context& context) const
1111 {
1112 	CheckSupportParams p;
1113 	deMemset(&p, 0, sizeof(p));
1114 
1115 	for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1116 		p.useType(m_caseDef.types[i]);
1117 
1118 	p.needsScalar = m_caseDef.needsScalar();
1119 
1120 	checkSupportWithParams(context, p);
1121 }
1122 
initPrograms(SourceCollections & sourceCollections) const1123 void PaddingTest::initPrograms(SourceCollections& sourceCollections) const
1124 {
1125 	using namespace glu;
1126 
1127 	std::ostringstream src;
1128 
1129 	src << "#version 450\n"
1130 		<< "#extension GL_EXT_shared_memory_block : enable\n"
1131 		<< "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
1132 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
1133 
1134 	src	<< "shared A { uint32_t words[32]; };\n";
1135 
1136 	if (m_caseDef.needsScalar())
1137 	{
1138 		src << "#extension GL_EXT_scalar_block_layout : enable\n"
1139 			<< "layout (scalar) ";
1140 	}
1141 
1142 	src << "shared B {\n";
1143 
1144 	for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1145 	{
1146 		src << "  layout(offset = " << m_caseDef.offsets[i] << ") "
1147 			<< glu::getDataTypeName(m_caseDef.types[i]) << " x" << i << ";\n";
1148 	}
1149 
1150 	src	<< "};\n"
1151 		<< "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1152 
1153 	src	<< "void main() {\n"
1154 		<< "for (int i = 0; i < 32; i++) words[i] = 0;\n";
1155 
1156 	for (deUint32 i = 0; i < m_caseDef.values.size(); ++i)
1157 		src << "x" << i << " = " << m_caseDef.values[i] << ";\n";
1158 
1159 	src << "result = 32;\n";
1160 	for (deUint32 i = 0; i < 32; ++i)
1161 	{
1162 		src	<< "if (words[" << std::dec << i << "] == 0x"
1163 			<< std::uppercase << std::hex << m_caseDef.expected[i]
1164 			<< ") result--;\n";
1165 	}
1166 
1167 	src << "}\n";
1168 
1169 	sourceCollections.glslSources.add("comp")
1170 		<< ComputeSource(src.str())
1171 		<< vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1172 								  vk::ShaderBuildOptions::Flags(0u));
1173 }
1174 
AddPaddingTests(tcu::TestCaseGroup * group)1175 void AddPaddingTests(tcu::TestCaseGroup* group)
1176 {
1177 	using namespace glu;
1178 
1179 	for (deUint32 i = 0; i < 31; ++i)
1180 	{
1181 		for (deUint32 j = i + 1; j < 32; j += 4)
1182 		{
1183 			PaddingTest::CaseDef c;
1184 			deMemset(&c, 0, sizeof(c));
1185 
1186 			c.add(TYPE_UINT, 4 * i, "0x1234");
1187 			c.expected[i] = 0x1234;
1188 
1189 			c.add(TYPE_UINT, 4 * j, "0x5678");
1190 			c.expected[j] = 0x5678;
1191 
1192 			group->addChild(new PaddingTest(group->getTestContext(), c));
1193 		}
1194 	}
1195 
1196 	for (deUint32 i = 0; i < 127; ++i)
1197 	{
1198 		for (deUint32 j = i + 1; j < 32; j += 16)
1199 		{
1200 			PaddingTest::CaseDef c;
1201 			deMemset(&c, 0, sizeof(c));
1202 
1203 			deUint8* expected = reinterpret_cast<deUint8*>(c.expected);
1204 
1205 			c.add(TYPE_UINT8, i, "uint8_t(0xAA)");
1206 			expected[i] = 0xAA;
1207 
1208 			c.add(TYPE_UINT8, j, "uint8_t(0xBB)");
1209 			expected[j] = 0xBB;
1210 
1211 			group->addChild(new PaddingTest(group->getTestContext(), c));
1212 		}
1213 	}
1214 }
1215 
1216 class SizeTest : public vkt::TestCase
1217 {
1218 public:
SizeTest(tcu::TestContext & testCtx,deUint32 size)1219 	SizeTest(tcu::TestContext& testCtx, deUint32 size)
1220 		: TestCase(testCtx, de::toString(size), de::toString(size))
1221 		, m_size(size)
1222 	{
1223 		DE_ASSERT(size % 8 == 0);
1224 	}
1225 
1226 	virtual void checkSupport(Context& context) const;
1227 	void initPrograms(SourceCollections& sourceCollections) const;
1228 
1229 	class Instance : public vkt::TestInstance
1230 	{
1231 	public:
Instance(Context & context)1232 		Instance(Context& context)
1233 			: TestInstance(context)
1234 		{
1235 		}
1236 
iterate(void)1237 		tcu::TestStatus iterate(void)
1238 		{
1239 			return runCompute(m_context, 1u);
1240 		}
1241 	};
1242 
createInstance(Context & context) const1243 	TestInstance* createInstance(Context& context) const
1244 	{
1245 		return new Instance(context);
1246 	}
1247 
1248 private:
1249 	deUint32 m_size;
1250 };
1251 
checkSupport(Context & context) const1252 void SizeTest::checkSupport(Context& context) const
1253 {
1254 	context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
1255 	context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
1256 
1257 	if (context.getDeviceProperties().limits.maxComputeSharedMemorySize < m_size)
1258 		TCU_THROW(NotSupportedError, "Not enough shared memory supported.");
1259 }
1260 
initPrograms(SourceCollections & sourceCollections) const1261 void SizeTest::initPrograms(SourceCollections& sourceCollections) const
1262 {
1263 	using namespace glu;
1264 
1265 	std::ostringstream src;
1266 
1267 	src << "#version 450\n";
1268 	src << "#extension GL_EXT_shared_memory_block : enable\n";
1269 	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
1270 	src << "layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;\n";
1271 
1272 	for (deUint32 i = 0; i < 8; ++i)
1273 		src << "shared B" << i << " { uint32_t words[" << (m_size / 4) << "]; } b" << i << ";\n";
1274 
1275 	src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1276 
1277 	src	<< "void main() {\n";
1278 	src << "  int index = int(gl_LocalInvocationIndex);\n";
1279 	src << "  int size = " << (m_size / 4) << ";\n";
1280 
1281 	src << "  if (index == 0) for (int x = 0; x < size; x++) b0.words[x] = 0xFFFF;\n";
1282 	src << "  barrier();\n";
1283 
1284 	src << "  for (int x = 0; x < size; x++) {\n";
1285 	src << "    if (x % 8 != index) continue;\n";
1286 	for (deUint32 i = 0; i < 8; ++i)
1287 		src << "    if (index == " << i << ") b" << i << ".words[x] = (x << 3) | " << i << ";\n";
1288 	src << "  }\n";
1289 
1290 	src << "  barrier();\n";
1291 	src << "  if (index != 0) return;\n";
1292 
1293 	src << "  int r = size;\n";
1294 	src << "  for (int x = 0; x < size; x++) {\n";
1295 	src << "    int expected = (x << 3) | (x % 8);\n";
1296 	src << "    if (b0.words[x] == expected) r--;\n";
1297 	src << "  }\n";
1298 	src << "  result = r;\n";
1299 	src << "}\n";
1300 
1301 	sourceCollections.glslSources.add("comp")
1302 		<< ComputeSource(src.str())
1303 		<< vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1304 								  vk::ShaderBuildOptions::Flags(0u));
1305 }
1306 
AddSizeTests(tcu::TestCaseGroup * group)1307 void AddSizeTests(tcu::TestCaseGroup* group)
1308 {
1309 	deUint32 sizes[] =
1310 	{
1311 		8u,
1312 		64u,
1313 		4096u,
1314 
1315 		// Dynamic generation of shaders based on properties reported
1316 		// by devices is not allowed in the CTS, so let's create a few
1317 		// variants based on common known maximum sizes.
1318 		16384u,
1319 		32768u,
1320 		49152u,
1321 		65536u,
1322 	};
1323 
1324 	for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(sizes); ++i)
1325 		group->addChild(new SizeTest(group->getTestContext(), sizes[i]));
1326 }
1327 
CreateAmberTestCase(tcu::TestContext & testCtx,const char * name,const char * description,const std::string & filename,const std::vector<std::string> & requirements=std::vector<std::string> ())1328 cts_amber::AmberTestCase* CreateAmberTestCase(tcu::TestContext& testCtx,
1329 											  const char* name,
1330 											  const char* description,
1331 											  const std::string& filename,
1332 											  const std::vector<std::string>& requirements = std::vector<std::string>())
1333 {
1334 	vk::SpirVAsmBuildOptions asm_options(VK_MAKE_VERSION(1, 1, 0), vk::SPIRV_VERSION_1_4);
1335 	asm_options.supports_VK_KHR_spirv_1_4 = true;
1336 
1337 	cts_amber::AmberTestCase *t = cts_amber::createAmberTestCase(testCtx, name, description, "compute/workgroup_memory_explicit_layout", filename, requirements);
1338 	t->setSpirVAsmBuildOptions(asm_options);
1339 	t->addRequirement("VK_KHR_workgroup_memory_explicit_layout");
1340 	return t;
1341 }
1342 
AddCopyMemoryTests(tcu::TestCaseGroup * group)1343 void AddCopyMemoryTests(tcu::TestCaseGroup* group)
1344 {
1345 	tcu::TestContext& testCtx = group->getTestContext();
1346 
1347 	group->addChild(CreateAmberTestCase(testCtx, "basic", "", "copy_memory_basic.amber"));
1348 	group->addChild(CreateAmberTestCase(testCtx, "two_invocations", "", "copy_memory_two_invocations.amber"));
1349 	group->addChild(CreateAmberTestCase(testCtx, "variable_pointers", "", "copy_memory_variable_pointers.amber",
1350 										{ "VariablePointerFeatures.variablePointers" }));
1351 }
1352 
1353 } // anonymous
1354 
createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext & testCtx)1355 tcu::TestCaseGroup* createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext& testCtx)
1356 {
1357 	de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "workgroup_memory_explicit_layout", "VK_KHR_workgroup_memory_explicit_layout tests"));
1358 
1359 	tcu::TestCaseGroup* alias = new tcu::TestCaseGroup(testCtx, "alias", "Aliasing between different blocks and types");
1360 	AddAliasTests(alias);
1361 	tests->addChild(alias);
1362 
1363 	tcu::TestCaseGroup* zero = new tcu::TestCaseGroup(testCtx, "zero", "Manually zero initialize a block and read from another");
1364 	AddZeroTests(zero);
1365 	tests->addChild(zero);
1366 
1367 	tcu::TestCaseGroup* padding = new tcu::TestCaseGroup(testCtx, "padding", "Padding as part of the explicit layout");
1368 	AddPaddingTests(padding);
1369 	tests->addChild(padding);
1370 
1371 	tcu::TestCaseGroup* size = new tcu::TestCaseGroup(testCtx, "size", "Test blocks of various sizes");
1372 	AddSizeTests(size);
1373 	tests->addChild(size);
1374 
1375 	tcu::TestCaseGroup* copy_memory = new tcu::TestCaseGroup(testCtx, "copy_memory", "Test OpCopyMemory with Workgroup memory");
1376 	AddCopyMemoryTests(copy_memory);
1377 	tests->addChild(copy_memory);
1378 
1379 	return tests.release();
1380 }
1381 
1382 } // compute
1383 } // vkt
1384