1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Intel Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief VK_KHR_workgroup_memory_explicit_layout tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeWorkgroupMemoryExplicitLayoutTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
29 #include "vktTestGroupUtil.hpp"
30
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageWithMemory.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDefs.hpp"
39 #include "vkRef.hpp"
40
41 #include "tcuCommandLine.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47
48 #include <algorithm>
49 #include <vector>
50
51 using namespace vk;
52
53 namespace vkt
54 {
55 namespace compute
56 {
57 namespace
58 {
59
60 struct CheckSupportParams
61 {
62 bool needsScalar;
63 bool needsInt8;
64 bool needsInt16;
65 bool needsInt64;
66 bool needsFloat16;
67 bool needsFloat64;
68
useTypevkt::compute::__anon6620ebe50111::CheckSupportParams69 void useType(glu::DataType dt)
70 {
71 using namespace glu;
72
73 needsInt8 |= isDataTypeIntOrIVec8Bit(dt) || isDataTypeUintOrUVec8Bit(dt);
74 needsInt16 |= isDataTypeIntOrIVec16Bit(dt) || isDataTypeUintOrUVec16Bit(dt);
75 needsFloat16 |= isDataTypeFloat16OrVec(dt);
76 needsFloat64 |= isDataTypeDoubleOrDVec(dt);
77 }
78 };
79
checkSupportWithParams(Context & context,const CheckSupportParams & params)80 void checkSupportWithParams(Context& context, const CheckSupportParams& params)
81 {
82 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
83 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
84
85 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout_features;
86 deMemset(&layout_features, 0, sizeof(layout_features));
87 layout_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
88 layout_features.pNext = DE_NULL;
89
90 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
91 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
92 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
93 f16_i8_features.pNext = &layout_features;
94
95 VkPhysicalDeviceFeatures2 features2;
96 deMemset(&features2, 0, sizeof(features2));
97 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
98 features2.pNext = &f16_i8_features;
99 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
100
101 if (params.needsScalar)
102 {
103 if (layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout != VK_TRUE)
104 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayoutScalarBlockLayout not supported");
105 }
106
107 if (params.needsInt8)
108 {
109 if (f16_i8_features.shaderInt8 != VK_TRUE)
110 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
111 if (layout_features.workgroupMemoryExplicitLayout8BitAccess != VK_TRUE)
112 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout8BitAccess not supported");
113 }
114
115 if (params.needsInt16)
116 {
117 if (features2.features.shaderInt16 != VK_TRUE)
118 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
119 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
120 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
121 }
122
123 if (params.needsInt64)
124 {
125 if (features2.features.shaderInt64 != VK_TRUE)
126 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
127 }
128
129 if (params.needsFloat16)
130 {
131 if (f16_i8_features.shaderFloat16 != VK_TRUE)
132 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
133 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
134 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
135 }
136
137 if (params.needsFloat64)
138 {
139 if (features2.features.shaderFloat64 != VK_TRUE)
140 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
141 }
142 }
143
runCompute(Context & context,deUint32 workgroupSize)144 tcu::TestStatus runCompute(Context& context, deUint32 workgroupSize)
145 {
146 const DeviceInterface& vk = context.getDeviceInterface();
147 const VkDevice device = context.getDevice();
148 Allocator& allocator = context.getDefaultAllocator();
149 tcu::TestLog& log = context.getTestContext().getLog();
150
151 de::MovePtr<BufferWithMemory> buffer;
152 VkDescriptorBufferInfo bufferDescriptor;
153
154 VkDeviceSize size = sizeof(deUint32) * workgroupSize;
155
156 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
157 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
158 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
159 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
160
161 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
162
163 deMemset(ptr, 0xFF, static_cast<std::size_t>(size));
164
165 DescriptorSetLayoutBuilder layoutBuilder;
166 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
167
168 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
169 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
170 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
171 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
172 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
173
174 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
175 {
176 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 DE_NULL,
178 (VkPipelineLayoutCreateFlags)0,
179 1,
180 &descriptorSetLayout.get(),
181 0u,
182 DE_NULL,
183 };
184 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
185 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
186 flushAlloc(vk, device, buffer->getAllocation());
187
188 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
189 const VkPipelineShaderStageCreateInfo shaderInfo =
190 {
191 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
192 DE_NULL,
193 0,
194 VK_SHADER_STAGE_COMPUTE_BIT,
195 *shader,
196 "main",
197 DE_NULL,
198 };
199
200 const VkComputePipelineCreateInfo pipelineInfo =
201 {
202 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
203 DE_NULL,
204 0u,
205 shaderInfo,
206 *pipelineLayout,
207 (VkPipeline)0,
208 0u,
209 };
210 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
211
212 const VkQueue queue = context.getUniversalQueue();
213 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
214 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
215 context.getUniversalQueueFamilyIndex());
216 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
217
218 DescriptorSetUpdateBuilder setUpdateBuilder;
219 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
220 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
221 setUpdateBuilder.update(vk, device);
222
223 beginCommandBuffer(vk, *cmdBuffer, 0);
224
225 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
226 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
227
228 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
229
230 endCommandBuffer(vk, *cmdBuffer);
231
232 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
233
234 invalidateAlloc(vk, device, buffer->getAllocation());
235 for (deUint32 i = 0; i < workgroupSize; ++i)
236 {
237 deUint32 expected = i;
238 if (ptr[i] != expected)
239 {
240 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
241 return tcu::TestStatus::fail("compute failed");
242 }
243 }
244
245 return tcu::TestStatus::pass("compute succeeded");
246 }
247
248 class AliasTest : public vkt::TestCase
249 {
250 public:
251 enum Requirements
252 {
253 RequirementNone = 0,
254 RequirementFloat16 = 1 << 0,
255 RequirementFloat64 = 1 << 1,
256 RequirementInt8 = 1 << 2,
257 RequirementInt16 = 1 << 3,
258 RequirementInt64 = 1 << 4,
259 };
260
261 enum Flags
262 {
263 FlagNone = 0,
264 FlagLayoutStd430 = 1 << 0,
265 FlagLayoutStd140 = 1 << 1,
266 FlagLayoutScalar = 1 << 2,
267 FlagFunction = 1 << 3,
268 FlagBarrier = 1 << 4,
269 };
270
271 enum LayoutFlags
272 {
273 LayoutNone = 0,
274
275 LayoutDefault = 1 << 0,
276 LayoutStd140 = 1 << 1,
277 LayoutStd430 = 1 << 2,
278 LayoutScalar = 1 << 3,
279 LayoutAll = LayoutDefault | LayoutStd140 | LayoutStd430 | LayoutScalar,
280
281 LayoutCount = 4,
282 };
283
284 enum Function
285 {
286 FunctionNone = 0,
287 FunctionRead,
288 FunctionWrite,
289 FunctionReadWrite,
290 FunctionCount,
291 };
292
293 enum Synchronization
294 {
295 SynchronizationNone = 0,
296 SynchronizationBarrier,
297 SynchronizationCount,
298 };
299
300 struct CaseDef
301 {
302 std::string extraTypes;
303
304 std::string writeDesc;
305 std::string writeType;
306 std::string writeValue;
307
308 std::string readDesc;
309 std::string readType;
310 std::string readValue;
311
312 LayoutFlags layout;
313 Function func;
314 Synchronization sync;
315 Requirements requirements;
316
CaseDefvkt::compute::__anon6620ebe50111::AliasTest::CaseDef317 CaseDef (const std::string& extraTypes_,
318 const std::string& writeDesc_,
319 const std::string& writeType_,
320 const std::string& writeValue_,
321 const std::string& readDesc_,
322 const std::string& readType_,
323 const std::string& readValue_,
324 LayoutFlags layout_,
325 Function func_,
326 Synchronization sync_,
327 Requirements requirements_)
328 : extraTypes (extraTypes_)
329 , writeDesc (writeDesc_)
330 , writeType (writeType_)
331 , writeValue (writeValue_)
332 , readDesc (readDesc_)
333 , readType (readType_)
334 , readValue (readValue_)
335 , layout (layout_)
336 , func (func_)
337 , sync (sync_)
338 , requirements (requirements_)
339 {}
340
testNamevkt::compute::__anon6620ebe50111::AliasTest::CaseDef341 std::string testName() const
342 {
343 std::string name = writeDesc + "_to_" + readDesc;
344
345 // In a valid test case, only one flag will be set.
346 switch (layout)
347 {
348 case LayoutDefault:
349 name += "_default";
350 break;
351 case LayoutStd140:
352 name += "_std140";
353 break;
354 case LayoutStd430:
355 name += "_std430";
356 break;
357 case LayoutScalar:
358 name += "_scalar";
359 break;
360 default:
361 DE_ASSERT(0);
362 break;
363 }
364
365 switch (func)
366 {
367 case FunctionNone:
368 break;
369 case FunctionRead:
370 name += "_func_read";
371 break;
372 case FunctionWrite:
373 name += "_func_write";
374 break;
375 case FunctionReadWrite:
376 name += "_func_read_write";
377 break;
378 default:
379 DE_ASSERT(0);
380 break;
381 }
382
383 switch (sync)
384 {
385 case SynchronizationNone:
386 break;
387 case SynchronizationBarrier:
388 name += "_barrier";
389 break;
390 default:
391 DE_ASSERT(0);
392 break;
393 }
394
395 return name;
396 }
397 };
398
AliasTest(tcu::TestContext & testCtx,const CaseDef & caseDef)399 AliasTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
400 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
401 m_caseDef(caseDef)
402 {
403 }
404
405 virtual void checkSupport(Context& context) const;
406 void initPrograms(SourceCollections& sourceCollections) const;
407
408 class Instance : public vkt::TestInstance
409 {
410 public:
Instance(Context & context,const CaseDef & caseDef)411 Instance(Context& context, const CaseDef& caseDef)
412 : TestInstance(context),
413 m_caseDef(caseDef)
414 {
415 }
416
iterate(void)417 tcu::TestStatus iterate(void)
418 {
419 return runCompute(m_context, 1u);
420 }
421
422 private:
423 CaseDef m_caseDef;
424 };
425
createInstance(Context & context) const426 TestInstance* createInstance(Context& context) const
427 {
428 return new Instance(context, m_caseDef);
429 }
430
431 private:
432 CaseDef m_caseDef;
433 };
434
checkSupport(Context & context) const435 void AliasTest::checkSupport(Context& context) const
436 {
437 CheckSupportParams p;
438 deMemset(&p, 0, sizeof(p));
439
440 p.needsScalar = m_caseDef.layout == LayoutScalar;
441 p.needsInt8 = m_caseDef.requirements & RequirementInt8;
442 p.needsInt16 = m_caseDef.requirements & RequirementInt16;
443 p.needsInt64 = m_caseDef.requirements & RequirementInt64;
444 p.needsFloat16 = m_caseDef.requirements & RequirementFloat16;
445 p.needsFloat64 = m_caseDef.requirements & RequirementFloat64;
446
447 checkSupportWithParams(context, p);
448 }
449
initPrograms(SourceCollections & sourceCollections) const450 void AliasTest::initPrograms(SourceCollections& sourceCollections) const
451 {
452 std::string layout;
453 switch (m_caseDef.layout)
454 {
455 case LayoutStd140:
456 layout = "layout(std140)";
457 break;
458 case LayoutStd430:
459 layout = "layout(std430)";
460 break;
461 case LayoutScalar:
462 layout = "layout(scalar)";
463 break;
464 default:
465 // No layout specified.
466 break;
467 }
468
469 std::ostringstream src;
470
471 src << "#version 450\n";
472 src << "#extension GL_EXT_shared_memory_block : enable\n";
473 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
474
475 if (m_caseDef.layout == LayoutScalar)
476 src << "#extension GL_EXT_scalar_block_layout : enable\n";
477
478 src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
479
480 if (!m_caseDef.extraTypes.empty())
481 src << m_caseDef.extraTypes << ";\n";
482
483 src << layout << "shared A { " << m_caseDef.writeType << "; } a;\n";
484 src << layout << "shared B { " << m_caseDef.readType << "; } b;\n";
485 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
486
487 if (m_caseDef.func == FunctionRead ||
488 m_caseDef.func == FunctionReadWrite)
489 {
490 src << "void read(int index) {\n";
491 src << " if (b.v == " << m_caseDef.readValue << ")\n";
492 src << " result = index;\n";
493 src << "}\n";
494 }
495
496 if (m_caseDef.func == FunctionWrite ||
497 m_caseDef.func == FunctionReadWrite)
498 {
499 src << "void write(int index) {\n";
500 src << " if (index == 0)\n";
501 src << " a.v = " << m_caseDef.writeValue << ";\n";
502 src << "}\n";
503 }
504
505 src << "void main() {\n";
506 src << " int index = int(gl_LocalInvocationIndex);\n";
507
508 if (m_caseDef.func == FunctionWrite)
509 src << " write(index);\n";
510 else
511 src << " a.v = " << m_caseDef.writeValue << ";\n";
512
513 if (m_caseDef.sync == SynchronizationBarrier)
514 src << " barrier();\n";
515
516 if (m_caseDef.func == FunctionRead ||
517 m_caseDef.func == FunctionReadWrite)
518 {
519 src << " read(index);\n";
520 }
521 else
522 {
523 src << " if (b.v == " << m_caseDef.readValue << ")\n";
524 src << " result = index;\n";
525 }
526 src << "}\n";
527
528 deUint32 buildFlags =
529 m_caseDef.layout == LayoutScalar ? ShaderBuildOptions::FLAG_ALLOW_WORKGROUP_SCALAR_OFFSETS :
530 ShaderBuildOptions::Flags(0u);
531
532 sourceCollections.glslSources.add("comp")
533 << glu::ComputeSource(src.str())
534 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4, buildFlags, true);
535 }
536
makeArray(const std::string & type,const std::vector<deUint64> & values)537 std::string makeArray(const std::string& type, const std::vector<deUint64>& values)
538 {
539 std::ostringstream s;
540 s << type << "[](";
541 for (std::size_t i = 0; i < values.size(); i++)
542 {
543 s << type << "(" << std::to_string(values[i]) << ")";
544 if (i != values.size() - 1)
545 s << ", ";
546 }
547 s << ")";
548 return s.str();
549 }
550
makeU8Array(const std::vector<deUint64> & values)551 std::string makeU8Array(const std::vector<deUint64>& values)
552 {
553 return makeArray("uint8_t", values);
554 }
555
makeU16Array(const std::vector<deUint64> & values)556 std::string makeU16Array(const std::vector<deUint64>& values)
557 {
558 return makeArray("uint16_t", values);
559 }
560
makeU32Array(const std::vector<deUint64> & values)561 std::string makeU32Array(const std::vector<deUint64>& values)
562 {
563 return makeArray("uint32_t", values);
564 }
565
AddAliasTests(tcu::TestCaseGroup * group)566 void AddAliasTests(tcu::TestCaseGroup* group)
567 {
568 const int DEFAULT = AliasTest::LayoutDefault;
569 const int STD140 = AliasTest::LayoutStd140;
570 const int STD430 = AliasTest::LayoutStd430;
571 const int SCALAR = AliasTest::LayoutScalar;
572 const int ALL = DEFAULT | STD140 | STD430 | SCALAR;
573
574 const int FLOAT16 = AliasTest::RequirementFloat16;
575 const int FLOAT64 = AliasTest::RequirementFloat64;
576 const int INT8 = AliasTest::RequirementInt8;
577 const int INT16 = AliasTest::RequirementInt16;
578 const int INT64 = AliasTest::RequirementInt64;
579
580 #define CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2) \
581 { E, D1, T1, V1, D2, T2, V2, AliasTest::LayoutFlags(L), AliasTest::FunctionNone, AliasTest::SynchronizationNone, AliasTest::Requirements(R) }
582
583 #define CASE_EXTRA_WITH_REVERSE(L, R, E, D1, T1, V1, D2, T2, V2) \
584 CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2), \
585 CASE_EXTRA(L, R, E, D2, T2, V2, D1, T1, V1)
586
587 #define CASE_WITH_REVERSE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA_WITH_REVERSE(L, R, "", D1, T1, V1, D2, T2, V2)
588 #define CASE_SAME_TYPE(R, D, T, V) CASE_EXTRA(ALL, R, "", D, T, V, D, T, V)
589 #define CASE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA(L, R, "", D1, T1, V1, D2, T2, V2)
590
591
592 const std::vector<AliasTest::CaseDef> cases
593 {
594 CASE_SAME_TYPE(0, "bool_true", "bool v", "true"),
595 CASE_SAME_TYPE(0, "bool_false", "bool v", "false"),
596 CASE_SAME_TYPE(0, "bvec2", "bvec2 v", "bvec2(false, true)"),
597 CASE_SAME_TYPE(0, "bvec3", "bvec3 v", "bvec3(false, true, true)"),
598 CASE_SAME_TYPE(0, "bvec4", "bvec4 v", "bvec4(false, true, true, false)"),
599 CASE_SAME_TYPE(INT8, "u8", "uint8_t v", "uint8_t(10)"),
600 CASE_SAME_TYPE(INT8, "u8vec2", "u8vec2 v", "u8vec2(10, 20)"),
601 CASE_SAME_TYPE(INT8, "u8vec3", "u8vec3 v", "u8vec3(10, 20, 30)"),
602 CASE_SAME_TYPE(INT8, "u8vec4", "u8vec4 v", "u8vec4(10, 20, 30, 40)"),
603 CASE_SAME_TYPE(INT8, "i8", "int8_t v", "int8_t(-10)"),
604 CASE_SAME_TYPE(INT8, "i8vec2", "i8vec2 v", "i8vec2(-10, 20)"),
605 CASE_SAME_TYPE(INT8, "i8vec3", "i8vec3 v", "i8vec3(-10, 20, -30)"),
606 CASE_SAME_TYPE(INT8, "i8vec4", "i8vec4 v", "i8vec4(-10, 20, -30, 40)"),
607 CASE_SAME_TYPE(INT16, "u16", "uint16_t v", "uint16_t(1000)"),
608 CASE_SAME_TYPE(INT16, "u16vec2", "u16vec2 v", "u16vec2(1000, 2000)"),
609 CASE_SAME_TYPE(INT16, "u16vec3", "u16vec3 v", "u16vec3(1000, 2000, 3000)"),
610 CASE_SAME_TYPE(INT16, "u16vec4", "u16vec4 v", "u16vec4(1000, 2000, 3000, 4000)"),
611 CASE_SAME_TYPE(INT16, "i16", "int16_t v", "int16_t(-1000)"),
612 CASE_SAME_TYPE(INT16, "i16vec2", "i16vec2 v", "i16vec2(-1000, 2000)"),
613 CASE_SAME_TYPE(INT16, "i16vec3", "i16vec3 v", "i16vec3(-1000, 2000, -3000)"),
614 CASE_SAME_TYPE(INT16, "i16vec4", "i16vec4 v", "i16vec4(-1000, 2000, -3000, 4000)"),
615 CASE_SAME_TYPE(0, "u32", "uint32_t v", "uint32_t(100)"),
616 CASE_SAME_TYPE(0, "uvec2", "uvec2 v", "uvec2(100, 200)"),
617 CASE_SAME_TYPE(0, "uvec3", "uvec3 v", "uvec3(100, 200, 300)"),
618 CASE_SAME_TYPE(0, "uvec4", "uvec4 v", "uvec4(100, 200, 300, 400)"),
619 CASE_SAME_TYPE(0, "i32", "int32_t v", "int32_t(-100)"),
620 CASE_SAME_TYPE(0, "ivec2", "ivec2 v", "ivec2(-100, 200)"),
621 CASE_SAME_TYPE(0, "ivec3", "ivec3 v", "ivec3(-100, 200, -300)"),
622 CASE_SAME_TYPE(0, "ivec4", "ivec4 v", "ivec4(-100, 200, -300, 400)"),
623 CASE_SAME_TYPE(INT64, "u64", "uint64_t v", "uint64_t(1000)"),
624 CASE_SAME_TYPE(INT64, "u64vec2", "u64vec2 v", "u64vec2(1000, 2000)"),
625 CASE_SAME_TYPE(INT64, "u64vec3", "u64vec3 v", "u64vec3(1000, 2000, 3000)"),
626 CASE_SAME_TYPE(INT64, "u64vec4", "u64vec4 v", "u64vec4(1000, 2000, 3000, 4000)"),
627 CASE_SAME_TYPE(INT64, "i64", "int64_t v", "int64_t(-1000)"),
628 CASE_SAME_TYPE(INT64, "i64vec2", "i64vec2 v", "i64vec2(-1000, 2000)"),
629 CASE_SAME_TYPE(INT64, "i64vec3", "i64vec3 v", "i64vec3(-1000, 2000, -3000)"),
630 CASE_SAME_TYPE(INT64, "i64vec4", "i64vec4 v", "i64vec4(-1000, 2000, -3000, 4000)"),
631 CASE_SAME_TYPE(FLOAT16, "f16", "float16_t v", "float16_t(-100.0)"),
632 CASE_SAME_TYPE(FLOAT16, "f16vec2", "f16vec2 v", "f16vec2(100.0, -200.0)"),
633 CASE_SAME_TYPE(FLOAT16, "f16vec3", "f16vec3 v", "f16vec3(100.0, -200.0, 300.0)"),
634 CASE_SAME_TYPE(FLOAT16, "f16vec4", "f16vec4 v", "f16vec4(100.0, -200.0, 300.0, -400.0)"),
635 CASE_SAME_TYPE(0, "f32", "float32_t v", "float32_t(-100.0)"),
636 CASE_SAME_TYPE(0, "f32vec2", "f32vec2 v", "f32vec2(100.0, -200.0)"),
637 CASE_SAME_TYPE(0, "f32vec3", "f32vec3 v", "f32vec3(100.0, -200.0, 300.0)"),
638 CASE_SAME_TYPE(0, "f32vec4", "f32vec4 v", "f32vec4(100.0, -200.0, 300.0, -400.0)"),
639 CASE_SAME_TYPE(FLOAT64, "f64", "float64_t v", "float32_t(-100.0)"),
640 CASE_SAME_TYPE(FLOAT64, "f64vec2", "f64vec2 v", "f64vec2(100.0, -200.0)"),
641 CASE_SAME_TYPE(FLOAT64, "f64vec3", "f64vec3 v", "f64vec3(100.0, -200.0, 300.0)"),
642 CASE_SAME_TYPE(FLOAT64, "f64vec4", "f64vec4 v", "f64vec4(100.0, -200.0, 300.0, -400.0)"),
643 CASE_SAME_TYPE(FLOAT16, "f16mat2x2", "f16mat2x2 v", "f16mat2x2(1, 2, 3, 4)"),
644 CASE_SAME_TYPE(FLOAT16, "f16mat2x3", "f16mat2x3 v", "f16mat2x3(1, 2, 3, 4, 5, 6)"),
645 CASE_SAME_TYPE(FLOAT16, "f16mat2x4", "f16mat2x4 v", "f16mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
646 CASE_SAME_TYPE(FLOAT16, "f16mat3x2", "f16mat3x2 v", "f16mat3x2(1, 2, 3, 4, 5, 6)"),
647 CASE_SAME_TYPE(FLOAT16, "f16mat3x3", "f16mat3x3 v", "f16mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
648 CASE_SAME_TYPE(FLOAT16, "f16mat3x4", "f16mat3x4 v", "f16mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
649 CASE_SAME_TYPE(FLOAT16, "f16mat4x2", "f16mat4x2 v", "f16mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
650 CASE_SAME_TYPE(FLOAT16, "f16mat4x3", "f16mat4x3 v", "f16mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
651 CASE_SAME_TYPE(FLOAT16, "f16mat4x4", "f16mat4x4 v", "f16mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
652 CASE_SAME_TYPE(0, "f32mat2x2", "f32mat2x2 v", "f32mat2x2(1, 2, 3, 4)"),
653 CASE_SAME_TYPE(0, "f32mat2x3", "f32mat2x3 v", "f32mat2x3(1, 2, 3, 4, 5, 6)"),
654 CASE_SAME_TYPE(0, "f32mat2x4", "f32mat2x4 v", "f32mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
655 CASE_SAME_TYPE(0, "f32mat3x2", "f32mat3x2 v", "f32mat3x2(1, 2, 3, 4, 5, 6)"),
656 CASE_SAME_TYPE(0, "f32mat3x3", "f32mat3x3 v", "f32mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
657 CASE_SAME_TYPE(0, "f32mat3x4", "f32mat3x4 v", "f32mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
658 CASE_SAME_TYPE(0, "f32mat4x2", "f32mat4x2 v", "f32mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
659 CASE_SAME_TYPE(0, "f32mat4x3", "f32mat4x3 v", "f32mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
660 CASE_SAME_TYPE(0, "f32mat4x4", "f32mat4x4 v", "f32mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
661 CASE_SAME_TYPE(FLOAT64, "f64mat2x2", "f64mat2x2 v", "f64mat2x2(1, 2, 3, 4)"),
662 CASE_SAME_TYPE(FLOAT64, "f64mat2x3", "f64mat2x3 v", "f64mat2x3(1, 2, 3, 4, 5, 6)"),
663 CASE_SAME_TYPE(FLOAT64, "f64mat2x4", "f64mat2x4 v", "f64mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
664 CASE_SAME_TYPE(FLOAT64, "f64mat3x2", "f64mat3x2 v", "f64mat3x2(1, 2, 3, 4, 5, 6)"),
665 CASE_SAME_TYPE(FLOAT64, "f64mat3x3", "f64mat3x3 v", "f64mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
666 CASE_SAME_TYPE(FLOAT64, "f64mat3x4", "f64mat3x4 v", "f64mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
667 CASE_SAME_TYPE(FLOAT64, "f64mat4x2", "f64mat4x2 v", "f64mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
668 CASE_SAME_TYPE(FLOAT64, "f64mat4x3", "f64mat4x3 v", "f64mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
669 CASE_SAME_TYPE(FLOAT64, "f64mat4x4", "f64mat4x4 v", "f64mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
670
671 CASE_WITH_REVERSE(ALL, INT8,
672 "i8", "int8_t v", "int8_t(-2)",
673 "u8", "uint8_t v", "uint8_t(0xFE)"),
674 CASE_WITH_REVERSE(ALL, INT16,
675 "i16", "int16_t v", "int16_t(-2)",
676 "u16", "uint16_t v", "uint16_t(0xFFFE)"),
677 CASE_WITH_REVERSE(ALL, 0,
678 "i32", "int32_t v", "int32_t(-2)",
679 "u32", "uint32_t v", "uint32_t(0xFFFFFFFE)"),
680 CASE_WITH_REVERSE(ALL, INT64,
681 "i64", "int64_t v", "int64_t(-2UL)",
682 "u64", "uint64_t v", "uint64_t(0xFFFFFFFFFFFFFFFEUL)"),
683 CASE_WITH_REVERSE(ALL, FLOAT16 | INT16,
684 "f16", "float16_t v", "float16_t(1.0)",
685 "u16", "uint16_t v", "uint16_t(0x3C00)"),
686 CASE_WITH_REVERSE(ALL, 0,
687 "f32", "float32_t v", "float32_t(1.0)",
688 "u32", "uint32_t v", "uint32_t(0x3F800000)"),
689 CASE_WITH_REVERSE(ALL, FLOAT64 | INT64,
690 "f64", "float64_t v", "float64_t(1.0)",
691 "u64", "uint64_t v", "uint64_t(0x3FF0000000000000UL)"),
692
693 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
694 "u16", "uint16_t v", "uint16_t(0x1234)",
695 "u8_array", "uint8_t v[2]", makeU8Array({0x34, 0x12})),
696 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
697 "u32", "uint32_t v", "uint32_t(0x12345678)",
698 "u8_array", "uint8_t v[4]", makeU8Array({0x78, 0x56, 0x34, 0x12})),
699 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
700 "u32", "uint32_t v", "uint32_t(0x12345678)",
701 "u16_array", "uint16_t v[2]", makeU16Array({0x5678, 0x1234})),
702 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
703 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
704 "u8_array", "uint8_t v[8]", makeU8Array({0xEF, 0xCD, 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12})),
705 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
706 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
707 "u16_array", "uint16_t v[4]", makeU16Array({0xCDEF, 0x90AB, 0x5678, 0x1234})),
708 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
709 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
710 "u32_array", "uint32_t v[2]", makeU32Array({0x90ABCDEF, 0x12345678})),
711 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
712 "i16", "int16_t v", "int16_t(-2)",
713 "u8_array", "uint8_t v[2]", makeU8Array({0xFE, 0xFF})),
714 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
715 "i32", "int32_t v", "int32_t(-2)",
716 "u8_array", "uint8_t v[4]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF})),
717 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
718 "i32", "int32_t v", "int32_t(-2)",
719 "u16_array", "uint16_t v[2]", makeU16Array({0xFFFE, 0xFFFF})),
720 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
721 "i64", "int64_t v", "int64_t(-2UL)",
722 "u8_array", "uint8_t v[8]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF})),
723 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
724 "i64", "int64_t v", "int64_t(-2UL)",
725 "u16_array", "uint16_t v[4]", makeU16Array({0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF})),
726 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
727 "i64", "int64_t v", "int64_t(-2UL)",
728 "u32_array", "uint32_t v[2]", makeU32Array({0xFFFFFFFE, 0xFFFFFFFF})),
729 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT16 | INT8,
730 "f16", "float16_t v", "float16_t(1.0)",
731 "u8_array", "uint8_t v[2]", makeU8Array({0x00, 0x3C})),
732 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
733 "f32", "float32_t v", "float32_t(1.0)",
734 "u8_array", "uint8_t v[4]", makeU8Array({0x00, 0x00, 0x80, 0x3F})),
735 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
736 "f32", "float32_t v", "float32_t(1.0)",
737 "u16_array", "uint16_t v[2]", makeU16Array({0x0000, 0x3F80})),
738 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT8,
739 "f64", "float64_t v", "float64_t(1.0)",
740 "u8_array", "uint8_t v[8]", makeU8Array({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F})),
741 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT16,
742 "f64", "float64_t v", "float64_t(1.0)",
743 "u16_array", "uint16_t v[4]", makeU16Array({0x0000, 0x0000, 0x0000, 0x3FF0})),
744 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64,
745 "f64", "float64_t v", "float64_t(1.0)",
746 "u32_array", "uint32_t v[2]", makeU32Array({0x00000000, 0x3FF00000})),
747
748 CASE(DEFAULT | STD430, 0,
749 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
750 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
751 CASE(STD140, 0,
752 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 999, 999), vec4(2, 2, 999, 999), vec4(3, 3, 999, 999))",
753 "vec2_array", "vec2 v[3]", "vec2[](vec2(1), vec2(2), vec2(3))"),
754 CASE(SCALAR, 0,
755 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
756 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
757
758 CASE(DEFAULT | STD430, 0,
759 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
760 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
761 CASE(STD140, 0,
762 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
763 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
764 CASE(SCALAR, 0,
765 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 2), vec4(2, 2, 3, 3), vec4(3, 4, 4, 4))",
766 "vec3_array", "vec3 v[4]", "vec3[](vec3(1), vec3(2), vec3(3), vec3(4))"),
767
768 CASE_EXTRA(DEFAULT | STD430 | SCALAR, INT8,
769 "struct s { int a; int b; }",
770 "u8_array", "uint8_t v[8]", makeU8Array({2, 0, 0, 0, 0xFE, 0xFF, 0xFF, 0xFF}),
771 "struct_int_int", "s v", "s(2, -2)"),
772 CASE_EXTRA(ALL, 0,
773 "struct s { int a; int b; }",
774 "uvec2", "uvec2 v", "uvec2(2, 0xFFFFFFFE)",
775 "struct_int_int", "s v", "s(2, -2)"),
776 };
777
778 #undef CASE_EXTRA
779 #undef CASE_EXTRA_WITH_REVERSE
780 #undef CASE_WITH_REVERSE
781 #undef CASE_SAME_TYPE
782 #undef CASE
783
784 for (deUint32 i = 0; i < cases.size(); i++)
785 {
786 for (int syncIndex = 0; syncIndex < AliasTest::SynchronizationCount; syncIndex++)
787 {
788 const AliasTest::Synchronization sync = AliasTest::Synchronization(syncIndex);
789
790 for (int funcIndex = 0; funcIndex < AliasTest::FunctionCount; funcIndex++)
791 {
792 const AliasTest::Function func = AliasTest::Function(funcIndex);
793
794 for (int layoutIndex = 0; layoutIndex < AliasTest::LayoutCount; layoutIndex++)
795 {
796 const AliasTest::LayoutFlags layout = AliasTest::LayoutFlags(1 << layoutIndex);
797
798 AliasTest::CaseDef c = cases[i];
799
800 if (c.writeDesc == c.readDesc)
801 continue;
802
803 if ((c.layout & layout) == 0)
804 continue;
805
806 c.layout = layout;
807 c.func = func;
808 c.sync = sync;
809
810 group->addChild(new AliasTest(group->getTestContext(), c));
811 }
812 }
813 }
814 }
815 }
816
817 class ZeroTest : public vkt::TestCase
818 {
819 public:
820 struct CaseDef
821 {
822 glu::DataType zeroElementType;
823 glu::DataType fieldType[2];
824 deUint32 elements;
825
testNamevkt::compute::__anon6620ebe50111::ZeroTest::CaseDef826 std::string testName() const
827 {
828 std::string name = glu::getDataTypeName(zeroElementType);
829 name += "_array_to";
830
831 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(fieldType); ++i)
832 {
833 if (fieldType[i] == glu::TYPE_INVALID)
834 break;
835 name += "_";
836 name += glu::getDataTypeName(fieldType[i]);
837 }
838 name += "_array_" + de::toString(elements);
839 return name;
840 }
841 };
842
ZeroTest(tcu::TestContext & testCtx,const CaseDef & caseDef)843 ZeroTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
844 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
845 m_caseDef(caseDef)
846 {
847 }
848
849 virtual void checkSupport(Context& context) const;
850 void initPrograms(SourceCollections& sourceCollections) const;
851
852 class Instance : public vkt::TestInstance
853 {
854 public:
Instance(Context & context)855 Instance(Context& context)
856 : TestInstance(context)
857 {
858 }
859
iterate(void)860 tcu::TestStatus iterate(void)
861 {
862 return runCompute(m_context, 1u);
863 }
864 };
865
createInstance(Context & context) const866 TestInstance* createInstance(Context& context) const
867 {
868 return new Instance(context);
869 }
870
871 private:
872 CaseDef m_caseDef;
873 };
874
checkSupport(Context & context) const875 void ZeroTest::checkSupport(Context& context) const
876 {
877 CheckSupportParams p;
878 deMemset(&p, 0, sizeof(p));
879
880 DE_ASSERT(!glu::isDataTypeFloat16OrVec(m_caseDef.zeroElementType));
881
882 p.useType(m_caseDef.zeroElementType);
883 p.useType(m_caseDef.fieldType[0]);
884 p.useType(m_caseDef.fieldType[1]);
885
886 checkSupportWithParams(context, p);
887 }
888
getDataTypeLiteral(glu::DataType dt,std::string baseValue)889 std::string getDataTypeLiteral(glu::DataType dt, std::string baseValue)
890 {
891 using namespace glu;
892
893 if (isDataTypeVector(dt))
894 {
895 std::string elemValue = getDataTypeLiteral(getDataTypeScalarType(dt), baseValue);
896
897 std::ostringstream result;
898 result << getDataTypeName(dt) << "(";
899 for (int i = 0; i < getDataTypeScalarSize(dt); ++i)
900 {
901 if (i > 0)
902 result << ", ";
903 result << elemValue;
904 }
905 result << ")";
906 return result.str();
907 }
908 else if (isDataTypeScalar(dt))
909 {
910 return getDataTypeName(dt) + std::string("(") + baseValue + std::string(")");
911 }
912 else
913 {
914 DE_ASSERT(0);
915 return std::string();
916 }
917 }
918
initPrograms(SourceCollections & sourceCollections) const919 void ZeroTest::initPrograms(SourceCollections& sourceCollections) const
920 {
921 using namespace glu;
922
923 std::ostringstream src;
924
925 src << "#version 450\n"
926 << "#extension GL_EXT_shared_memory_block : enable\n"
927 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
928 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
929
930 // Large enough to cover the largest B block even if just 8-bit elements.
931 // Small enough to fit in the minimum shared memory size limit even if with uvec4.
932 src << "shared A { " << getDataTypeName(m_caseDef.zeroElementType) << " arr[256]; } zero;\n";
933
934 src << "struct st {\n"
935 << " " << getDataTypeName(m_caseDef.fieldType[0]) << " x;\n";
936 if (m_caseDef.fieldType[1])
937 src << " " << getDataTypeName(m_caseDef.fieldType[1]) << " y;\n";
938 src << "};\n";
939
940
941 src << "shared B { st arr[4]; };\n"
942 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n"
943 << "void main() {\n"
944 << "for (int i = 0; i < zero.arr.length(); i++) {\n"
945 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "1") << ";\n"
946 << " }\n"
947 << " for (int i = 0; i < zero.arr.length(); i++) {\n"
948 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "0") << ";\n"
949 << " }\n"
950 << " result = (\n";
951
952 for (deUint32 i = 0; i < 4; i++)
953 {
954 src << " ";
955 if (i > 0)
956 src << "&& ";
957 src << "(arr[" << de::toString(i) << "].x == " << getDataTypeLiteral(m_caseDef.fieldType[0], "0") << ")\n";
958 if (m_caseDef.fieldType[1])
959 src << " && (arr[" << de::toString(i) << "].y == " << getDataTypeLiteral(m_caseDef.fieldType[1], "0") << ")\n";
960 }
961
962 src << " ) ? 0 : 0xFF;\n"
963 << "}\n";
964
965 sourceCollections.glslSources.add("comp")
966 << ComputeSource(src.str())
967 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
968 vk::ShaderBuildOptions::Flags(0u), true);
969 }
970
isTestedZeroElementType(glu::DataType dt)971 bool isTestedZeroElementType(glu::DataType dt)
972 {
973 using namespace glu;
974
975 // Select only a few interesting types.
976 switch (dt)
977 {
978 case TYPE_UINT:
979 case TYPE_UINT_VEC4:
980 case TYPE_UINT8:
981 case TYPE_UINT8_VEC4:
982 case TYPE_UINT16:
983 return true;
984 default:
985 return false;
986 }
987 }
988
isTestedFieldType(glu::DataType dt)989 bool isTestedFieldType(glu::DataType dt)
990 {
991 using namespace glu;
992
993 // Select only a few interesting types.
994 switch (dt)
995 {
996 case TYPE_UINT:
997 case TYPE_UINT_VEC3:
998 case TYPE_UINT8:
999 case TYPE_UINT16:
1000 case TYPE_FLOAT:
1001 case TYPE_FLOAT_VEC4:
1002 case TYPE_FLOAT16:
1003 case TYPE_DOUBLE:
1004 case TYPE_DOUBLE_VEC4:
1005 case TYPE_BOOL:
1006 return true;
1007
1008 default:
1009 return false;
1010 }
1011 }
1012
AddZeroTests(tcu::TestCaseGroup * group)1013 void AddZeroTests(tcu::TestCaseGroup* group)
1014 {
1015 using namespace glu;
1016
1017 ZeroTest::CaseDef c;
1018
1019 for (deUint32 i = 0; i < TYPE_LAST; ++i)
1020 {
1021 c.zeroElementType = DataType(i);
1022
1023 if (isTestedZeroElementType(c.zeroElementType))
1024 {
1025 deUint32 idx[2] = { 0, 0 };
1026
1027 while (idx[1] < TYPE_LAST && idx[0] < TYPE_LAST)
1028 {
1029 c.fieldType[0] = DataType(idx[0]);
1030 c.fieldType[1] = DataType(idx[1]);
1031
1032 if (isTestedFieldType(c.fieldType[0]) &&
1033 (c.fieldType[1] == TYPE_INVALID || isTestedFieldType(c.fieldType[1])))
1034 {
1035 for (deUint32 elements = 1; elements <= 4; ++elements)
1036 {
1037 c.elements = elements;
1038 group->addChild(new ZeroTest(group->getTestContext(), c));
1039 }
1040 }
1041
1042 idx[0]++;
1043 if (idx[0] >= TYPE_LAST)
1044 {
1045 idx[1]++;
1046 idx[0] = 0;
1047 }
1048 }
1049 }
1050 }
1051 }
1052
1053 class PaddingTest : public vkt::TestCase
1054 {
1055 public:
1056 struct CaseDef
1057 {
1058 std::vector<glu::DataType> types;
1059 std::vector<deUint32> offsets;
1060 std::vector<std::string> values;
1061 deUint32 expected[32];
1062
testNamevkt::compute::__anon6620ebe50111::PaddingTest::CaseDef1063 std::string testName() const
1064 {
1065 DE_ASSERT(types.size() > 0);
1066 DE_ASSERT(types.size() == offsets.size());
1067 DE_ASSERT(types.size() == values.size());
1068
1069 std::string name;
1070 for (deUint32 i = 0; i < types.size(); ++i)
1071 {
1072 if (i > 0)
1073 name += "_";
1074 name += glu::getDataTypeName(types[i]);
1075 name += "_" + de::toString(offsets[i]);
1076 }
1077 return name;
1078 }
1079
addvkt::compute::__anon6620ebe50111::PaddingTest::CaseDef1080 void add(glu::DataType dt, deUint32 offset, const std::string& v)
1081 {
1082 types.push_back(dt);
1083 offsets.push_back(offset);
1084 values.push_back(v);
1085 }
1086
needsScalarvkt::compute::__anon6620ebe50111::PaddingTest::CaseDef1087 bool needsScalar() const
1088 {
1089 for (deUint32 i = 0; i < offsets.size(); ++i)
1090 {
1091 if (offsets[i] % 4 != 0)
1092 return true;
1093 }
1094 return false;
1095 }
1096 };
1097
PaddingTest(tcu::TestContext & testCtx,const CaseDef & caseDef)1098 PaddingTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
1099 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
1100 m_caseDef(caseDef)
1101 {
1102 }
1103
1104 virtual void checkSupport(Context& context) const;
1105 void initPrograms(SourceCollections& sourceCollections) const;
1106
1107 class Instance : public vkt::TestInstance
1108 {
1109 public:
Instance(Context & context,const CaseDef & caseDef)1110 Instance(Context& context, const CaseDef& caseDef)
1111 : TestInstance(context),
1112 m_caseDef(caseDef)
1113 {
1114 }
1115
iterate(void)1116 tcu::TestStatus iterate(void)
1117 {
1118 return runCompute(m_context, 1u);
1119 }
1120
1121 private:
1122 CaseDef m_caseDef;
1123 };
1124
createInstance(Context & context) const1125 TestInstance* createInstance(Context& context) const
1126 {
1127 return new Instance(context, m_caseDef);
1128 }
1129
1130 private:
1131 CaseDef m_caseDef;
1132 };
1133
checkSupport(Context & context) const1134 void PaddingTest::checkSupport(Context& context) const
1135 {
1136 CheckSupportParams p;
1137 deMemset(&p, 0, sizeof(p));
1138
1139 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1140 p.useType(m_caseDef.types[i]);
1141
1142 p.needsScalar = m_caseDef.needsScalar();
1143
1144 checkSupportWithParams(context, p);
1145 }
1146
initPrograms(SourceCollections & sourceCollections) const1147 void PaddingTest::initPrograms(SourceCollections& sourceCollections) const
1148 {
1149 using namespace glu;
1150
1151 std::ostringstream src;
1152
1153 src << "#version 450\n"
1154 << "#extension GL_EXT_shared_memory_block : enable\n"
1155 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
1156 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
1157
1158 src << "shared A { uint32_t words[32]; };\n";
1159
1160 if (m_caseDef.needsScalar())
1161 {
1162 src << "#extension GL_EXT_scalar_block_layout : enable\n"
1163 << "layout (scalar) ";
1164 }
1165
1166 src << "shared B {\n";
1167
1168 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1169 {
1170 src << " layout(offset = " << m_caseDef.offsets[i] << ") "
1171 << glu::getDataTypeName(m_caseDef.types[i]) << " x" << i << ";\n";
1172 }
1173
1174 src << "};\n"
1175 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1176
1177 src << "void main() {\n"
1178 << "for (int i = 0; i < 32; i++) words[i] = 0;\n";
1179
1180 for (deUint32 i = 0; i < m_caseDef.values.size(); ++i)
1181 src << "x" << i << " = " << m_caseDef.values[i] << ";\n";
1182
1183 src << "result = 32;\n";
1184 for (deUint32 i = 0; i < 32; ++i)
1185 {
1186 src << "if (words[" << std::dec << i << "] == 0x"
1187 << std::uppercase << std::hex << m_caseDef.expected[i]
1188 << ") result--;\n";
1189 }
1190
1191 src << "}\n";
1192
1193 sourceCollections.glslSources.add("comp")
1194 << ComputeSource(src.str())
1195 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1196 vk::ShaderBuildOptions::Flags(0u), true);
1197 }
1198
AddPaddingTests(tcu::TestCaseGroup * group)1199 void AddPaddingTests(tcu::TestCaseGroup* group)
1200 {
1201 using namespace glu;
1202
1203 for (deUint32 i = 0; i < 31; ++i)
1204 {
1205 for (deUint32 j = i + 1; j < 32; j += 4)
1206 {
1207 PaddingTest::CaseDef c;
1208 deMemset(&c, 0, sizeof(c));
1209
1210 c.add(TYPE_UINT, 4 * i, "0x1234");
1211 c.expected[i] = 0x1234;
1212
1213 c.add(TYPE_UINT, 4 * j, "0x5678");
1214 c.expected[j] = 0x5678;
1215
1216 group->addChild(new PaddingTest(group->getTestContext(), c));
1217 }
1218 }
1219
1220 for (deUint32 i = 0; i < 127; ++i)
1221 {
1222 for (deUint32 j = i + 1; j < 32; j += 16)
1223 {
1224 PaddingTest::CaseDef c;
1225 deMemset(&c, 0, sizeof(c));
1226
1227 deUint8* expected = reinterpret_cast<deUint8*>(c.expected);
1228
1229 c.add(TYPE_UINT8, i, "uint8_t(0xAA)");
1230 expected[i] = 0xAA;
1231
1232 c.add(TYPE_UINT8, j, "uint8_t(0xBB)");
1233 expected[j] = 0xBB;
1234
1235 group->addChild(new PaddingTest(group->getTestContext(), c));
1236 }
1237 }
1238 }
1239
1240 class SizeTest : public vkt::TestCase
1241 {
1242 public:
SizeTest(tcu::TestContext & testCtx,deUint32 size)1243 SizeTest(tcu::TestContext& testCtx, deUint32 size)
1244 : TestCase(testCtx, de::toString(size), de::toString(size))
1245 , m_size(size)
1246 {
1247 DE_ASSERT(size % 8 == 0);
1248 }
1249
1250 virtual void checkSupport(Context& context) const;
1251 void initPrograms(SourceCollections& sourceCollections) const;
1252
1253 class Instance : public vkt::TestInstance
1254 {
1255 public:
Instance(Context & context)1256 Instance(Context& context)
1257 : TestInstance(context)
1258 {
1259 }
1260
iterate(void)1261 tcu::TestStatus iterate(void)
1262 {
1263 return runCompute(m_context, 1u);
1264 }
1265 };
1266
createInstance(Context & context) const1267 TestInstance* createInstance(Context& context) const
1268 {
1269 return new Instance(context);
1270 }
1271
1272 private:
1273 deUint32 m_size;
1274 };
1275
checkSupport(Context & context) const1276 void SizeTest::checkSupport(Context& context) const
1277 {
1278 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
1279 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
1280
1281 if (context.getDeviceProperties().limits.maxComputeSharedMemorySize < m_size)
1282 TCU_THROW(NotSupportedError, "Not enough shared memory supported.");
1283 }
1284
initPrograms(SourceCollections & sourceCollections) const1285 void SizeTest::initPrograms(SourceCollections& sourceCollections) const
1286 {
1287 using namespace glu;
1288
1289 std::ostringstream src;
1290
1291 src << "#version 450\n";
1292 src << "#extension GL_EXT_shared_memory_block : enable\n";
1293 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
1294 src << "layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;\n";
1295
1296 for (deUint32 i = 0; i < 8; ++i)
1297 src << "shared B" << i << " { uint32_t words[" << (m_size / 4) << "]; } b" << i << ";\n";
1298
1299 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1300
1301 src << "void main() {\n";
1302 src << " int index = int(gl_LocalInvocationIndex);\n";
1303 src << " int size = " << (m_size / 4) << ";\n";
1304
1305 src << " if (index == 0) for (int x = 0; x < size; x++) b0.words[x] = 0xFFFF;\n";
1306 src << " barrier();\n";
1307
1308 src << " for (int x = 0; x < size; x++) {\n";
1309 src << " if (x % 8 != index) continue;\n";
1310 for (deUint32 i = 0; i < 8; ++i)
1311 src << " if (index == " << i << ") b" << i << ".words[x] = (x << 3) | " << i << ";\n";
1312 src << " }\n";
1313
1314 src << " barrier();\n";
1315 src << " if (index != 0) return;\n";
1316
1317 src << " int r = size;\n";
1318 src << " for (int x = 0; x < size; x++) {\n";
1319 src << " int expected = (x << 3) | (x % 8);\n";
1320 src << " if (b0.words[x] == expected) r--;\n";
1321 src << " }\n";
1322 src << " result = r;\n";
1323 src << "}\n";
1324
1325 sourceCollections.glslSources.add("comp")
1326 << ComputeSource(src.str())
1327 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1328 vk::ShaderBuildOptions::Flags(0u), true);
1329 }
1330
AddSizeTests(tcu::TestCaseGroup * group)1331 void AddSizeTests(tcu::TestCaseGroup* group)
1332 {
1333 deUint32 sizes[] =
1334 {
1335 8u,
1336 64u,
1337 4096u,
1338
1339 // Dynamic generation of shaders based on properties reported
1340 // by devices is not allowed in the CTS, so let's create a few
1341 // variants based on common known maximum sizes.
1342 16384u,
1343 32768u,
1344 49152u,
1345 65536u,
1346 };
1347
1348 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(sizes); ++i)
1349 group->addChild(new SizeTest(group->getTestContext(), sizes[i]));
1350 }
1351
CreateAmberTestCase(tcu::TestContext & testCtx,const char * name,const char * description,const std::string & filename,const std::vector<std::string> & requirements=std::vector<std::string> (),bool zeroinit=false)1352 cts_amber::AmberTestCase* CreateAmberTestCase(tcu::TestContext& testCtx,
1353 const char* name,
1354 const char* description,
1355 const std::string& filename,
1356 const std::vector<std::string>& requirements = std::vector<std::string>(),
1357 bool zeroinit = false)
1358 {
1359 vk::SpirVAsmBuildOptions asm_options(VK_MAKE_VERSION(1, 1, 0), vk::SPIRV_VERSION_1_4);
1360 asm_options.supports_VK_KHR_spirv_1_4 = true;
1361
1362 cts_amber::AmberTestCase *t = cts_amber::createAmberTestCase(testCtx, name, description, "compute/workgroup_memory_explicit_layout", filename, requirements);
1363 t->setSpirVAsmBuildOptions(asm_options);
1364 t->addRequirement("VK_KHR_workgroup_memory_explicit_layout");
1365 t->addRequirement("VK_KHR_spirv_1_4");
1366 if (zeroinit)
1367 {
1368 t->addRequirement("VK_KHR_zero_initialize_workgroup_memory");
1369 }
1370 return t;
1371 }
1372
AddCopyMemoryTests(tcu::TestCaseGroup * group)1373 void AddCopyMemoryTests(tcu::TestCaseGroup* group)
1374 {
1375 tcu::TestContext& testCtx = group->getTestContext();
1376
1377 group->addChild(CreateAmberTestCase(testCtx, "basic", "", "copy_memory_basic.amber"));
1378 group->addChild(CreateAmberTestCase(testCtx, "two_invocations", "", "copy_memory_two_invocations.amber"));
1379 group->addChild(CreateAmberTestCase(testCtx, "variable_pointers", "", "copy_memory_variable_pointers.amber",
1380 { "VariablePointerFeatures.variablePointers" }));
1381 }
1382
AddZeroInitializeExtensionTests(tcu::TestCaseGroup * group)1383 void AddZeroInitializeExtensionTests(tcu::TestCaseGroup* group)
1384 {
1385 tcu::TestContext& testCtx = group->getTestContext();
1386
1387 group->addChild(CreateAmberTestCase(testCtx, "block", "", "zero_ext_block.amber", std::vector<std::string>(), true));
1388 group->addChild(CreateAmberTestCase(testCtx, "other_block", "", "zero_ext_other_block.amber", std::vector<std::string>(), true));
1389 group->addChild(CreateAmberTestCase(testCtx, "block_with_offset", "", "zero_ext_block_with_offset.amber", std::vector<std::string>(), true));
1390 }
1391
1392 } // anonymous
1393
createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext & testCtx)1394 tcu::TestCaseGroup* createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext& testCtx)
1395 {
1396 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "workgroup_memory_explicit_layout", "VK_KHR_workgroup_memory_explicit_layout tests"));
1397
1398 tcu::TestCaseGroup* alias = new tcu::TestCaseGroup(testCtx, "alias", "Aliasing between different blocks and types");
1399 AddAliasTests(alias);
1400 tests->addChild(alias);
1401
1402 tcu::TestCaseGroup* zero = new tcu::TestCaseGroup(testCtx, "zero", "Manually zero initialize a block and read from another");
1403 AddZeroTests(zero);
1404 tests->addChild(zero);
1405
1406 tcu::TestCaseGroup* padding = new tcu::TestCaseGroup(testCtx, "padding", "Padding as part of the explicit layout");
1407 AddPaddingTests(padding);
1408 tests->addChild(padding);
1409
1410 tcu::TestCaseGroup* size = new tcu::TestCaseGroup(testCtx, "size", "Test blocks of various sizes");
1411 AddSizeTests(size);
1412 tests->addChild(size);
1413
1414 tcu::TestCaseGroup* copy_memory = new tcu::TestCaseGroup(testCtx, "copy_memory", "Test OpCopyMemory with Workgroup memory");
1415 AddCopyMemoryTests(copy_memory);
1416 tests->addChild(copy_memory);
1417
1418 tcu::TestCaseGroup* zero_ext = new tcu::TestCaseGroup(testCtx, "zero_ext", "Test interaction with VK_KHR_zero_initialize_workgroup_memory");
1419 AddZeroInitializeExtensionTests(zero_ext);
1420 tests->addChild(zero_ext);
1421
1422 return tests.release();
1423 }
1424
1425 } // compute
1426 } // vkt
1427