1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Intel Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief VK_KHR_workgroup_memory_explicit_layout tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeWorkgroupMemoryExplicitLayoutTests.hpp"
26 #include "vktAmberTestCase.hpp"
27 #include "vktTestCase.hpp"
28 #include "vktTestCaseUtil.hpp"
29 #include "vktTestGroupUtil.hpp"
30
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageWithMemory.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDefs.hpp"
39 #include "vkRef.hpp"
40
41 #include "tcuCommandLine.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47
48 #include <algorithm>
49 #include <vector>
50
51 using namespace vk;
52
53 namespace vkt
54 {
55 namespace compute
56 {
57 namespace
58 {
59
60 struct CheckSupportParams
61 {
62 bool needsScalar;
63 bool needsInt8;
64 bool needsInt16;
65 bool needsInt64;
66 bool needsFloat16;
67 bool needsFloat64;
68
useTypevkt::compute::__anon5d3a54a00111::CheckSupportParams69 void useType(glu::DataType dt)
70 {
71 using namespace glu;
72
73 needsInt8 |= isDataTypeIntOrIVec8Bit(dt) || isDataTypeUintOrUVec8Bit(dt);
74 needsInt16 |= isDataTypeIntOrIVec16Bit(dt) || isDataTypeUintOrUVec16Bit(dt);
75 needsFloat16 |= isDataTypeFloat16OrVec(dt);
76 needsFloat64 |= isDataTypeDoubleOrDVec(dt);
77 }
78 };
79
checkSupportWithParams(Context & context,const CheckSupportParams & params)80 void checkSupportWithParams(Context& context, const CheckSupportParams& params)
81 {
82 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
83 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
84
85 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout_features;
86 deMemset(&layout_features, 0, sizeof(layout_features));
87 layout_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
88 layout_features.pNext = DE_NULL;
89
90 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
91 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
92 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
93 f16_i8_features.pNext = &layout_features;
94
95 VkPhysicalDeviceFeatures2 features2;
96 deMemset(&features2, 0, sizeof(features2));
97 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
98 features2.pNext = &f16_i8_features;
99 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
100
101 if (params.needsScalar)
102 {
103 if (layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout != VK_TRUE)
104 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayoutScalarBlockLayout not supported");
105 }
106
107 if (params.needsInt8)
108 {
109 if (f16_i8_features.shaderInt8 != VK_TRUE)
110 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
111 if (layout_features.workgroupMemoryExplicitLayout8BitAccess != VK_TRUE)
112 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout8BitAccess not supported");
113 }
114
115 if (params.needsInt16)
116 {
117 if (features2.features.shaderInt16 != VK_TRUE)
118 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
119 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
120 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
121 }
122
123 if (params.needsInt64)
124 {
125 if (features2.features.shaderInt64 != VK_TRUE)
126 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
127 }
128
129 if (params.needsFloat16)
130 {
131 if (f16_i8_features.shaderFloat16 != VK_TRUE)
132 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
133 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
134 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
135 }
136
137 if (params.needsFloat64)
138 {
139 if (features2.features.shaderFloat64 != VK_TRUE)
140 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
141 }
142 }
143
runCompute(Context & context,deUint32 workgroupSize)144 tcu::TestStatus runCompute(Context& context, deUint32 workgroupSize)
145 {
146 const DeviceInterface& vk = context.getDeviceInterface();
147 const VkDevice device = context.getDevice();
148 Allocator& allocator = context.getDefaultAllocator();
149 tcu::TestLog& log = context.getTestContext().getLog();
150
151 de::MovePtr<BufferWithMemory> buffer;
152 VkDescriptorBufferInfo bufferDescriptor;
153
154 VkDeviceSize size = sizeof(deUint32) * workgroupSize;
155
156 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
157 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
158 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
159 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
160
161 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
162
163 deMemset(ptr, 0xFF, static_cast<std::size_t>(size));
164
165 DescriptorSetLayoutBuilder layoutBuilder;
166 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
167
168 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
169 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
170 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
171 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
172 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
173
174 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
175 {
176 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 DE_NULL,
178 (VkPipelineLayoutCreateFlags)0,
179 1,
180 &descriptorSetLayout.get(),
181 0u,
182 DE_NULL,
183 };
184 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
185 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
186 flushAlloc(vk, device, buffer->getAllocation());
187
188 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
189 const VkPipelineShaderStageCreateInfo shaderInfo =
190 {
191 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
192 DE_NULL,
193 0,
194 VK_SHADER_STAGE_COMPUTE_BIT,
195 *shader,
196 "main",
197 DE_NULL,
198 };
199
200 const VkComputePipelineCreateInfo pipelineInfo =
201 {
202 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
203 DE_NULL,
204 0u,
205 shaderInfo,
206 *pipelineLayout,
207 (VkPipeline)0,
208 0u,
209 };
210 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
211
212 const VkQueue queue = context.getUniversalQueue();
213 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
214 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
215 context.getUniversalQueueFamilyIndex());
216 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
217
218 DescriptorSetUpdateBuilder setUpdateBuilder;
219 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
220 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
221 setUpdateBuilder.update(vk, device);
222
223 beginCommandBuffer(vk, *cmdBuffer, 0);
224
225 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
226 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
227
228 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
229
230 endCommandBuffer(vk, *cmdBuffer);
231
232 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
233
234 invalidateAlloc(vk, device, buffer->getAllocation());
235 for (deUint32 i = 0; i < workgroupSize; ++i)
236 {
237 deUint32 expected = i;
238 if (ptr[i] != expected)
239 {
240 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
241 return tcu::TestStatus::fail("compute failed");
242 }
243 }
244
245 return tcu::TestStatus::pass("compute succeeded");
246 }
247
248 class AliasTest : public vkt::TestCase
249 {
250 public:
251 enum Requirements
252 {
253 RequirementNone = 0,
254 RequirementFloat16 = 1 << 0,
255 RequirementFloat64 = 1 << 1,
256 RequirementInt8 = 1 << 2,
257 RequirementInt16 = 1 << 3,
258 RequirementInt64 = 1 << 4,
259 };
260
261 enum Flags
262 {
263 FlagNone = 0,
264 FlagLayoutStd430 = 1 << 0,
265 FlagLayoutStd140 = 1 << 1,
266 FlagLayoutScalar = 1 << 2,
267 FlagFunction = 1 << 3,
268 FlagBarrier = 1 << 4,
269 };
270
271 enum LayoutFlags
272 {
273 LayoutNone = 0,
274
275 LayoutDefault = 1 << 0,
276 LayoutStd140 = 1 << 1,
277 LayoutStd430 = 1 << 2,
278 LayoutScalar = 1 << 3,
279 LayoutAll = LayoutDefault | LayoutStd140 | LayoutStd430 | LayoutScalar,
280
281 LayoutCount = 4,
282 };
283
284 enum Function
285 {
286 FunctionNone = 0,
287 FunctionRead,
288 FunctionWrite,
289 FunctionReadWrite,
290 FunctionCount,
291 };
292
293 enum Synchronization
294 {
295 SynchronizationNone = 0,
296 SynchronizationBarrier,
297 SynchronizationCount,
298 };
299
300 struct CaseDef
301 {
302 std::string extraTypes;
303
304 std::string writeDesc;
305 std::string writeType;
306 std::string writeValue;
307
308 std::string readDesc;
309 std::string readType;
310 std::string readValue;
311
312 LayoutFlags layout;
313 Function func;
314 Synchronization sync;
315 Requirements requirements;
316
testNamevkt::compute::__anon5d3a54a00111::AliasTest::CaseDef317 std::string testName() const
318 {
319 std::string name = writeDesc + "_to_" + readDesc;
320
321 // In a valid test case, only one flag will be set.
322 switch (layout)
323 {
324 case LayoutDefault:
325 name += "_default";
326 break;
327 case LayoutStd140:
328 name += "_std140";
329 break;
330 case LayoutStd430:
331 name += "_std430";
332 break;
333 case LayoutScalar:
334 name += "_scalar";
335 break;
336 default:
337 DE_ASSERT(0);
338 break;
339 }
340
341 switch (func)
342 {
343 case FunctionNone:
344 break;
345 case FunctionRead:
346 name += "_func_read";
347 break;
348 case FunctionWrite:
349 name += "_func_write";
350 break;
351 case FunctionReadWrite:
352 name += "_func_read_write";
353 break;
354 default:
355 DE_ASSERT(0);
356 break;
357 }
358
359 switch (sync)
360 {
361 case SynchronizationNone:
362 break;
363 case SynchronizationBarrier:
364 name += "_barrier";
365 break;
366 default:
367 DE_ASSERT(0);
368 break;
369 }
370
371 return name;
372 }
373 };
374
AliasTest(tcu::TestContext & testCtx,const CaseDef & caseDef)375 AliasTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
376 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
377 m_caseDef(caseDef)
378 {
379 }
380
381 virtual void checkSupport(Context& context) const;
382 void initPrograms(SourceCollections& sourceCollections) const;
383
384 class Instance : public vkt::TestInstance
385 {
386 public:
Instance(Context & context,const CaseDef & caseDef)387 Instance(Context& context, const CaseDef& caseDef)
388 : TestInstance(context),
389 m_caseDef(caseDef)
390 {
391 }
392
iterate(void)393 tcu::TestStatus iterate(void)
394 {
395 return runCompute(m_context, 1u);
396 }
397
398 private:
399 CaseDef m_caseDef;
400 };
401
createInstance(Context & context) const402 TestInstance* createInstance(Context& context) const
403 {
404 return new Instance(context, m_caseDef);
405 }
406
407 private:
408 CaseDef m_caseDef;
409 };
410
checkSupport(Context & context) const411 void AliasTest::checkSupport(Context& context) const
412 {
413 CheckSupportParams p;
414 deMemset(&p, 0, sizeof(p));
415
416 p.needsScalar = m_caseDef.layout == LayoutScalar;
417 p.needsInt8 = m_caseDef.requirements & RequirementInt8;
418 p.needsInt16 = m_caseDef.requirements & RequirementInt16;
419 p.needsInt64 = m_caseDef.requirements & RequirementInt64;
420 p.needsFloat16 = m_caseDef.requirements & RequirementFloat16;
421 p.needsFloat64 = m_caseDef.requirements & RequirementFloat64;
422
423 checkSupportWithParams(context, p);
424 }
425
initPrograms(SourceCollections & sourceCollections) const426 void AliasTest::initPrograms(SourceCollections& sourceCollections) const
427 {
428 std::string layout;
429 switch (m_caseDef.layout)
430 {
431 case LayoutStd140:
432 layout = "layout(std140)";
433 break;
434 case LayoutStd430:
435 layout = "layout(std430)";
436 break;
437 case LayoutScalar:
438 layout = "layout(scalar)";
439 break;
440 default:
441 // No layout specified.
442 break;
443 }
444
445 std::ostringstream src;
446
447 src << "#version 450\n";
448 src << "#extension GL_EXT_shared_memory_block : enable\n";
449 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
450
451 if (m_caseDef.layout == LayoutScalar)
452 src << "#extension GL_EXT_scalar_block_layout : enable\n";
453
454 src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
455
456 if (!m_caseDef.extraTypes.empty())
457 src << m_caseDef.extraTypes << ";\n";
458
459 src << layout << "shared A { " << m_caseDef.writeType << "; } a;\n";
460 src << layout << "shared B { " << m_caseDef.readType << "; } b;\n";
461 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
462
463 if (m_caseDef.func == FunctionRead ||
464 m_caseDef.func == FunctionReadWrite)
465 {
466 src << "void read(int index) {\n";
467 src << " if (b.v == " << m_caseDef.readValue << ")\n";
468 src << " result = index;\n";
469 src << "}\n";
470 }
471
472 if (m_caseDef.func == FunctionWrite ||
473 m_caseDef.func == FunctionReadWrite)
474 {
475 src << "void write(int index) {\n";
476 src << " if (index == 0)\n";
477 src << " a.v = " << m_caseDef.writeValue << ";\n";
478 src << "}\n";
479 }
480
481 src << "void main() {\n";
482 src << " int index = int(gl_LocalInvocationIndex);\n";
483
484 if (m_caseDef.func == FunctionWrite)
485 src << " write(index);\n";
486 else
487 src << " a.v = " << m_caseDef.writeValue << ";\n";
488
489 if (m_caseDef.sync == SynchronizationBarrier)
490 src << " barrier();\n";
491
492 if (m_caseDef.func == FunctionRead ||
493 m_caseDef.func == FunctionReadWrite)
494 {
495 src << " read(index);\n";
496 }
497 else
498 {
499 src << " if (b.v == " << m_caseDef.readValue << ")\n";
500 src << " result = index;\n";
501 }
502 src << "}\n";
503
504 deUint32 buildFlags =
505 m_caseDef.layout == LayoutScalar ? ShaderBuildOptions::FLAG_ALLOW_WORKGROUP_SCALAR_OFFSETS :
506 ShaderBuildOptions::Flags(0u);
507
508 sourceCollections.glslSources.add("comp")
509 << glu::ComputeSource(src.str())
510 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4, buildFlags);
511 }
512
// Formats 'values' as a GLSL array constructor whose elements are each
// wrapped in a 'type' constructor, e.g. "T[](T(1), T(2))". An empty vector
// yields "T[]()".
std::string makeArray(const std::string& type, const std::vector<deUint64>& values)
{
	std::ostringstream out;
	const char* separator = "";

	out << type << "[](";
	for (std::vector<deUint64>::const_iterator it = values.begin(); it != values.end(); ++it)
	{
		// The separator is empty for the first element only.
		out << separator << type << "(" << std::to_string(*it) << ")";
		separator = ", ";
	}
	out << ")";

	return out.str();
}
526
makeU8Array(const std::vector<deUint64> & values)527 std::string makeU8Array(const std::vector<deUint64>& values)
528 {
529 return makeArray("uint8_t", values);
530 }
531
makeU16Array(const std::vector<deUint64> & values)532 std::string makeU16Array(const std::vector<deUint64>& values)
533 {
534 return makeArray("uint16_t", values);
535 }
536
makeU32Array(const std::vector<deUint64> & values)537 std::string makeU32Array(const std::vector<deUint64>& values)
538 {
539 return makeArray("uint32_t", values);
540 }
541
AddAliasTests(tcu::TestCaseGroup * group)542 void AddAliasTests(tcu::TestCaseGroup* group)
543 {
544 const int DEFAULT = AliasTest::LayoutDefault;
545 const int STD140 = AliasTest::LayoutStd140;
546 const int STD430 = AliasTest::LayoutStd430;
547 const int SCALAR = AliasTest::LayoutScalar;
548 const int ALL = DEFAULT | STD140 | STD430 | SCALAR;
549
550 const int FLOAT16 = AliasTest::RequirementFloat16;
551 const int FLOAT64 = AliasTest::RequirementFloat64;
552 const int INT8 = AliasTest::RequirementInt8;
553 const int INT16 = AliasTest::RequirementInt16;
554 const int INT64 = AliasTest::RequirementInt64;
555
556 #define CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2) \
557 { E, D1, T1, V1, D2, T2, V2, AliasTest::LayoutFlags(L), AliasTest::FunctionNone, AliasTest::SynchronizationNone, AliasTest::Requirements(R) }
558
559 #define CASE_EXTRA_WITH_REVERSE(L, R, E, D1, T1, V1, D2, T2, V2) \
560 CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2), \
561 CASE_EXTRA(L, R, E, D2, T2, V2, D1, T1, V1)
562
563 #define CASE_WITH_REVERSE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA_WITH_REVERSE(L, R, "", D1, T1, V1, D2, T2, V2)
564 #define CASE_SAME_TYPE(R, D, T, V) CASE_EXTRA(ALL, R, "", D, T, V, D, T, V)
565 #define CASE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA(L, R, "", D1, T1, V1, D2, T2, V2)
566
567
568 std::vector<AliasTest::CaseDef> cases =
569 {
570 CASE_SAME_TYPE(0, "bool_true", "bool v", "true"),
571 CASE_SAME_TYPE(0, "bool_false", "bool v", "false"),
572 CASE_SAME_TYPE(0, "bvec2", "bvec2 v", "bvec2(false, true)"),
573 CASE_SAME_TYPE(0, "bvec3", "bvec3 v", "bvec3(false, true, true)"),
574 CASE_SAME_TYPE(0, "bvec4", "bvec4 v", "bvec4(false, true, true, false)"),
575 CASE_SAME_TYPE(INT8, "u8", "uint8_t v", "uint8_t(10)"),
576 CASE_SAME_TYPE(INT8, "u8vec2", "u8vec2 v", "u8vec2(10, 20)"),
577 CASE_SAME_TYPE(INT8, "u8vec3", "u8vec3 v", "u8vec3(10, 20, 30)"),
578 CASE_SAME_TYPE(INT8, "u8vec4", "u8vec4 v", "u8vec4(10, 20, 30, 40)"),
579 CASE_SAME_TYPE(INT8, "i8", "int8_t v", "int8_t(-10)"),
580 CASE_SAME_TYPE(INT8, "i8vec2", "i8vec2 v", "i8vec2(-10, 20)"),
581 CASE_SAME_TYPE(INT8, "i8vec3", "i8vec3 v", "i8vec3(-10, 20, -30)"),
582 CASE_SAME_TYPE(INT8, "i8vec4", "i8vec4 v", "i8vec4(-10, 20, -30, 40)"),
583 CASE_SAME_TYPE(INT16, "u16", "uint16_t v", "uint16_t(1000)"),
584 CASE_SAME_TYPE(INT16, "u16vec2", "u16vec2 v", "u16vec2(1000, 2000)"),
585 CASE_SAME_TYPE(INT16, "u16vec3", "u16vec3 v", "u16vec3(1000, 2000, 3000)"),
586 CASE_SAME_TYPE(INT16, "u16vec4", "u16vec4 v", "u16vec4(1000, 2000, 3000, 4000)"),
587 CASE_SAME_TYPE(INT16, "i16", "int16_t v", "int16_t(-1000)"),
588 CASE_SAME_TYPE(INT16, "i16vec2", "i16vec2 v", "i16vec2(-1000, 2000)"),
589 CASE_SAME_TYPE(INT16, "i16vec3", "i16vec3 v", "i16vec3(-1000, 2000, -3000)"),
590 CASE_SAME_TYPE(INT16, "i16vec4", "i16vec4 v", "i16vec4(-1000, 2000, -3000, 4000)"),
591 CASE_SAME_TYPE(0, "u32", "uint32_t v", "uint32_t(100)"),
592 CASE_SAME_TYPE(0, "uvec2", "uvec2 v", "uvec2(100, 200)"),
593 CASE_SAME_TYPE(0, "uvec3", "uvec3 v", "uvec3(100, 200, 300)"),
594 CASE_SAME_TYPE(0, "uvec4", "uvec4 v", "uvec4(100, 200, 300, 400)"),
595 CASE_SAME_TYPE(0, "i32", "int32_t v", "int32_t(-100)"),
596 CASE_SAME_TYPE(0, "ivec2", "ivec2 v", "ivec2(-100, 200)"),
597 CASE_SAME_TYPE(0, "ivec3", "ivec3 v", "ivec3(-100, 200, -300)"),
598 CASE_SAME_TYPE(0, "ivec4", "ivec4 v", "ivec4(-100, 200, -300, 400)"),
599 CASE_SAME_TYPE(INT64, "u64", "uint64_t v", "uint64_t(1000)"),
600 CASE_SAME_TYPE(INT64, "u64vec2", "u64vec2 v", "u64vec2(1000, 2000)"),
601 CASE_SAME_TYPE(INT64, "u64vec3", "u64vec3 v", "u64vec3(1000, 2000, 3000)"),
602 CASE_SAME_TYPE(INT64, "u64vec4", "u64vec4 v", "u64vec4(1000, 2000, 3000, 4000)"),
603 CASE_SAME_TYPE(INT64, "i64", "int64_t v", "int64_t(-1000)"),
604 CASE_SAME_TYPE(INT64, "i64vec2", "i64vec2 v", "i64vec2(-1000, 2000)"),
605 CASE_SAME_TYPE(INT64, "i64vec3", "i64vec3 v", "i64vec3(-1000, 2000, -3000)"),
606 CASE_SAME_TYPE(INT64, "i64vec4", "i64vec4 v", "i64vec4(-1000, 2000, -3000, 4000)"),
607 CASE_SAME_TYPE(FLOAT16, "f16", "float16_t v", "float16_t(-100.0)"),
608 CASE_SAME_TYPE(FLOAT16, "f16vec2", "f16vec2 v", "f16vec2(100.0, -200.0)"),
609 CASE_SAME_TYPE(FLOAT16, "f16vec3", "f16vec3 v", "f16vec3(100.0, -200.0, 300.0)"),
610 CASE_SAME_TYPE(FLOAT16, "f16vec4", "f16vec4 v", "f16vec4(100.0, -200.0, 300.0, -400.0)"),
611 CASE_SAME_TYPE(0, "f32", "float32_t v", "float32_t(-100.0)"),
612 CASE_SAME_TYPE(0, "f32vec2", "f32vec2 v", "f32vec2(100.0, -200.0)"),
613 CASE_SAME_TYPE(0, "f32vec3", "f32vec3 v", "f32vec3(100.0, -200.0, 300.0)"),
614 CASE_SAME_TYPE(0, "f32vec4", "f32vec4 v", "f32vec4(100.0, -200.0, 300.0, -400.0)"),
615 CASE_SAME_TYPE(FLOAT64, "f64", "float64_t v", "float32_t(-100.0)"),
616 CASE_SAME_TYPE(FLOAT64, "f64vec2", "f64vec2 v", "f64vec2(100.0, -200.0)"),
617 CASE_SAME_TYPE(FLOAT64, "f64vec3", "f64vec3 v", "f64vec3(100.0, -200.0, 300.0)"),
618 CASE_SAME_TYPE(FLOAT64, "f64vec4", "f64vec4 v", "f64vec4(100.0, -200.0, 300.0, -400.0)"),
619 CASE_SAME_TYPE(FLOAT16, "f16mat2x2", "f16mat2x2 v", "f16mat2x2(1, 2, 3, 4)"),
620 CASE_SAME_TYPE(FLOAT16, "f16mat2x3", "f16mat2x3 v", "f16mat2x3(1, 2, 3, 4, 5, 6)"),
621 CASE_SAME_TYPE(FLOAT16, "f16mat2x4", "f16mat2x4 v", "f16mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
622 CASE_SAME_TYPE(FLOAT16, "f16mat3x2", "f16mat3x2 v", "f16mat3x2(1, 2, 3, 4, 5, 6)"),
623 CASE_SAME_TYPE(FLOAT16, "f16mat3x3", "f16mat3x3 v", "f16mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
624 CASE_SAME_TYPE(FLOAT16, "f16mat3x4", "f16mat3x4 v", "f16mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
625 CASE_SAME_TYPE(FLOAT16, "f16mat4x2", "f16mat4x2 v", "f16mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
626 CASE_SAME_TYPE(FLOAT16, "f16mat4x3", "f16mat4x3 v", "f16mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
627 CASE_SAME_TYPE(FLOAT16, "f16mat4x4", "f16mat4x4 v", "f16mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
628 CASE_SAME_TYPE(0, "f32mat2x2", "f32mat2x2 v", "f32mat2x2(1, 2, 3, 4)"),
629 CASE_SAME_TYPE(0, "f32mat2x3", "f32mat2x3 v", "f32mat2x3(1, 2, 3, 4, 5, 6)"),
630 CASE_SAME_TYPE(0, "f32mat2x4", "f32mat2x4 v", "f32mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
631 CASE_SAME_TYPE(0, "f32mat3x2", "f32mat3x2 v", "f32mat3x2(1, 2, 3, 4, 5, 6)"),
632 CASE_SAME_TYPE(0, "f32mat3x3", "f32mat3x3 v", "f32mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
633 CASE_SAME_TYPE(0, "f32mat3x4", "f32mat3x4 v", "f32mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
634 CASE_SAME_TYPE(0, "f32mat4x2", "f32mat4x2 v", "f32mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
635 CASE_SAME_TYPE(0, "f32mat4x3", "f32mat4x3 v", "f32mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
636 CASE_SAME_TYPE(0, "f32mat4x4", "f32mat4x4 v", "f32mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
637 CASE_SAME_TYPE(FLOAT64, "f64mat2x2", "f64mat2x2 v", "f64mat2x2(1, 2, 3, 4)"),
638 CASE_SAME_TYPE(FLOAT64, "f64mat2x3", "f64mat2x3 v", "f64mat2x3(1, 2, 3, 4, 5, 6)"),
639 CASE_SAME_TYPE(FLOAT64, "f64mat2x4", "f64mat2x4 v", "f64mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
640 CASE_SAME_TYPE(FLOAT64, "f64mat3x2", "f64mat3x2 v", "f64mat3x2(1, 2, 3, 4, 5, 6)"),
641 CASE_SAME_TYPE(FLOAT64, "f64mat3x3", "f64mat3x3 v", "f64mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
642 CASE_SAME_TYPE(FLOAT64, "f64mat3x4", "f64mat3x4 v", "f64mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
643 CASE_SAME_TYPE(FLOAT64, "f64mat4x2", "f64mat4x2 v", "f64mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
644 CASE_SAME_TYPE(FLOAT64, "f64mat4x3", "f64mat4x3 v", "f64mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
645 CASE_SAME_TYPE(FLOAT64, "f64mat4x4", "f64mat4x4 v", "f64mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
646
647 CASE_WITH_REVERSE(ALL, INT8,
648 "i8", "int8_t v", "int8_t(-2)",
649 "u8", "uint8_t v", "uint8_t(0xFE)"),
650 CASE_WITH_REVERSE(ALL, INT16,
651 "i16", "int16_t v", "int16_t(-2)",
652 "u16", "uint16_t v", "uint16_t(0xFFFE)"),
653 CASE_WITH_REVERSE(ALL, 0,
654 "i32", "int32_t v", "int32_t(-2)",
655 "u32", "uint32_t v", "uint32_t(0xFFFFFFFE)"),
656 CASE_WITH_REVERSE(ALL, INT64,
657 "i64", "int64_t v", "int64_t(-2UL)",
658 "u64", "uint64_t v", "uint64_t(0xFFFFFFFFFFFFFFFEUL)"),
659 CASE_WITH_REVERSE(ALL, FLOAT16 | INT16,
660 "f16", "float16_t v", "float16_t(1.0)",
661 "u16", "uint16_t v", "uint16_t(0x3C00)"),
662 CASE_WITH_REVERSE(ALL, 0,
663 "f32", "float32_t v", "float32_t(1.0)",
664 "u32", "uint32_t v", "uint32_t(0x3F800000)"),
665 CASE_WITH_REVERSE(ALL, FLOAT64 | INT64,
666 "f64", "float64_t v", "float64_t(1.0)",
667 "u64", "uint64_t v", "uint64_t(0x3FF0000000000000UL)"),
668
669 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
670 "u16", "uint16_t v", "uint16_t(0x1234)",
671 "u8_array", "uint8_t v[2]", makeU8Array({0x34, 0x12})),
672 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
673 "u32", "uint32_t v", "uint32_t(0x12345678)",
674 "u8_array", "uint8_t v[4]", makeU8Array({0x78, 0x56, 0x34, 0x12})),
675 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
676 "u32", "uint32_t v", "uint32_t(0x12345678)",
677 "u16_array", "uint16_t v[2]", makeU16Array({0x5678, 0x1234})),
678 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
679 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
680 "u8_array", "uint8_t v[8]", makeU8Array({0xEF, 0xCD, 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12})),
681 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
682 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
683 "u16_array", "uint16_t v[4]", makeU16Array({0xCDEF, 0x90AB, 0x5678, 0x1234})),
684 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
685 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
686 "u32_array", "uint32_t v[2]", makeU32Array({0x90ABCDEF, 0x12345678})),
687 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
688 "i16", "int16_t v", "int16_t(-2)",
689 "u8_array", "uint8_t v[2]", makeU8Array({0xFE, 0xFF})),
690 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
691 "i32", "int32_t v", "int32_t(-2)",
692 "u8_array", "uint8_t v[4]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF})),
693 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
694 "i32", "int32_t v", "int32_t(-2)",
695 "u16_array", "uint16_t v[2]", makeU16Array({0xFFFE, 0xFFFF})),
696 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
697 "i64", "int64_t v", "int64_t(-2UL)",
698 "u8_array", "uint8_t v[8]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF})),
699 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
700 "i64", "int64_t v", "int64_t(-2UL)",
701 "u16_array", "uint16_t v[4]", makeU16Array({0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF})),
702 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
703 "i64", "int64_t v", "int64_t(-2UL)",
704 "u32_array", "uint32_t v[2]", makeU32Array({0xFFFFFFFE, 0xFFFFFFFF})),
705 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT16 | INT8,
706 "f16", "float16_t v", "float16_t(1.0)",
707 "u8_array", "uint8_t v[2]", makeU8Array({0x00, 0x3C})),
708 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
709 "f32", "float32_t v", "float32_t(1.0)",
710 "u8_array", "uint8_t v[4]", makeU8Array({0x00, 0x00, 0x80, 0x3F})),
711 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
712 "f32", "float32_t v", "float32_t(1.0)",
713 "u16_array", "uint16_t v[2]", makeU16Array({0x0000, 0x3F80})),
714 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT8,
715 "f64", "float64_t v", "float64_t(1.0)",
716 "u8_array", "uint8_t v[8]", makeU8Array({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F})),
717 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT16,
718 "f64", "float64_t v", "float64_t(1.0)",
719 "u16_array", "uint16_t v[4]", makeU16Array({0x0000, 0x0000, 0x0000, 0x3FF0})),
720 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64,
721 "f64", "float64_t v", "float64_t(1.0)",
722 "u32_array", "uint32_t v[2]", makeU32Array({0x00000000, 0x3FF00000})),
723
724 CASE(DEFAULT | STD430, 0,
725 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
726 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
727 CASE(STD140, 0,
728 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 999, 999), vec4(2, 2, 999, 999), vec4(3, 3, 999, 999))",
729 "vec2_array", "vec2 v[3]", "vec2[](vec2(1), vec2(2), vec2(3))"),
730 CASE(SCALAR, 0,
731 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
732 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
733
734 CASE(DEFAULT | STD430, 0,
735 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
736 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
737 CASE(STD140, 0,
738 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
739 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
740 CASE(SCALAR, 0,
741 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 2), vec4(2, 2, 3, 3), vec4(3, 4, 4, 4))",
742 "vec3_array", "vec3 v[4]", "vec3[](vec3(1), vec3(2), vec3(3), vec3(4))"),
743
744 CASE_EXTRA(DEFAULT | STD430 | SCALAR, INT8,
745 "struct s { int a; int b; }",
746 "u8_array", "uint8_t v[8]", makeU8Array({2, 0, 0, 0, 0xFE, 0xFF, 0xFF, 0xFF}),
747 "struct_int_int", "s v", "s(2, -2)"),
748 CASE_EXTRA(ALL, 0,
749 "struct s { int a; int b; }",
750 "uvec2", "uvec2 v", "uvec2(2, 0xFFFFFFFE)",
751 "struct_int_int", "s v", "s(2, -2)"),
752 };
753
754 #undef CASE_EXTRA
755 #undef CASE_EXTRA_WITH_REVERSE
756 #undef CASE_WITH_REVERSE
757 #undef CASE_SAME_TYPE
758 #undef CASE
759
760 for (deUint32 i = 0; i < cases.size(); i++)
761 {
762 for (int syncIndex = 0; syncIndex < AliasTest::SynchronizationCount; syncIndex++)
763 {
764 const AliasTest::Synchronization sync = AliasTest::Synchronization(syncIndex);
765
766 for (int funcIndex = 0; funcIndex < AliasTest::FunctionCount; funcIndex++)
767 {
768 const AliasTest::Function func = AliasTest::Function(funcIndex);
769
770 for (int layoutIndex = 0; layoutIndex < AliasTest::LayoutCount; layoutIndex++)
771 {
772 const AliasTest::LayoutFlags layout = AliasTest::LayoutFlags(1 << layoutIndex);
773
774 AliasTest::CaseDef c = cases[i];
775
776 if (c.writeDesc == c.readDesc)
777 continue;
778
779 if ((c.layout & layout) == 0)
780 continue;
781
782 c.layout = layout;
783 c.func = func;
784 c.sync = sync;
785
786 group->addChild(new AliasTest(group->getTestContext(), c));
787 }
788 }
789 }
790 }
791 }
792
793 class ZeroTest : public vkt::TestCase
794 {
795 public:
796 struct CaseDef
797 {
798 glu::DataType zeroElementType;
799 glu::DataType fieldType[2];
800 deUint32 elements;
801
testNamevkt::compute::__anon5d3a54a00111::ZeroTest::CaseDef802 std::string testName() const
803 {
804 std::string name = glu::getDataTypeName(zeroElementType);
805 name += "_array_to";
806
807 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(fieldType); ++i)
808 {
809 if (fieldType[i] == glu::TYPE_INVALID)
810 break;
811 name += "_";
812 name += glu::getDataTypeName(fieldType[i]);
813 }
814 name += "_array_" + de::toString(elements);
815 return name;
816 }
817 };
818
ZeroTest(tcu::TestContext & testCtx,const CaseDef & caseDef)819 ZeroTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
820 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
821 m_caseDef(caseDef)
822 {
823 }
824
825 virtual void checkSupport(Context& context) const;
826 void initPrograms(SourceCollections& sourceCollections) const;
827
828 class Instance : public vkt::TestInstance
829 {
830 public:
Instance(Context & context)831 Instance(Context& context)
832 : TestInstance(context)
833 {
834 }
835
iterate(void)836 tcu::TestStatus iterate(void)
837 {
838 return runCompute(m_context, 1u);
839 }
840 };
841
createInstance(Context & context) const842 TestInstance* createInstance(Context& context) const
843 {
844 return new Instance(context);
845 }
846
847 private:
848 CaseDef m_caseDef;
849 };
850
checkSupport(Context & context) const851 void ZeroTest::checkSupport(Context& context) const
852 {
853 CheckSupportParams p;
854 deMemset(&p, 0, sizeof(p));
855
856 DE_ASSERT(!glu::isDataTypeFloat16OrVec(m_caseDef.zeroElementType));
857
858 p.useType(m_caseDef.zeroElementType);
859 p.useType(m_caseDef.fieldType[0]);
860 p.useType(m_caseDef.fieldType[1]);
861
862 checkSupportWithParams(context, p);
863 }
864
getDataTypeLiteral(glu::DataType dt,std::string baseValue)865 std::string getDataTypeLiteral(glu::DataType dt, std::string baseValue)
866 {
867 using namespace glu;
868
869 if (isDataTypeVector(dt))
870 {
871 std::string elemValue = getDataTypeLiteral(getDataTypeScalarType(dt), baseValue);
872
873 std::ostringstream result;
874 result << getDataTypeName(dt) << "(";
875 for (int i = 0; i < getDataTypeScalarSize(dt); ++i)
876 {
877 if (i > 0)
878 result << ", ";
879 result << elemValue;
880 }
881 result << ")";
882 return result.str();
883 }
884 else if (isDataTypeScalar(dt))
885 {
886 return getDataTypeName(dt) + std::string("(") + baseValue + std::string(")");
887 }
888 else
889 {
890 DE_ASSERT(0);
891 return std::string();
892 }
893 }
894
initPrograms(SourceCollections & sourceCollections) const895 void ZeroTest::initPrograms(SourceCollections& sourceCollections) const
896 {
897 using namespace glu;
898
899 std::ostringstream src;
900
901 src << "#version 450\n"
902 << "#extension GL_EXT_shared_memory_block : enable\n"
903 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
904 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
905
906 // Large enough to cover the largest B block even if just 8-bit elements.
907 // Small enough to fit in the minimum shared memory size limit even if with uvec4.
908 src << "shared A { " << getDataTypeName(m_caseDef.zeroElementType) << " arr[256]; } zero;\n";
909
910 src << "struct st {\n"
911 << " " << getDataTypeName(m_caseDef.fieldType[0]) << " x;\n";
912 if (m_caseDef.fieldType[1])
913 src << " " << getDataTypeName(m_caseDef.fieldType[1]) << " y;\n";
914 src << "};\n";
915
916
917 src << "shared B { st arr[4]; };\n"
918 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n"
919 << "void main() {\n"
920 << "for (int i = 0; i < zero.arr.length(); i++) {\n"
921 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "1") << ";\n"
922 << " }\n"
923 << " for (int i = 0; i < zero.arr.length(); i++) {\n"
924 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "0") << ";\n"
925 << " }\n"
926 << " result = (\n";
927
928 for (deUint32 i = 0; i < 4; i++)
929 {
930 src << " ";
931 if (i > 0)
932 src << "&& ";
933 src << "(arr[" << de::toString(i) << "].x == " << getDataTypeLiteral(m_caseDef.fieldType[0], "0") << ")\n";
934 if (m_caseDef.fieldType[1])
935 src << " && (arr[" << de::toString(i) << "].y == " << getDataTypeLiteral(m_caseDef.fieldType[1], "0") << ")\n";
936 }
937
938 src << " ) ? 0 : 0xFF;\n"
939 << "}\n";
940
941 sourceCollections.glslSources.add("comp")
942 << ComputeSource(src.str())
943 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
944 vk::ShaderBuildOptions::Flags(0u));
945 }
946
isTestedZeroElementType(glu::DataType dt)947 bool isTestedZeroElementType(glu::DataType dt)
948 {
949 using namespace glu;
950
951 // Select only a few interesting types.
952 switch (dt)
953 {
954 case TYPE_UINT:
955 case TYPE_UINT_VEC4:
956 case TYPE_UINT8:
957 case TYPE_UINT8_VEC4:
958 case TYPE_UINT16:
959 return true;
960 default:
961 return false;
962 }
963 }
964
isTestedFieldType(glu::DataType dt)965 bool isTestedFieldType(glu::DataType dt)
966 {
967 using namespace glu;
968
969 // Select only a few interesting types.
970 switch (dt)
971 {
972 case TYPE_UINT:
973 case TYPE_UINT_VEC3:
974 case TYPE_UINT8:
975 case TYPE_UINT16:
976 case TYPE_FLOAT:
977 case TYPE_FLOAT_VEC4:
978 case TYPE_FLOAT16:
979 case TYPE_DOUBLE:
980 case TYPE_DOUBLE_VEC4:
981 case TYPE_BOOL:
982 return true;
983
984 default:
985 return false;
986 }
987 }
988
AddZeroTests(tcu::TestCaseGroup * group)989 void AddZeroTests(tcu::TestCaseGroup* group)
990 {
991 using namespace glu;
992
993 ZeroTest::CaseDef c;
994
995 for (deUint32 i = 0; i < TYPE_LAST; ++i)
996 {
997 c.zeroElementType = DataType(i);
998
999 if (isTestedZeroElementType(c.zeroElementType))
1000 {
1001 deUint32 idx[2] = { 0, 0 };
1002
1003 while (idx[1] < TYPE_LAST && idx[0] < TYPE_LAST)
1004 {
1005 c.fieldType[0] = DataType(idx[0]);
1006 c.fieldType[1] = DataType(idx[1]);
1007
1008 if (isTestedFieldType(c.fieldType[0]) &&
1009 (c.fieldType[1] == TYPE_INVALID || isTestedFieldType(c.fieldType[1])))
1010 {
1011 for (deUint32 elements = 1; elements <= 4; ++elements)
1012 {
1013 c.elements = elements;
1014 group->addChild(new ZeroTest(group->getTestContext(), c));
1015 }
1016 }
1017
1018 idx[0]++;
1019 if (idx[0] >= TYPE_LAST)
1020 {
1021 idx[1]++;
1022 idx[0] = 0;
1023 }
1024 }
1025 }
1026 }
1027 }
1028
1029 class PaddingTest : public vkt::TestCase
1030 {
1031 public:
1032 struct CaseDef
1033 {
1034 std::vector<glu::DataType> types;
1035 std::vector<deUint32> offsets;
1036 std::vector<std::string> values;
1037 deUint32 expected[32];
1038
testNamevkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1039 std::string testName() const
1040 {
1041 DE_ASSERT(types.size() > 0);
1042 DE_ASSERT(types.size() == offsets.size());
1043 DE_ASSERT(types.size() == values.size());
1044
1045 std::string name;
1046 for (deUint32 i = 0; i < types.size(); ++i)
1047 {
1048 if (i > 0)
1049 name += "_";
1050 name += glu::getDataTypeName(types[i]);
1051 name += "_" + de::toString(offsets[i]);
1052 }
1053 return name;
1054 }
1055
addvkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1056 void add(glu::DataType dt, deUint32 offset, const std::string& v)
1057 {
1058 types.push_back(dt);
1059 offsets.push_back(offset);
1060 values.push_back(v);
1061 }
1062
needsScalarvkt::compute::__anon5d3a54a00111::PaddingTest::CaseDef1063 bool needsScalar() const
1064 {
1065 for (deUint32 i = 0; i < offsets.size(); ++i)
1066 {
1067 if (offsets[i] % 4 != 0)
1068 return true;
1069 }
1070 return false;
1071 }
1072 };
1073
PaddingTest(tcu::TestContext & testCtx,const CaseDef & caseDef)1074 PaddingTest(tcu::TestContext& testCtx, const CaseDef& caseDef)
1075 : TestCase(testCtx, caseDef.testName(), caseDef.testName()),
1076 m_caseDef(caseDef)
1077 {
1078 }
1079
1080 virtual void checkSupport(Context& context) const;
1081 void initPrograms(SourceCollections& sourceCollections) const;
1082
1083 class Instance : public vkt::TestInstance
1084 {
1085 public:
Instance(Context & context,const CaseDef & caseDef)1086 Instance(Context& context, const CaseDef& caseDef)
1087 : TestInstance(context),
1088 m_caseDef(caseDef)
1089 {
1090 }
1091
iterate(void)1092 tcu::TestStatus iterate(void)
1093 {
1094 return runCompute(m_context, 1u);
1095 }
1096
1097 private:
1098 CaseDef m_caseDef;
1099 };
1100
createInstance(Context & context) const1101 TestInstance* createInstance(Context& context) const
1102 {
1103 return new Instance(context, m_caseDef);
1104 }
1105
1106 private:
1107 CaseDef m_caseDef;
1108 };
1109
checkSupport(Context & context) const1110 void PaddingTest::checkSupport(Context& context) const
1111 {
1112 CheckSupportParams p;
1113 deMemset(&p, 0, sizeof(p));
1114
1115 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1116 p.useType(m_caseDef.types[i]);
1117
1118 p.needsScalar = m_caseDef.needsScalar();
1119
1120 checkSupportWithParams(context, p);
1121 }
1122
initPrograms(SourceCollections & sourceCollections) const1123 void PaddingTest::initPrograms(SourceCollections& sourceCollections) const
1124 {
1125 using namespace glu;
1126
1127 std::ostringstream src;
1128
1129 src << "#version 450\n"
1130 << "#extension GL_EXT_shared_memory_block : enable\n"
1131 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
1132 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
1133
1134 src << "shared A { uint32_t words[32]; };\n";
1135
1136 if (m_caseDef.needsScalar())
1137 {
1138 src << "#extension GL_EXT_scalar_block_layout : enable\n"
1139 << "layout (scalar) ";
1140 }
1141
1142 src << "shared B {\n";
1143
1144 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1145 {
1146 src << " layout(offset = " << m_caseDef.offsets[i] << ") "
1147 << glu::getDataTypeName(m_caseDef.types[i]) << " x" << i << ";\n";
1148 }
1149
1150 src << "};\n"
1151 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1152
1153 src << "void main() {\n"
1154 << "for (int i = 0; i < 32; i++) words[i] = 0;\n";
1155
1156 for (deUint32 i = 0; i < m_caseDef.values.size(); ++i)
1157 src << "x" << i << " = " << m_caseDef.values[i] << ";\n";
1158
1159 src << "result = 32;\n";
1160 for (deUint32 i = 0; i < 32; ++i)
1161 {
1162 src << "if (words[" << std::dec << i << "] == 0x"
1163 << std::uppercase << std::hex << m_caseDef.expected[i]
1164 << ") result--;\n";
1165 }
1166
1167 src << "}\n";
1168
1169 sourceCollections.glslSources.add("comp")
1170 << ComputeSource(src.str())
1171 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1172 vk::ShaderBuildOptions::Flags(0u));
1173 }
1174
AddPaddingTests(tcu::TestCaseGroup * group)1175 void AddPaddingTests(tcu::TestCaseGroup* group)
1176 {
1177 using namespace glu;
1178
1179 for (deUint32 i = 0; i < 31; ++i)
1180 {
1181 for (deUint32 j = i + 1; j < 32; j += 4)
1182 {
1183 PaddingTest::CaseDef c;
1184 deMemset(&c, 0, sizeof(c));
1185
1186 c.add(TYPE_UINT, 4 * i, "0x1234");
1187 c.expected[i] = 0x1234;
1188
1189 c.add(TYPE_UINT, 4 * j, "0x5678");
1190 c.expected[j] = 0x5678;
1191
1192 group->addChild(new PaddingTest(group->getTestContext(), c));
1193 }
1194 }
1195
1196 for (deUint32 i = 0; i < 127; ++i)
1197 {
1198 for (deUint32 j = i + 1; j < 32; j += 16)
1199 {
1200 PaddingTest::CaseDef c;
1201 deMemset(&c, 0, sizeof(c));
1202
1203 deUint8* expected = reinterpret_cast<deUint8*>(c.expected);
1204
1205 c.add(TYPE_UINT8, i, "uint8_t(0xAA)");
1206 expected[i] = 0xAA;
1207
1208 c.add(TYPE_UINT8, j, "uint8_t(0xBB)");
1209 expected[j] = 0xBB;
1210
1211 group->addChild(new PaddingTest(group->getTestContext(), c));
1212 }
1213 }
1214 }
1215
1216 class SizeTest : public vkt::TestCase
1217 {
1218 public:
SizeTest(tcu::TestContext & testCtx,deUint32 size)1219 SizeTest(tcu::TestContext& testCtx, deUint32 size)
1220 : TestCase(testCtx, de::toString(size), de::toString(size))
1221 , m_size(size)
1222 {
1223 DE_ASSERT(size % 8 == 0);
1224 }
1225
1226 virtual void checkSupport(Context& context) const;
1227 void initPrograms(SourceCollections& sourceCollections) const;
1228
1229 class Instance : public vkt::TestInstance
1230 {
1231 public:
Instance(Context & context)1232 Instance(Context& context)
1233 : TestInstance(context)
1234 {
1235 }
1236
iterate(void)1237 tcu::TestStatus iterate(void)
1238 {
1239 return runCompute(m_context, 1u);
1240 }
1241 };
1242
createInstance(Context & context) const1243 TestInstance* createInstance(Context& context) const
1244 {
1245 return new Instance(context);
1246 }
1247
1248 private:
1249 deUint32 m_size;
1250 };
1251
checkSupport(Context & context) const1252 void SizeTest::checkSupport(Context& context) const
1253 {
1254 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
1255 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
1256
1257 if (context.getDeviceProperties().limits.maxComputeSharedMemorySize < m_size)
1258 TCU_THROW(NotSupportedError, "Not enough shared memory supported.");
1259 }
1260
initPrograms(SourceCollections & sourceCollections) const1261 void SizeTest::initPrograms(SourceCollections& sourceCollections) const
1262 {
1263 using namespace glu;
1264
1265 std::ostringstream src;
1266
1267 src << "#version 450\n";
1268 src << "#extension GL_EXT_shared_memory_block : enable\n";
1269 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
1270 src << "layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;\n";
1271
1272 for (deUint32 i = 0; i < 8; ++i)
1273 src << "shared B" << i << " { uint32_t words[" << (m_size / 4) << "]; } b" << i << ";\n";
1274
1275 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1276
1277 src << "void main() {\n";
1278 src << " int index = int(gl_LocalInvocationIndex);\n";
1279 src << " int size = " << (m_size / 4) << ";\n";
1280
1281 src << " if (index == 0) for (int x = 0; x < size; x++) b0.words[x] = 0xFFFF;\n";
1282 src << " barrier();\n";
1283
1284 src << " for (int x = 0; x < size; x++) {\n";
1285 src << " if (x % 8 != index) continue;\n";
1286 for (deUint32 i = 0; i < 8; ++i)
1287 src << " if (index == " << i << ") b" << i << ".words[x] = (x << 3) | " << i << ";\n";
1288 src << " }\n";
1289
1290 src << " barrier();\n";
1291 src << " if (index != 0) return;\n";
1292
1293 src << " int r = size;\n";
1294 src << " for (int x = 0; x < size; x++) {\n";
1295 src << " int expected = (x << 3) | (x % 8);\n";
1296 src << " if (b0.words[x] == expected) r--;\n";
1297 src << " }\n";
1298 src << " result = r;\n";
1299 src << "}\n";
1300
1301 sourceCollections.glslSources.add("comp")
1302 << ComputeSource(src.str())
1303 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1304 vk::ShaderBuildOptions::Flags(0u));
1305 }
1306
AddSizeTests(tcu::TestCaseGroup * group)1307 void AddSizeTests(tcu::TestCaseGroup* group)
1308 {
1309 deUint32 sizes[] =
1310 {
1311 8u,
1312 64u,
1313 4096u,
1314
1315 // Dynamic generation of shaders based on properties reported
1316 // by devices is not allowed in the CTS, so let's create a few
1317 // variants based on common known maximum sizes.
1318 16384u,
1319 32768u,
1320 49152u,
1321 65536u,
1322 };
1323
1324 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(sizes); ++i)
1325 group->addChild(new SizeTest(group->getTestContext(), sizes[i]));
1326 }
1327
CreateAmberTestCase(tcu::TestContext & testCtx,const char * name,const char * description,const std::string & filename,const std::vector<std::string> & requirements=std::vector<std::string> ())1328 cts_amber::AmberTestCase* CreateAmberTestCase(tcu::TestContext& testCtx,
1329 const char* name,
1330 const char* description,
1331 const std::string& filename,
1332 const std::vector<std::string>& requirements = std::vector<std::string>())
1333 {
1334 vk::SpirVAsmBuildOptions asm_options(VK_MAKE_VERSION(1, 1, 0), vk::SPIRV_VERSION_1_4);
1335 asm_options.supports_VK_KHR_spirv_1_4 = true;
1336
1337 cts_amber::AmberTestCase *t = cts_amber::createAmberTestCase(testCtx, name, description, "compute/workgroup_memory_explicit_layout", filename, requirements);
1338 t->setSpirVAsmBuildOptions(asm_options);
1339 t->addRequirement("VK_KHR_workgroup_memory_explicit_layout");
1340 return t;
1341 }
1342
AddCopyMemoryTests(tcu::TestCaseGroup * group)1343 void AddCopyMemoryTests(tcu::TestCaseGroup* group)
1344 {
1345 tcu::TestContext& testCtx = group->getTestContext();
1346
1347 group->addChild(CreateAmberTestCase(testCtx, "basic", "", "copy_memory_basic.amber"));
1348 group->addChild(CreateAmberTestCase(testCtx, "two_invocations", "", "copy_memory_two_invocations.amber"));
1349 group->addChild(CreateAmberTestCase(testCtx, "variable_pointers", "", "copy_memory_variable_pointers.amber",
1350 { "VariablePointerFeatures.variablePointers" }));
1351 }
1352
1353 } // anonymous
1354
createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext & testCtx)1355 tcu::TestCaseGroup* createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext& testCtx)
1356 {
1357 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "workgroup_memory_explicit_layout", "VK_KHR_workgroup_memory_explicit_layout tests"));
1358
1359 tcu::TestCaseGroup* alias = new tcu::TestCaseGroup(testCtx, "alias", "Aliasing between different blocks and types");
1360 AddAliasTests(alias);
1361 tests->addChild(alias);
1362
1363 tcu::TestCaseGroup* zero = new tcu::TestCaseGroup(testCtx, "zero", "Manually zero initialize a block and read from another");
1364 AddZeroTests(zero);
1365 tests->addChild(zero);
1366
1367 tcu::TestCaseGroup* padding = new tcu::TestCaseGroup(testCtx, "padding", "Padding as part of the explicit layout");
1368 AddPaddingTests(padding);
1369 tests->addChild(padding);
1370
1371 tcu::TestCaseGroup* size = new tcu::TestCaseGroup(testCtx, "size", "Test blocks of various sizes");
1372 AddSizeTests(size);
1373 tests->addChild(size);
1374
1375 tcu::TestCaseGroup* copy_memory = new tcu::TestCaseGroup(testCtx, "copy_memory", "Test OpCopyMemory with Workgroup memory");
1376 AddCopyMemoryTests(copy_memory);
1377 tests->addChild(copy_memory);
1378
1379 return tests.release();
1380 }
1381
1382 } // compute
1383 } // vkt
1384