1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Google LLC.
7 * Copyright (c) 2023 LunarG, Inc.
8 * Copyright (c) 2023 Nintendo
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 *
22 *//*!
23 * \file
24 * \brief VK_KHR_zero_initialize_workgroup_memory tests
25 *//*--------------------------------------------------------------------*/
26
27 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
28 #include "vktTestCase.hpp"
29 #include "vktTestCaseUtil.hpp"
30 #include "vktTestGroupUtil.hpp"
31 #include "vktAmberTestCase.hpp"
32
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkObjUtil.hpp"
40 #include "vkDefs.hpp"
41 #include "vkRef.hpp"
42
43 #include "tcuCommandLine.hpp"
44 #include "tcuTestLog.hpp"
45
46 #include "deRandom.hpp"
47 #include "deStringUtil.hpp"
48 #include "deUniquePtr.hpp"
49
50 #include <algorithm>
51 #include <vector>
52
53 using namespace vk;
54
55 namespace vkt
56 {
57 namespace compute
58 {
59 namespace
60 {
61
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,vk::ComputePipelineConstructionType m_computePipelineConstructionType,const std::vector<deUint32> specValues={},deUint32 increment=0)62 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
63 deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
64 vk::ComputePipelineConstructionType m_computePipelineConstructionType,
65 const std::vector<deUint32> specValues = {},
66 deUint32 increment = 0)
67 {
68 const DeviceInterface& vk = context.getDeviceInterface();
69 const VkDevice device = context.getDevice();
70 Allocator& allocator = context.getDefaultAllocator();
71 tcu::TestLog& log = context.getTestContext().getLog();
72
73 de::MovePtr<BufferWithMemory> buffer;
74 VkDescriptorBufferInfo bufferDescriptor;
75
76 VkDeviceSize size = bufferSize;
77 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
78 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
79 MemoryRequirement::HostVisible));
80 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
81
82 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
83 deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
84
85 DescriptorSetLayoutBuilder layoutBuilder;
86 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
87
88 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
89 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
90 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
91 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
92 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
93
94 std::vector<VkSpecializationMapEntry> entries(specValues.size());
95 if (!specValues.empty())
96 {
97 for (deUint32 i = 0; i < specValues.size(); ++i)
98 {
99 entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
100 }
101 }
102 const VkSpecializationInfo specInfo =
103 {
104 (deUint32)specValues.size(),
105 entries.data(),
106 specValues.size() * sizeof(deUint32),
107 specValues.data(),
108 };
109 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
110 flushAlloc(vk, device, buffer->getAllocation());
111
112 ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType, context.getBinaryCollection().get("comp"));
113 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
114 pipeline.setSpecializationInfo(specInfo);
115 pipeline.buildPipeline();
116
117 const VkQueue queue = context.getUniversalQueue();
118 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
119 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
120 context.getUniversalQueueFamilyIndex());
121 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
122
123 DescriptorSetUpdateBuilder setUpdateBuilder;
124 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
125 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
126 setUpdateBuilder.update(vk, device);
127
128 beginCommandBuffer(vk, *cmdBuffer, 0);
129
130 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
131 pipeline.bind(*cmdBuffer);
132
133 vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
134
135 endCommandBuffer(vk, *cmdBuffer);
136
137 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
138
139 invalidateAlloc(vk, device, buffer->getAllocation());
140
141 for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
142 {
143 deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
144 if (ptr[i] != expected)
145 {
146 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
147 return tcu::TestStatus::fail("compute failed");
148 }
149 }
150
151 return tcu::TestStatus::pass("compute succeeded");
152 }
153
// Instance for the max-workgroup-memory tests: dispatches m_numWorkgroups
// workgroups whose shader zero-initializes a maximally-sized shared array
// (see MaxWorkgroupMemoryInstance::iterate below).
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
	// numWorkgroups: number of workgroups dispatched in the X dimension.
	MaxWorkgroupMemoryInstance (Context& context, deUint32 numWorkgroups, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance (context)
		, m_numWorkgroups (numWorkgroups)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate (void);

private:
	deUint32							m_numWorkgroups;					// Dispatch size in X.
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;	// Monolithic pipeline vs. shader objects, etc.
};
169
// Test case that zero-initializes the maximum supported amount of workgroup
// (shared) memory and checks every element reads back as zero.
class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
	// numWorkgroups: number of workgroups to dispatch (forwarded to the instance).
	MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
						   const std::string& name,
						   deUint32 numWorkgroups,
						   const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase (testCtx, name),
		m_numWorkgroups (numWorkgroups)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}

	// Builds the GLSL compute shader (added to the collection as "comp").
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
	}
	virtual void checkSupport(Context& context) const;

private:
	deUint32							m_numWorkgroups;
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;
};
194
// Requires VK_KHR_zero_initialize_workgroup_memory and verifies any extra
// requirements of the selected pipeline construction type (e.g. shader objects).
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
	checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}
200
initPrograms(SourceCollections & sourceCollections) const201 void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
202 {
203 std::ostringstream src;
204 src << "#version 450\n";
205 src << "#extension GL_EXT_null_initializer : enable\n";
206 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
207 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
208 src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
209 src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
210 src << "shared uvec4 wg_mem[num_elems] = {};\n";
211 src << "void main() {\n";
212 src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
213 src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
214 src << " uint idx_x = gl_LocalInvocationID.x;\n";
215 src << " uint idx = idx_x + idx_y + idx_z;\n";
216 src << " uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
217 src << " for (uint i = 0; i < num_elems; ++i) {\n";
218 src << " for (uint j = 0; j < 4; ++j) {\n";
219 src << " uint shared_idx = 4*i + j;\n";
220 src << " uint wg_val = wg_mem[i][j];\n";
221 src << " if (idx == shared_idx) {\n";
222 src << " atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
223 src << " } else if (idx == 0 && shared_idx >= wg_size) {\n";
224 src << " atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
225 src << " }\n";
226 src << " }\n";
227 src << " }\n";
228 src << "}\n";
229
230 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
231 }
232
iterate(void)233 tcu::TestStatus MaxWorkgroupMemoryInstance::iterate (void)
234 {
235 VkPhysicalDeviceProperties properties;
236 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
237 const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;
238
239 const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
240 deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
241 deUint32 wgy = 1;
242 deUint32 wgz = 1;
243 if (wgx < maxWG)
244 {
245 wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
246 }
247 if ((wgx * wgy) < maxWG)
248 {
249 wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
250 }
251 const deUint32 size = maxMemSize;
252 const deUint32 numElems = maxMemSize / 16;
253
254 return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
255 }
256
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)257 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
258 {
259 std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
260 for (deUint32 i = 0; i < workgroups.size(); ++i) {
261 deUint32 numWG = workgroups[i];
262 group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
263 de::toString(numWG), numWG, computePipelineConstructionType));
264 }
265 }
266
// Parameters for one basic-type zero-initialization case (see TypeTest).
struct TypeCaseDef
{
	std::string	typeName;		// GLSL type under test, e.g. "uvec4" or "f16mat2x3".
	deUint32	typeSize;		// Size in bytes of one scalar component (1/2/4/8).
	deUint32	numElements;	// Components along the inner index (vector length); 1 for scalars.
	deUint32	numRows;		// Entries along the outer index; 1 for scalars/vectors.
								// NOTE(review): for matNxM entries the table stores numRows = N,
								// numElements = M — confirm mapping against TypeTest::initPrograms indexing.
	deUint32	numVariables;	// Number of shared variables declared (set randomly in AddTypeTests).
};
275
// Instance for the per-type tests: runs the shader generated by
// TypeTest::initPrograms and checks the output buffer via runCompute().
class TypeTestInstance : public vkt::TestInstance
{
public:
	TypeTestInstance(Context& context, const TypeCaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance (context)
		, m_caseDef (caseDef)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate (void);

private:
	TypeCaseDef							m_caseDef;							// Type and layout parameters for this case.
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;
};
291
// Test case that zero-initializes shared variables of a single GLSL type
// (scalar, vector or matrix, possibly with extended-arithmetic types) and
// verifies every component reads back as zero.
class TypeTest : public vkt::TestCase
{
public:
	TypeTest(tcu::TestContext& testCtx,
			 const std::string& name,
			 const TypeCaseDef& caseDef,
			 const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase (testCtx, name)
		, m_caseDef (caseDef)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}

	// Builds the GLSL compute shader (added to the collection as "comp").
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
	}
	// Throws NotSupportedError when the case's type needs an unsupported shader feature.
	virtual void checkSupport(Context& context) const;

private:
	TypeCaseDef							m_caseDef;
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;
};
316
checkSupport(Context & context) const317 void TypeTest::checkSupport(Context& context) const
318 {
319 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
320 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
321
322 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
323 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
324 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
325 f16_i8_features.pNext = DE_NULL;
326
327 VkPhysicalDeviceFeatures2 features2;
328 deMemset(&features2, 0, sizeof(features2));
329 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
330 features2.pNext = &f16_i8_features;
331 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
332
333 if (m_caseDef.typeName == "float16_t" ||
334 m_caseDef.typeName == "f16vec2" ||
335 m_caseDef.typeName == "f16vec3" ||
336 m_caseDef.typeName == "f16vec4" ||
337 m_caseDef.typeName == "f16mat2x2" ||
338 m_caseDef.typeName == "f16mat2x3" ||
339 m_caseDef.typeName == "f16mat2x4" ||
340 m_caseDef.typeName == "f16mat3x2" ||
341 m_caseDef.typeName == "f16mat3x3" ||
342 m_caseDef.typeName == "f16mat3x4" ||
343 m_caseDef.typeName == "f16mat4x2" ||
344 m_caseDef.typeName == "f16mat4x3" ||
345 m_caseDef.typeName == "f16mat4x4")
346 {
347 if (f16_i8_features.shaderFloat16 != VK_TRUE)
348 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
349 }
350
351 if (m_caseDef.typeName == "float64_t" ||
352 m_caseDef.typeName == "f64vec2" ||
353 m_caseDef.typeName == "f64vec3" ||
354 m_caseDef.typeName == "f64vec4"||
355 m_caseDef.typeName == "f64mat2x2" ||
356 m_caseDef.typeName == "f64mat2x3" ||
357 m_caseDef.typeName == "f64mat2x4" ||
358 m_caseDef.typeName == "f64mat3x2" ||
359 m_caseDef.typeName == "f64mat3x3" ||
360 m_caseDef.typeName == "f64mat3x4" ||
361 m_caseDef.typeName == "f64mat4x2" ||
362 m_caseDef.typeName == "f64mat4x3" ||
363 m_caseDef.typeName == "f64mat4x4")
364 {
365 if (features2.features.shaderFloat64 != VK_TRUE)
366 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
367 }
368
369 if (m_caseDef.typeName == "int8_t" ||
370 m_caseDef.typeName == "i8vec2" ||
371 m_caseDef.typeName == "i8vec3" ||
372 m_caseDef.typeName == "i8vec4" ||
373 m_caseDef.typeName == "uint8_t" ||
374 m_caseDef.typeName == "u8vec2" ||
375 m_caseDef.typeName == "u8vec3" ||
376 m_caseDef.typeName == "u8vec4")
377 {
378 if (f16_i8_features.shaderInt8 != VK_TRUE)
379 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
380 }
381
382 if (m_caseDef.typeName == "int16_t" ||
383 m_caseDef.typeName == "i16vec2" ||
384 m_caseDef.typeName == "i16vec3" ||
385 m_caseDef.typeName == "i16vec4" ||
386 m_caseDef.typeName == "uint16_t" ||
387 m_caseDef.typeName == "u16vec2" ||
388 m_caseDef.typeName == "u16vec3" ||
389 m_caseDef.typeName == "u16vec4")
390 {
391 if (features2.features.shaderInt16 != VK_TRUE)
392 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
393 }
394
395 if (m_caseDef.typeName == "int64_t" ||
396 m_caseDef.typeName == "i64vec2" ||
397 m_caseDef.typeName == "i64vec3" ||
398 m_caseDef.typeName == "i64vec4" ||
399 m_caseDef.typeName == "uint64_t" ||
400 m_caseDef.typeName == "u64vec2" ||
401 m_caseDef.typeName == "u64vec3" ||
402 m_caseDef.typeName == "u64vec4")
403 {
404 if (features2.features.shaderInt64 != VK_TRUE)
405 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
406 }
407 }
408
// Builds a compute shader that declares numVariables zero-initialized ("= {}")
// shared variables of the case's type, then writes per component 0 if the
// component read back as zero and 1 otherwise; runCompute() expects all zeros.
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	// One invocation per component of one variable; each invocation checks its
	// component in every variable.
	src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
		src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
	}
	src << "void main() {\n";
	if (m_caseDef.numRows > 1)
	{
		// Matrix cases: map the flat invocation index to a (row, col) pair.
		src << " uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
		src << " uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
	}
	// Components are compared after conversion to an integer wide enough for the
	// component type: int64_t for 8-byte types, int otherwise.
	std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
	for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
	{
		if (m_caseDef.numElements == 1)
		{
			// Scalars.
			src << " a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
		}
		else if (m_caseDef.numRows == 1)
		{
			// Vectors.
			src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
		}
		else
		{
			// Matrices.
			src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
		}
	}
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
449
iterate(void)450 tcu::TestStatus TypeTestInstance::iterate (void)
451 {
452 const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
453 return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
454 }
455
// Registers one TypeTest per GLSL type, covering bool/int/uint/float scalars,
// vectors and matrices at 8/16/32/64-bit component widths.
void AddTypeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
	// Fixed-seed RNG so the per-type variable counts chosen below are stable
	// across runs (test names and behavior stay deterministic).
	deRandom rnd;
	deRandom_init(&rnd, 0);
	// Table columns: {typeName, typeSize, numElements, numRows, numVariables};
	// numVariables is 0 here and overwritten with a random 1..16 below.
	std::vector<TypeCaseDef> cases =
	{
		{"bool",		1, 1, 1, 0},
		{"bvec2",		1, 2, 1, 0},
		{"bvec3",		1, 3, 1, 0},
		{"bvec4",		1, 4, 1, 0},
		{"uint32_t",	4, 1, 1, 0},
		{"uvec2",		4, 2, 1, 0},
		{"uvec3",		4, 3, 1, 0},
		{"uvec4",		4, 4, 1, 0},
		{"int32_t",		4, 1, 1, 0},
		{"ivec2",		4, 2, 1, 0},
		{"ivec3",		4, 3, 1, 0},
		{"ivec4",		4, 4, 1, 0},
		{"uint8_t",		1, 1, 1, 0},
		{"u8vec2",		1, 2, 1, 0},
		{"u8vec3",		1, 3, 1, 0},
		{"u8vec4",		1, 4, 1, 0},
		{"int8_t",		1, 1, 1, 0},
		{"i8vec2",		1, 2, 1, 0},
		{"i8vec3",		1, 3, 1, 0},
		{"i8vec4",		1, 4, 1, 0},
		{"uint16_t",	2, 1, 1, 0},
		{"u16vec2",		2, 2, 1, 0},
		{"u16vec3",		2, 3, 1, 0},
		{"u16vec4",		2, 4, 1, 0},
		{"int16_t",		2, 1, 1, 0},
		{"i16vec2",		2, 2, 1, 0},
		{"i16vec3",		2, 3, 1, 0},
		{"i16vec4",		2, 4, 1, 0},
		{"uint64_t",	8, 1, 1, 0},
		{"u64vec2",		8, 2, 1, 0},
		{"u64vec3",		8, 3, 1, 0},
		{"u64vec4",		8, 4, 1, 0},
		{"int64_t",		8, 1, 1, 0},
		{"i64vec2",		8, 2, 1, 0},
		{"i64vec3",		8, 3, 1, 0},
		{"i64vec4",		8, 4, 1, 0},
		{"float32_t",	4, 1, 1, 0},
		{"f32vec2",		4, 2, 1, 0},
		{"f32vec3",		4, 3, 1, 0},
		{"f32vec4",		4, 4, 1, 0},
		{"f32mat2x2",	4, 2, 2, 0},
		{"f32mat2x3",	4, 3, 2, 0},
		{"f32mat2x4",	4, 4, 2, 0},
		{"f32mat3x2",	4, 2, 3, 0},
		{"f32mat3x3",	4, 3, 3, 0},
		{"f32mat3x4",	4, 4, 3, 0},
		{"f32mat4x2",	4, 2, 4, 0},
		{"f32mat4x3",	4, 3, 4, 0},
		{"f32mat4x4",	4, 4, 4, 0},
		{"float16_t",	2, 1, 1, 0},
		{"f16vec2",		2, 2, 1, 0},
		{"f16vec3",		2, 3, 1, 0},
		{"f16vec4",		2, 4, 1, 0},
		{"f16mat2x2",	2, 2, 2, 0},
		{"f16mat2x3",	2, 3, 2, 0},
		{"f16mat2x4",	2, 4, 2, 0},
		{"f16mat3x2",	2, 2, 3, 0},
		{"f16mat3x3",	2, 3, 3, 0},
		{"f16mat3x4",	2, 4, 3, 0},
		{"f16mat4x2",	2, 2, 4, 0},
		{"f16mat4x3",	2, 3, 4, 0},
		{"f16mat4x4",	2, 4, 4, 0},
		{"float64_t",	8, 1, 1, 0},
		{"f64vec2",		8, 2, 1, 0},
		{"f64vec3",		8, 3, 1, 0},
		{"f64vec4",		8, 4, 1, 0},
		{"f64mat2x2",	8, 2, 2, 0},
		{"f64mat2x3",	8, 3, 2, 0},
		{"f64mat2x4",	8, 4, 2, 0},
		{"f64mat3x2",	8, 2, 3, 0},
		{"f64mat3x3",	8, 3, 3, 0},
		{"f64mat3x4",	8, 4, 3, 0},
		{"f64mat4x2",	8, 2, 4, 0},
		{"f64mat4x3",	8, 3, 4, 0},
		{"f64mat4x4",	8, 4, 4, 0},
	};

	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		// Declare between 1 and 16 shared variables of the type.
		cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
		group->addChild(
			new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i], computePipelineConstructionType));
	}
}
546
// Parameters for one composite-type (array/struct/nested) zero-initialization case.
struct CompositeCaseDef
{
	deUint32				index;			// Bitmask of required shader features, decoded in
											// CompositeTest::checkSupport (0x1 float16, 0x2 float64,
											// 0x4 int8, 0x8 int16, 0x10 int64).
	std::string				typeDefinition;	// GLSL declaring the zero-initialized shared variable(s).
	std::string				assignment;		// GLSL main() body writing per-component results to the output buffer.
	deUint32				elements;		// Number of result uints written (= output buffer size in words).
	std::vector<deUint32>	specValues;		// Values for spec constants specId0..N referenced by the GLSL.

	CompositeCaseDef (uint32_t index_, const std::string& typeDefinition_, const std::string& assignment_, uint32_t elements_, const std::vector<uint32_t>& specValues_)
		: index (index_)
		, typeDefinition (typeDefinition_)
		, assignment (assignment_)
		, elements (elements_)
		, specValues (specValues_)
	{}
};
563
// Instance for the composite-type tests: runs the shader generated by
// CompositeTest::initPrograms and checks the output buffer via runCompute().
class CompositeTestInstance : public vkt::TestInstance
{
public:
	CompositeTestInstance (Context& context, const CompositeCaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestInstance (context)
		, m_caseDef (caseDef)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}
	tcu::TestStatus iterate (void);
private:
	CompositeCaseDef					m_caseDef;							// GLSL snippets and sizing for this case.
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;
};
578
// Test case that zero-initializes a composite shared-memory variable (arrays,
// structs, nested structs, spec-constant-sized) and verifies it reads back zero.
class CompositeTest : public vkt::TestCase
{
public:
	CompositeTest(tcu::TestContext& testCtx,
				  const std::string& name,
				  const CompositeCaseDef& caseDef,
				  const vk::ComputePipelineConstructionType computePipelineConstructionType)
		: TestCase (testCtx, name)
		, m_caseDef (caseDef)
		, m_computePipelineConstructionType (computePipelineConstructionType)
	{
	}

	// Builds the GLSL compute shader (added to the collection as "comp").
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
	}
	// Throws NotSupportedError when m_caseDef.index requires missing shader features.
	virtual void checkSupport(Context& context) const;
private:
	CompositeCaseDef					m_caseDef;
	vk::ComputePipelineConstructionType	m_computePipelineConstructionType;
};
602
checkSupport(Context & context) const603 void CompositeTest::checkSupport(Context& context) const
604 {
605 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
606 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
607
608 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
609 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
610 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
611 f16_i8_features.pNext = DE_NULL;
612
613 VkPhysicalDeviceFeatures2 features2;
614 deMemset(&features2, 0, sizeof(features2));
615 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
616 features2.pNext = &f16_i8_features;
617 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
618
619 bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
620 bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
621 bool needsInt8 = (m_caseDef.index & 0x4) != 0;
622 bool needsInt16 = (m_caseDef.index & 0x8) != 0;
623 bool needsInt64 = (m_caseDef.index & 0x10) != 0;
624
625 if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
626 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
627 if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
628 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
629 if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
630 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
631 if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
632 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
633 if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
634 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
635 }
636
initPrograms(SourceCollections & sourceCollections) const637 void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
638 {
639 std::ostringstream src;
640 src << "#version 450\n";
641 src << "#extension GL_EXT_null_initializer : enable\n";
642 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
643 src << "\n";
644 for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
645 src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
646 }
647 src << "\n";
648 src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
649 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
650 src << "\n";
651 src << m_caseDef.typeDefinition;
652 src << "\n";
653 src << "void main() {\n";
654 src << m_caseDef.assignment;
655 src << "}\n";
656
657 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
658 }
659
iterate(void)660 tcu::TestStatus CompositeTestInstance::iterate (void)
661 {
662 const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
663 return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
664 }
665
// Registers the composite-type cases (named "0".."10"): each zero-initializes a
// composite shared variable with "= {}" and has the shader emit, per component,
// 0 when the component read back as zero and nonzero otherwise; runCompute()
// then checks that the whole output buffer is zero.
void AddCompositeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
	// Each entry: {feature bitmask (see CompositeTest::checkSupport), shared
	// variable definition, main() body, result word count, spec-constant values}.
	const std::vector<CompositeCaseDef> cases
	{
		// Spec-constant-sized uint array.
		{0,
		"shared uint wg_mem[specId0] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[i] = wg_mem[i];\n"
		"}\n",
		16,
		{16},
		},

		// Two-dimensional float array sized by two spec constants.
		{0,
		"shared float wg_mem[specId0][specId1] = {};\n",

		"for (uint i = 0; i < specId0; ++i) {\n"
		" for (uint j = 0; j < specId1; ++j) {\n"
		" uint idx = i * specId1 + j;\n"
		" a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
		" }\n"
		"}\n",
		32,
		{4, 8},
		},

		// Struct mixing uint/float/bool scalars and vectors (30 components total).
		{0,
		"struct Sa {\n"
		" uint a;\n"
		" uvec2 b;\n"
		" uvec3 c;\n"
		" uvec4 d;\n"
		" float e;\n"
		" vec2 f;\n"
		" vec3 g;\n"
		" vec4 h;\n"
		" bool i;\n"
		" bvec2 j;\n"
		" bvec3 k;\n"
		" bvec4 l;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint i = 0;\n"
		"a.a[i++] = wg_mem.a;\n"
		"a.a[i++] = wg_mem.b.x;\n"
		"a.a[i++] = wg_mem.b.y;\n"
		"a.a[i++] = wg_mem.c.x;\n"
		"a.a[i++] = wg_mem.c.y;\n"
		"a.a[i++] = wg_mem.c.z;\n"
		"a.a[i++] = wg_mem.d.x;\n"
		"a.a[i++] = wg_mem.d.y;\n"
		"a.a[i++] = wg_mem.d.z;\n"
		"a.a[i++] = wg_mem.d.w;\n"
		"a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
		"a.a[i++] = wg_mem.i ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
		"a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
		30,
		{},
		},

		// Structs containing spec-constant-sized arrays of other structs.
		{0,
		"struct Sa {\n"
		" uint a;\n"
		"};\n"
		"struct Sb {\n"
		" uvec2 a;\n"
		"};\n"
		"struct Sc {\n"
		" Sa a[specId0];\n"
		" Sb b[specId1];\n"
		"};\n"
		"shared Sc wg_mem[specId2] = {};\n",

		"uint idx = 0;\n"
		"for (uint i = 0; i < specId2; ++i) {\n"
		" for (uint j = 0; j < specId0; ++j) {\n"
		" a.a[idx++] = wg_mem[i].a[j].a;\n"
		" }\n"
		" for (uint j = 0; j < specId1; ++j) {\n"
		" a.a[idx++] = wg_mem[i].b[j].a.x;\n"
		" a.a[idx++] = wg_mem[i].b[j].a.y;\n"
		" }\n"
		"}\n",
		32,
		{2,3,4},
		},

		// float16 struct members (needs shaderFloat16, bit 0x1).
		// NOTE(review): floatBitsToUint on float16_t values relies on implicit
		// conversion to float — confirm this is the intended comparison.
		{1,
		"struct Sa {\n"
		" f16vec2 a;\n"
		" float16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
		"}\n",
		18,
		{16},
		},

		// float64 struct members (needs shaderFloat64, bit 0x2).
		{2,
		"struct Sa {\n"
		" f64vec2 a;\n"
		" float64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
		"}\n",
		7,
		{5},
		},

		// int8 struct members (needs shaderInt8, bit 0x4).
		{4,
		"struct Sa {\n"
		" i8vec2 a;\n"
		" int8_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		34,
		{32},
		},

		// int16 struct members (needs shaderInt16, bit 0x8).
		{8,
		"struct Sa {\n"
		" i16vec2 a;\n"
		" int16_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		122,
		{120},
		},

		// int64 struct members (needs shaderInt64, bit 0x10).
		{16,
		"struct Sa {\n"
		" i64vec2 a;\n"
		" int64_t b[specId0];\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		"a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		"for (uint i = 0; i < specId0; ++i) {\n"
		" a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		"}\n",
		63,
		{61},
		},

		// One struct mixing all extended types (needs every feature, mask 0x1f).
		{0x1f,
		"struct Sa {\n"
		" float16_t a;\n"
		" float b;\n"
		" int8_t c;\n"
		" int16_t d;\n"
		" int e;\n"
		" int64_t f;\n"
		" float64_t g;\n"
		"};\n"
		"shared Sa wg_mem = {};\n",

		"uint idx = 0;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
		"a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
		"a.a[idx++] = uint(wg_mem.c);\n"
		"a.a[idx++] = uint(wg_mem.d);\n"
		"a.a[idx++] = uint(wg_mem.e);\n"
		"a.a[idx++] = uint(wg_mem.f);\n"
		"a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
		7,
		{},
		},

		// Five levels of nested structs with spec-constant-sized arrays at each
		// level; 872 = 2*(1 + 3*(1 + 4*(1 + 5*(1 + 6)))) result words.
		{0,
		"struct Sa {\n"
		" uint a;\n"
		"};\n"
		"struct Sb {\n"
		" Sa a[specId0];\n"
		" uint b;\n"
		"};\n"
		"struct Sc {\n"
		" Sb b[specId1];\n"
		" uint c;\n"
		"};\n"
		"struct Sd {\n"
		" Sc c[specId2];\n"
		" uint d;\n"
		"};\n"
		"struct Se {\n"
		" Sd d[specId3];\n"
		" uint e;\n"
		"};\n"
		"shared Se wg_mem[specId4] = {};\n",

		"uint idx = 0;\n"
		"for (uint i1 = 0; i1 < specId4; ++i1) {\n"
		" a.a[idx++] = wg_mem[i1].e;\n"
		" for (uint i2 = 0; i2 < specId3; ++i2) {\n"
		" a.a[idx++] = wg_mem[i1].d[i2].d;\n"
		" for (uint i3 = 0; i3 < specId2; ++i3) {\n"
		" a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
		" for (uint i4 = 0; i4 < specId1; ++i4) {\n"
		" a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
		" for (uint i5 = 0; i5 < specId0; ++i5) {\n"
		" a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
		" }\n"
		" }\n"
		" }\n"
		" }\n"
		"}\n",
		872,
		{6,5,4,3,2},
		},
	};

	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		group->addChild(
			new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
	}
}
930
// Axis selector for the max-workgroups tests: which dispatch dimension
// receives the maximum (65535) workgroup count in MaxWorkgroupsInstance::iterate.
enum Dim {
    DimX,
    DimY,
    DimZ,
};
936
// Instance that dispatches the maximum workgroup count along a single axis
// and verifies that the zero-initialized shared array always reads back zero.
class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupsInstance(Context &context, Dim dim, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance (context)
        , m_dim (dim)
        , m_computePipelineConstructionType (computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate (void);
private:
    Dim m_dim;                                                              // Axis that gets the 65535 workgroup count.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;  // Monolithic pipeline vs. shader-object path.
};
951
// Test case wrapper: builds the shader (initPrograms), checks the extension
// is available (checkSupport) and spawns MaxWorkgroupsInstance.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
    MaxWorkgroupsTest(tcu::TestContext& testCtx,
                      const std::string& name,
                      Dim dim,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase (testCtx, name)
        , m_dim (dim)
        , m_computePipelineConstructionType (computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections& sourceCollections) const;
    TestInstance* createInstance(Context& context) const
    {
        return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context& context) const;
private:
    Dim m_dim;                                                              // Axis forwarded to the instance.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;  // Pipeline construction flavour under test.
};
975
checkSupport(Context & context) const976 void MaxWorkgroupsTest::checkSupport(Context& context) const
977 {
978 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
979 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
980 }
981
initPrograms(SourceCollections & sourceCollections) const982 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
983 {
984 std::ostringstream src;
985 src << "#version 450\n";
986 src << "#extension GL_EXT_null_initializer : enable\n";
987 src << "\n";
988 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
989 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
990 src << "shared uint wg_mem[2] = {};\n";
991 std::string dim;
992 switch (m_dim) {
993 case DimX:
994 dim = "x";
995 break;
996 case DimY:
997 dim = "y";
998 break;
999 case DimZ:
1000 dim = "z";
1001 break;
1002 }
1003 src << "\n";
1004 src << "void main() {\n";
1005 src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1006 src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1007 src << " uint idx_x = gl_LocalInvocationID.x;\n";
1008 src << " uint idx = idx_x + idx_y + idx_z;\n";
1009 src << " if (gl_LocalInvocationID.x == 0) {\n";
1010 src << " wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1011 src << " }\n";
1012 src << " barrier();\n";
1013 src << " atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1014 src << "}\n";
1015
1016 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1017 }
1018
iterate(void)1019 tcu::TestStatus MaxWorkgroupsInstance::iterate (void)
1020 {
1021 VkPhysicalDeviceProperties properties;
1022 deMemset(&properties, 0, sizeof(properties));
1023 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1024
1025 const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1026 deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1027 deUint32 wgy = 1;
1028 deUint32 wgz = 1;
1029 if (wgx < maxWG)
1030 {
1031 wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1032 }
1033 if ((wgx * wgy) < maxWG)
1034 {
1035 wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1036 }
1037 deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1038
1039 deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1040 deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1041 deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1042
1043 return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz}, /*increment*/ 1);
1044 }
1045
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1046 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1047 {
1048 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
1049 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
1050 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
1051 }
1052
// Instance that runs a shader whose shared array dimensions come from the
// specialized workgroup size (x, y, z) and checks it is zero-initialized.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
    SpecializeWorkgroupInstance (Context &context, deUint32 x, deUint32 y, deUint32 z, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance (context)
        , m_x (x)
        , m_y (y)
        , m_z (z)
        , m_computePipelineConstructionType (computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate (void);
private:
    deUint32 m_x;  // Workgroup size passed as spec constants 0..2.
    deUint32 m_y;
    deUint32 m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;  // Pipeline vs. shader-object construction path.
};
1071
// Test case wrapper for SpecializeWorkgroupInstance; verifies the requested
// workgroup size fits the device limits in checkSupport.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
    SpecializeWorkgroupTest(tcu::TestContext& testCtx,
                            const std::string& name,
                            deUint32 x, deUint32 y, deUint32 z,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase (testCtx, name)
        , m_x (x)
        , m_y (y)
        , m_z (z)
        , m_computePipelineConstructionType (computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections& sourceCollections) const;
    TestInstance* createInstance(Context& context) const
    {
        return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context& context) const;
private:
    deUint32 m_x;  // Requested workgroup dimensions (1..8 each, per AddSpecializeWorkgroupTests).
    deUint32 m_y;
    deUint32 m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;  // Pipeline construction flavour under test.
};
1099
checkSupport(Context & context) const1100 void SpecializeWorkgroupTest::checkSupport(Context& context) const
1101 {
1102 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1103 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
1104
1105 VkPhysicalDeviceProperties properties;
1106 deMemset(&properties, 0, sizeof(properties));
1107 context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
1108 if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
1109 TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
1110 }
1111
initPrograms(SourceCollections & sourceCollections) const1112 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1113 {
1114 std::ostringstream src;
1115 src << "#version 450\n";
1116 src << "#extension GL_EXT_null_initializer : enable\n";
1117 src << "\n";
1118 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1119 src << "layout(constant_id = 1) const uint WGY = 1;\n";
1120 src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1121 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1122 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1123 src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1124 src << "\n";
1125 src << "void main() {\n";
1126 src << " a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1127 src << "}\n";
1128
1129 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1130 }
1131
iterate(void)1132 tcu::TestStatus SpecializeWorkgroupInstance::iterate (void)
1133 {
1134 const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1135 return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
1136 }
1137
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1138 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1139 {
1140 for (deUint32 z = 1; z <= 8; ++z)
1141 {
1142 for (deUint32 y = 1; y <= 8; ++y)
1143 {
1144 for (deUint32 x = 1; x <= 8; ++x)
1145 {
1146 group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1147 de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1148 x, y, z,
1149 computePipelineConstructionType));
1150 }
1151 }
1152 }
1153 }
1154
// Instance that records one command buffer and submits it m_repeat times,
// checking between submissions that shared memory was re-zero-initialized.
// Note: unlike the other instances it does not carry a pipeline-construction
// type; iterate() builds a classic compute pipeline directly.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
    RepeatedPipelineInstance (Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
        : TestInstance (context)
        , m_xSize (xSize)
        , m_repeat (repeat)
        , m_odd (odd)
    {
    }
    tcu::TestStatus iterate (void);
private:
    deUint32 m_xSize;   // Local size x (WGX spec constant); local size y is fixed at 2.
    deUint32 m_repeat;  // Number of times the prerecorded command buffer is submitted.
    deUint32 m_odd;     // Which y-row (0 or 1) the shader copies input data into.
};
1171
// Test case wrapper for RepeatedPipelineInstance. Constructor parameter order
// is (xSize, repeat, odd) -- callers must pass arguments in that order.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
    RepeatedPipelineTest(tcu::TestContext& testCtx,
                         const std::string& name,
                         deUint32 xSize, deUint32 repeat, deUint32 odd,
                         const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase (testCtx, name)
        , m_xSize (xSize)
        , m_repeat (repeat)
        , m_odd (odd)
        , m_computePipelineConstructionType (computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections& sourceCollections) const;
    TestInstance* createInstance(Context& context) const
    {
        return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
    }
    virtual void checkSupport(Context& context) const;
private:
    deUint32 m_xSize;   // Local size x (WGX spec constant).
    deUint32 m_repeat;  // Submission count forwarded to the instance.
    deUint32 m_odd;     // Row selector baked into the shader by initPrograms.
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;  // Used only for support checks here.
};
1199
checkSupport(Context & context) const1200 void RepeatedPipelineTest::checkSupport(Context& context) const
1201 {
1202 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1203 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
1204 }
1205
initPrograms(SourceCollections & sourceCollections) const1206 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1207 {
1208 std::ostringstream src;
1209 src << "#version 450\n";
1210 src << "#extension GL_EXT_null_initializer : enable\n";
1211 src << "\n";
1212 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1213 src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1214 src << "\n";
1215 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1216 src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1217 src << "\n";
1218 src << "shared uint wg_mem[WGX][2] = {};\n";
1219 src << "void main() {\n";
1220 src << " if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1221 src << " wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1222 src << " }\n";
1223 src << " barrier();\n";
1224 src << " a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1225 src << "}\n";
1226
1227 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1228 }
1229
// Records a single dispatch command buffer once, then submits it m_repeat
// times. Between submissions the output is validated (row m_odd must equal
// the input sequence, the other row must be zero -- proving wg_mem was
// re-zero-initialized) and re-poisoned with 0xff.
tcu::TestStatus RepeatedPipelineInstance::iterate (void)
{
    Context& context = m_context;
    const deUint32 bufferSize = m_xSize * 2 * (deUint32)sizeof(deUint32);
    const deUint32 numBuffers = 2;

    const DeviceInterface& vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    Allocator& allocator = context.getDefaultAllocator();
    tcu::TestLog& log = context.getTestContext().getLog();

    // Buffer 0 = shader output (binding 0), buffer 1 = shader input (binding 1).
    de::MovePtr<BufferWithMemory> buffers[numBuffers];
    VkDescriptorBufferInfo bufferDescriptors[numBuffers];

    VkDeviceSize size = bufferSize;
    for (deUint32 i = 0; i < numBuffers; ++i)
    {
        buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
            vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
            MemoryRequirement::HostVisible | MemoryRequirement::Cached));
        bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
    }

    // Seed the input with the identity sequence; poison the output with 0xff
    // so stale data cannot accidentally pass the check.
    deUint32* ptrs[numBuffers];
    for (deUint32 i = 0; i < numBuffers; ++i)
    {
        ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
    }
    for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
    {
        ptrs[1][i] = i;
    }
    deMemset(ptrs[0], 0xff, (size_t)size);

    // One storage-buffer binding per buffer.
    DescriptorSetLayoutBuilder layoutBuilder;
    for (deUint32 i = 0; i < numBuffers; ++i)
    {
        layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    }

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
        .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
        .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    // Specialize WGX (constant_id = 0) to the tested x size.
    const deUint32 specData[1] =
    {
        m_xSize,
    };
    const vk::VkSpecializationMapEntry entries[1] =
    {
        {0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
    };
    const vk::VkSpecializationInfo specInfo =
    {
        1,
        entries,
        sizeof(specData),
        specData
    };

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkPipelineLayoutCreateFlags)0,
        1,
        &descriptorSetLayout.get(),
        0u,
        DE_NULL,
    };
    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;

    // Make the host-written contents visible to the device before the first submit.
    for (deUint32 i = 0; i < numBuffers; ++i)
    {
        flushAlloc(vk, device, buffers[i]->getAllocation());
    }

    const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
    const VkPipelineShaderStageCreateInfo shaderInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        DE_NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        *shader,
        "main",
        &specInfo,
    };

    const VkComputePipelineCreateInfo pipelineInfo =
    {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        DE_NULL,
        0u,
        shaderInfo,
        *pipelineLayout,
        (VkPipeline)0,
        0u,
    };
    Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

    const VkQueue queue = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (deUint32 i = 0; i < numBuffers; ++i)
    {
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
    }
    setUpdateBuilder.update(vk, device);

    // Record once; the identical command buffer is re-submitted each iteration.
    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

    endCommandBuffer(vk, *cmdBuffer);

    for (deUint32 r = 0; r < m_repeat; ++r)
    {
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        invalidateAlloc(vk, device, buffers[0]->getAllocation());

        // Row m_odd must hold the copied input values (a.a[i] == i there);
        // every other slot must still be zero.
        for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
        {
            deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
            if (ptrs[0][i] != expected)
            {
                log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
                return tcu::TestStatus::fail("compute failed");
            }
        }

        // Re-poison the output and flush before the next submission.
        deMemset(ptrs[0], 0xff, (size_t)size);
        flushAlloc(vk, device, buffers[0]->getAllocation());
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
        setUpdateBuilder.update(vk, device);
    }

    return tcu::TestStatus::pass("compute succeeded");
}
1382
AddRepeatedPipelineTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1383 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1384 {
1385 std::vector<deUint32> xSizes = {4, 16, 32, 64};
1386 std::vector<deUint32> odds = {0, 1};
1387 std::vector<deUint32> repeats = {2, 4, 8, 16};
1388 for (deUint32 i = 0; i < xSizes.size(); ++i)
1389 {
1390 deUint32 x = xSizes[i];
1391 for (deUint32 j = 0; j < odds.size(); ++j)
1392 {
1393 deUint32 odd = odds[j];
1394 for (deUint32 k = 0; k < repeats.size(); ++k)
1395 {
1396 deUint32 repeat = repeats[k];
1397 group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1398 std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1399 x, odd, repeat, computePipelineConstructionType));
1400 }
1401 }
1402 }
1403 }
1404 #ifndef CTS_USES_VULKANSC
AddSharedMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType)1405 void AddSharedMemoryTests (tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType)
1406 {
1407 tcu::TestContext& testCtx = group->getTestContext();
1408 std::string filePath = "compute/zero_initialize_workgroup_memory";
1409 std::vector<std::string> requirements;
1410
1411 std::string testNames[] =
1412 {
1413 "workgroup_size_128",
1414 "workgroup_size_8x8x2",
1415 "workgroup_size_8x2x8",
1416 "workgroup_size_2x8x8",
1417 "workgroup_size_8x4x4",
1418 "workgroup_size_4x8x4",
1419 "workgroup_size_4x4x8"
1420 };
1421
1422 requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");
1423
1424 for (const auto& testName : testNames)
1425 {
1426 group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(), testName + ".amber", requirements));
1427 }
1428 }
1429 #endif // CTS_USES_VULKANSC
1430
1431 } // anonymous
1432
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx,vk::ComputePipelineConstructionType computePipelineConstructionType)1433 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
1434 {
1435 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));
1436
1437 tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1438 // Read initialization of max workgroup memory
1439 new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
1440 AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
1441 tests->addChild(maxWorkgroupMemoryGroup);
1442
1443 tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types");
1444 AddTypeTests(typeGroup, computePipelineConstructionType);
1445 tests->addChild(typeGroup);
1446
1447 tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
1448 AddCompositeTests(compositeGroup, computePipelineConstructionType);
1449 tests->addChild(compositeGroup);
1450
1451 tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
1452 AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
1453 tests->addChild(maxWorkgroupsGroup);
1454
1455 tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
1456 AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
1457 tests->addChild(specializeWorkgroupGroup);
1458
1459 tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
1460 AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
1461 tests->addChild(repeatPipelineGroup);
1462
1463 #ifndef CTS_USES_VULKANSC
1464 tcu::TestCaseGroup* subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
1465 AddSharedMemoryTests(subgroupInvocationGroup, computePipelineConstructionType);
1466 tests->addChild(subgroupInvocationGroup);
1467 #endif // CTS_USES_VULKANSC
1468
1469 return tests.release();
1470 }
1471
1472 } // compute
1473 } // vkt
1474