/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 * Copyright (c) 2020 Google LLC.
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief VK_KHR_zero_initialize_workgroup_memory tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktAmberTestCase.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkDefs.hpp"
#include "vkRef.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"

#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <algorithm>
#include <vector>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

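// Shared helper used by all test instances below: creates a host-visible storage
// buffer of bufferSize bytes, fills it with 0xff (or with zeros when increment != 0),
// builds a compute pipeline from the "comp" binary with the given specialization
// constants, dispatches numWGX x numWGY x numWGZ workgroups, and then checks that
// every 32-bit element of the buffer equals the expected value: 0 in the default
// mode, or the number of dispatched workgroups when the shader atomically
// increments elements that read back zero (increment mode). A "types" case, for
// example, calls runCompute(context, bytes, 1, 1, 1, constructionType) and expects
// an all-zero buffer.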
tcu::TestStatus runCompute(Context &context, uint32_t bufferSize, uint32_t numWGX, uint32_t numWGY, uint32_t numWGZ,
                           vk::ComputePipelineConstructionType computePipelineConstructionType,
                           const std::vector<uint32_t> specValues = {}, uint32_t increment = 0)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    Allocator &allocator = context.getDefaultAllocator();
    tcu::TestLog &log = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffer;
    VkDescriptorBufferInfo bufferDescriptor;

    VkDeviceSize size = bufferSize;
    buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator,
        makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
        MemoryRequirement::HostVisible));
    bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);

    uint32_t *ptr = (uint32_t *)buffer->getAllocation().getHostPtr();
    deMemset(ptr, increment ? 0 : 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    std::vector<VkSpecializationMapEntry> entries(specValues.size());
    if (!specValues.empty())
    {
        for (uint32_t i = 0; i < specValues.size(); ++i)
        {
            entries[i] = {i, (uint32_t)(sizeof(uint32_t) * i), sizeof(uint32_t)};
        }
    }
    const VkSpecializationInfo specInfo = {
        (uint32_t)specValues.size(),
        entries.data(),
        specValues.size() * sizeof(uint32_t),
        specValues.data(),
    };
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
    flushAlloc(vk, device, buffer->getAllocation());

    ComputePipelineWrapper pipeline(vk, device, computePipelineConstructionType,
                                    context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.setSpecializationInfo(specInfo);
    pipeline.buildPipeline();

    const VkQueue queue = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                    context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
    pipeline.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    invalidateAlloc(vk, device, buffer->getAllocation());

    for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
    {
        uint32_t expected = increment ? numWGX * numWGY * numWGZ : 0u;
        if (ptr[i] != expected)
        {
            log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected
                << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
            return tcu::TestStatus::fail("compute failed");
        }
    }

    return tcu::TestStatus::pass("compute succeeded");
}

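// Covers the "max_workgroup_memory" cases: the shader declares a zero-initialized
// shared uvec4 array that fills the whole maxComputeSharedMemorySize limit, and
// every invocation atomically records whether the element it inspects reads back
// as zero.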
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupMemoryInstance(Context &context, uint32_t numWorkgroups,
                               const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
    MaxWorkgroupMemoryTest(tcu::TestContext &testCtx, const std::string &name, uint32_t numWorkgroups,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupMemoryTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void MaxWorkgroupMemoryTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
    src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
    src << "shared uvec4 wg_mem[num_elems] = {};\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
    src << "  for (uint i = 0; i < num_elems; ++i) {\n";
    src << "    for (uint j = 0; j < 4; ++j) {\n";
    src << "      uint shared_idx = 4*i + j;\n";
    src << "      uint wg_val = wg_mem[i][j];\n";
    src << "      if (idx == shared_idx) {\n";
    src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
    src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
    src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
    src << "      }\n";
    src << "    }\n";
    src << "  }\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

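// Picks the largest workgroup whose dimensions are multiples of 13 (capped at 247
// invocations), so the workgroup size is never a power of two, then verifies a
// full maxComputeSharedMemorySize worth of zero-initialized shared uvec4s.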
tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
    const uint32_t maxMemSize = properties.limits.maxComputeSharedMemorySize;

    const uint32_t maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
    uint32_t wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
    uint32_t wgy = 1;
    uint32_t wgz = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
    }
    const uint32_t size = maxMemSize;
    const uint32_t numElems = maxMemSize / 16;

    return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType,
                      {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}

void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup *group,
                                vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    std::vector<uint32_t> workgroups = {1, 2, 4, 16, 64, 128};
    for (uint32_t i = 0; i < workgroups.size(); ++i)
    {
        uint32_t numWG = workgroups[i];
        group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(), de::toString(numWG), numWG,
                                                   computePipelineConstructionType));
    }
}

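// Describes one scalar/vector/matrix case: typeSize is the component size in
// bytes, numElements the component count per column, numRows the column count
// (both 1 for scalars, numRows 1 for vectors), and numVariables the number of
// shared variables of the type the generated shader declares (filled in with a
// random value in AddTypeTests).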
struct TypeCaseDef
{
    std::string typeName;
    uint32_t typeSize;
    uint32_t numElements;
    uint32_t numRows;
    uint32_t numVariables;
};

class TypeTestInstance : public vkt::TestInstance
{
public:
    TypeTestInstance(Context &context, const TypeCaseDef &caseDef,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class TypeTest : public vkt::TestCase
{
public:
    TypeTest(tcu::TestContext &testCtx, const std::string &name, const TypeCaseDef &caseDef,
             const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void TypeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    if (m_caseDef.typeName == "float16_t" || m_caseDef.typeName == "f16vec2" || m_caseDef.typeName == "f16vec3" ||
        m_caseDef.typeName == "f16vec4" || m_caseDef.typeName == "f16mat2x2" || m_caseDef.typeName == "f16mat2x3" ||
        m_caseDef.typeName == "f16mat2x4" || m_caseDef.typeName == "f16mat3x2" || m_caseDef.typeName == "f16mat3x3" ||
        m_caseDef.typeName == "f16mat3x4" || m_caseDef.typeName == "f16mat4x2" || m_caseDef.typeName == "f16mat4x3" ||
        m_caseDef.typeName == "f16mat4x4")
    {
        if (f16_i8_features.shaderFloat16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    }

    if (m_caseDef.typeName == "float64_t" || m_caseDef.typeName == "f64vec2" || m_caseDef.typeName == "f64vec3" ||
        m_caseDef.typeName == "f64vec4" || m_caseDef.typeName == "f64mat2x2" || m_caseDef.typeName == "f64mat2x3" ||
        m_caseDef.typeName == "f64mat2x4" || m_caseDef.typeName == "f64mat3x2" || m_caseDef.typeName == "f64mat3x3" ||
        m_caseDef.typeName == "f64mat3x4" || m_caseDef.typeName == "f64mat4x2" || m_caseDef.typeName == "f64mat4x3" ||
        m_caseDef.typeName == "f64mat4x4")
    {
        if (features2.features.shaderFloat64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    }

    if (m_caseDef.typeName == "int8_t" || m_caseDef.typeName == "i8vec2" || m_caseDef.typeName == "i8vec3" ||
        m_caseDef.typeName == "i8vec4" || m_caseDef.typeName == "uint8_t" || m_caseDef.typeName == "u8vec2" ||
        m_caseDef.typeName == "u8vec3" || m_caseDef.typeName == "u8vec4")
    {
        if (f16_i8_features.shaderInt8 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    }

    if (m_caseDef.typeName == "int16_t" || m_caseDef.typeName == "i16vec2" || m_caseDef.typeName == "i16vec3" ||
        m_caseDef.typeName == "i16vec4" || m_caseDef.typeName == "uint16_t" || m_caseDef.typeName == "u16vec2" ||
        m_caseDef.typeName == "u16vec3" || m_caseDef.typeName == "u16vec4")
    {
        if (features2.features.shaderInt16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    }

    if (m_caseDef.typeName == "int64_t" || m_caseDef.typeName == "i64vec2" || m_caseDef.typeName == "i64vec3" ||
        m_caseDef.typeName == "i64vec4" || m_caseDef.typeName == "uint64_t" || m_caseDef.typeName == "u64vec2" ||
        m_caseDef.typeName == "u64vec3" || m_caseDef.typeName == "u64vec4")
    {
        if (features2.features.shaderInt64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt64 not supported");
    }
}

void TypeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows
        << ", local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    for (uint32_t i = 0; i < m_caseDef.numVariables; ++i)
    {
        src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
    }
    src << "void main() {\n";
    if (m_caseDef.numRows > 1)
    {
        src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
        src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
    }
    std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
    for (uint32_t v = 0; v < m_caseDef.numVariables; ++v)
    {
        if (m_caseDef.numElements == 1)
        {
            // Scalars.
            src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
        }
        else if (m_caseDef.numRows == 1)
        {
            // Vectors.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = ("
                << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
        }
        else
        {
            // Matrices.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = ("
                << conv << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
        }
    }
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus TypeTestInstance::iterate(void)
{
    const uint32_t varBytes = m_caseDef.numElements * m_caseDef.numRows * (uint32_t)sizeof(uint32_t);
    return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
}

void AddTypeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    deRandom rnd;
    deRandom_init(&rnd, 0);
    std::vector<TypeCaseDef> cases = {
        {"bool", 1, 1, 1, 0}, {"bvec2", 1, 2, 1, 0}, {"bvec3", 1, 3, 1, 0}, {"bvec4", 1, 4, 1, 0},
        {"uint32_t", 4, 1, 1, 0}, {"uvec2", 4, 2, 1, 0}, {"uvec3", 4, 3, 1, 0}, {"uvec4", 4, 4, 1, 0},
        {"int32_t", 4, 1, 1, 0}, {"ivec2", 4, 2, 1, 0}, {"ivec3", 4, 3, 1, 0}, {"ivec4", 4, 4, 1, 0},
        {"uint8_t", 1, 1, 1, 0}, {"u8vec2", 1, 2, 1, 0}, {"u8vec3", 1, 3, 1, 0}, {"u8vec4", 1, 4, 1, 0},
        {"int8_t", 1, 1, 1, 0}, {"i8vec2", 1, 2, 1, 0}, {"i8vec3", 1, 3, 1, 0}, {"i8vec4", 1, 4, 1, 0},
        {"uint16_t", 2, 1, 1, 0}, {"u16vec2", 2, 2, 1, 0}, {"u16vec3", 2, 3, 1, 0}, {"u16vec4", 2, 4, 1, 0},
        {"int16_t", 2, 1, 1, 0}, {"i16vec2", 2, 2, 1, 0}, {"i16vec3", 2, 3, 1, 0}, {"i16vec4", 2, 4, 1, 0},
        {"uint64_t", 8, 1, 1, 0}, {"u64vec2", 8, 2, 1, 0}, {"u64vec3", 8, 3, 1, 0}, {"u64vec4", 8, 4, 1, 0},
        {"int64_t", 8, 1, 1, 0}, {"i64vec2", 8, 2, 1, 0}, {"i64vec3", 8, 3, 1, 0}, {"i64vec4", 8, 4, 1, 0},
        {"float32_t", 4, 1, 1, 0}, {"f32vec2", 4, 2, 1, 0}, {"f32vec3", 4, 3, 1, 0}, {"f32vec4", 4, 4, 1, 0},
        {"f32mat2x2", 4, 2, 2, 0}, {"f32mat2x3", 4, 3, 2, 0}, {"f32mat2x4", 4, 4, 2, 0}, {"f32mat3x2", 4, 2, 3, 0},
        {"f32mat3x3", 4, 3, 3, 0}, {"f32mat3x4", 4, 4, 3, 0}, {"f32mat4x2", 4, 2, 4, 0}, {"f32mat4x3", 4, 3, 4, 0},
        {"f32mat4x4", 4, 4, 4, 0}, {"float16_t", 2, 1, 1, 0}, {"f16vec2", 2, 2, 1, 0}, {"f16vec3", 2, 3, 1, 0},
        {"f16vec4", 2, 4, 1, 0}, {"f16mat2x2", 2, 2, 2, 0}, {"f16mat2x3", 2, 3, 2, 0}, {"f16mat2x4", 2, 4, 2, 0},
        {"f16mat3x2", 2, 2, 3, 0}, {"f16mat3x3", 2, 3, 3, 0}, {"f16mat3x4", 2, 4, 3, 0}, {"f16mat4x2", 2, 2, 4, 0},
        {"f16mat4x3", 2, 3, 4, 0}, {"f16mat4x4", 2, 4, 4, 0}, {"float64_t", 8, 1, 1, 0}, {"f64vec2", 8, 2, 1, 0},
        {"f64vec3", 8, 3, 1, 0}, {"f64vec4", 8, 4, 1, 0}, {"f64mat2x2", 8, 2, 2, 0}, {"f64mat2x3", 8, 3, 2, 0},
        {"f64mat2x4", 8, 4, 2, 0}, {"f64mat3x2", 8, 2, 3, 0}, {"f64mat3x3", 8, 3, 3, 0}, {"f64mat3x4", 8, 4, 3, 0},
        {"f64mat4x2", 8, 2, 4, 0}, {"f64mat4x3", 8, 3, 4, 0}, {"f64mat4x4", 8, 4, 4, 0},
    };

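    // Each case checks a randomized number of shared variables (1-16) of the given type.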
    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
        group->addChild(new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i],
                                     computePipelineConstructionType));
    }
}

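// Describes one composite case: "typeDefinition" declares zero-initialized shared
// variables, "assignment" copies one uint per checked element into the output
// buffer, "elements" is the number of uints written, and "index" is a feature
// bitmask (bit 0 float16, bit 1 float64, bit 2 int8, bit 3 int16, bit 4 int64)
// consumed by CompositeTest::checkSupport.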
struct CompositeCaseDef
{
    uint32_t index;
    std::string typeDefinition;
    std::string assignment;
    uint32_t elements;
    std::vector<uint32_t> specValues;

    CompositeCaseDef(uint32_t index_, const std::string &typeDefinition_, const std::string &assignment_,
                     uint32_t elements_, const std::vector<uint32_t> &specValues_)
        : index(index_)
        , typeDefinition(typeDefinition_)
        , assignment(assignment_)
        , elements(elements_)
        , specValues(specValues_)
    {
    }
};

class CompositeTestInstance : public vkt::TestInstance
{
public:
    CompositeTestInstance(Context &context, const CompositeCaseDef &caseDef,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CompositeTest : public vkt::TestCase
{
public:
    CompositeTest(tcu::TestContext &testCtx, const std::string &name, const CompositeCaseDef &caseDef,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void CompositeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
    bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
    bool needsInt8 = (m_caseDef.index & 0x4) != 0;
    bool needsInt16 = (m_caseDef.index & 0x8) != 0;
    bool needsInt64 = (m_caseDef.index & 0x10) != 0;

    if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}

void CompositeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "\n";
    for (uint32_t i = 0; i < m_caseDef.specValues.size(); ++i)
    {
        src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
    }
    src << "\n";
    src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "\n";
    src << m_caseDef.typeDefinition;
    src << "\n";
    src << "void main() {\n";
    src << m_caseDef.assignment;
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus CompositeTestInstance::iterate(void)
{
    const uint32_t bufferSize = (uint32_t)sizeof(uint32_t) * m_caseDef.elements;
    return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
}

void AddCompositeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    const std::vector<CompositeCaseDef> cases{
        {
            0,
            "shared uint wg_mem[specId0] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[i] = wg_mem[i];\n"
            "}\n",
            16,
            {16},
        },

        {
            0,
            "shared float wg_mem[specId0][specId1] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    uint idx = i * specId1 + j;\n"
            "    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
            "  }\n"
            "}\n",
            32,
            {4, 8},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "  uvec2 b;\n"
            "  uvec3 c;\n"
            "  uvec4 d;\n"
            "  float e;\n"
            "  vec2 f;\n"
            "  vec3 g;\n"
            "  vec4 h;\n"
            "  bool i;\n"
            "  bvec2 j;\n"
            "  bvec3 k;\n"
            "  bvec4 l;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint i = 0;\n"
            "a.a[i++] = wg_mem.a;\n"
            "a.a[i++] = wg_mem.b.x;\n"
            "a.a[i++] = wg_mem.b.y;\n"
            "a.a[i++] = wg_mem.c.x;\n"
            "a.a[i++] = wg_mem.c.y;\n"
            "a.a[i++] = wg_mem.c.z;\n"
            "a.a[i++] = wg_mem.d.x;\n"
            "a.a[i++] = wg_mem.d.y;\n"
            "a.a[i++] = wg_mem.d.z;\n"
            "a.a[i++] = wg_mem.d.w;\n"
            "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.i ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
            30,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  uvec2 a;\n"
            "};\n"
            "struct Sc {\n"
            "  Sa a[specId0];\n"
            "  Sb b[specId1];\n"
            "};\n"
            "shared Sc wg_mem[specId2] = {};\n",

            "uint idx = 0;\n"
            "for (uint i = 0; i < specId2; ++i) {\n"
            "  for (uint j = 0; j < specId0; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].a[j].a;\n"
            "  }\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
            "  }\n"
            "}\n",
            32,
            {2, 3, 4},
        },

        {
            1,
            "struct Sa {\n"
            "  f16vec2 a;\n"
            "  float16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
            "}\n",
            18,
            {16},
        },

        {
            2,
            "struct Sa {\n"
            "  f64vec2 a;\n"
            "  float64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
            "}\n",
            7,
            {5},
        },

        {
            4,
            "struct Sa {\n"
            "  i8vec2 a;\n"
            "  int8_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            34,
            {32},
        },

        {
            8,
            "struct Sa {\n"
            "  i16vec2 a;\n"
            "  int16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            122,
            {120},
        },

        {
            16,
            "struct Sa {\n"
            "  i64vec2 a;\n"
            "  int64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            63,
            {61},
        },

        {
            0x1f,
            "struct Sa {\n"
            "  float16_t a;\n"
            "  float b;\n"
            "  int8_t c;\n"
            "  int16_t d;\n"
            "  int e;\n"
            "  int64_t f;\n"
            "  float64_t g;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
            "a.a[idx++] = uint(wg_mem.c);\n"
            "a.a[idx++] = uint(wg_mem.d);\n"
            "a.a[idx++] = uint(wg_mem.e);\n"
            "a.a[idx++] = uint(wg_mem.f);\n"
            "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
            7,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  Sa a[specId0];\n"
            "  uint b;\n"
            "};\n"
            "struct Sc {\n"
            "  Sb b[specId1];\n"
            "  uint c;\n"
            "};\n"
            "struct Sd {\n"
            "  Sc c[specId2];\n"
            "  uint d;\n"
            "};\n"
            "struct Se {\n"
            "  Sd d[specId3];\n"
            "  uint e;\n"
            "};\n"
            "shared Se wg_mem[specId4] = {};\n",

            "uint idx = 0;\n"
            "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
            "  a.a[idx++] = wg_mem[i1].e;\n"
            "  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
            "    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
            "    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
            "      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
            "      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
            "        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
            "        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
            "          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
            "        }\n"
            "      }\n"
            "    }\n"
            "  }\n"
            "}\n",
            872,
            {6, 5, 4, 3, 2},
        },
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        group->addChild(
            new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
    }
}

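// Axis along which MaxWorkgroupsTest dispatches the maximum number of workgroups.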
enum Dim
{
    DimX,
    DimY,
    DimZ,
};

class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupsInstance(Context &context, Dim dim,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupsTest : public vkt::TestCase
{
public:
    MaxWorkgroupsTest(tcu::TestContext &testCtx, const std::string &name, Dim dim,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupsTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

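// The first invocation of each workgroup swaps the two zero-initialized shared
// counters with atomicExchange; after a barrier, every invocation atomically
// records whether the counter it reads is still zero.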
void MaxWorkgroupsTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[2] = {};\n";
    std::string dim;
    switch (m_dim)
    {
    case DimX:
        dim = "x";
        break;
    case DimY:
        dim = "y";
        break;
    case DimZ:
        dim = "z";
        break;
    }
    src << "\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  if (gl_LocalInvocationID.x == 0) {\n";
    src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
    src << "  }\n";
    src << "  barrier();\n";
    src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

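// Dispatches 65535 workgroups (the minimum guaranteed maxComputeWorkGroupCount)
// along the selected axis, using the largest workgroup the device limits allow.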
tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);

    const uint32_t maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
    uint32_t wgx = properties.limits.maxComputeWorkGroupSize[0];
    uint32_t wgy = 1;
    uint32_t wgz = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
    }
    uint32_t size = (uint32_t)sizeof(uint32_t) * wgx * wgy * wgz;

    uint32_t num_wgx = m_dim == DimX ? 65535 : 1;
    uint32_t num_wgy = m_dim == DimY ? 65535 : 1;
    uint32_t num_wgz = m_dim == DimZ ? 65535 : 1;

    return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz},
                      /*increment*/ 1);
}

void AddMaxWorkgroupsTests(tcu::TestCaseGroup *group,
                           vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
}

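// Covers the "specialize_workgroup" cases: the workgroup dimensions come from
// specialization constants that also size a zero-initialized shared array, and
// each invocation writes its own element of that array to the output buffer.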
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
    SpecializeWorkgroupInstance(Context &context, uint32_t x, uint32_t y, uint32_t z,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
    SpecializeWorkgroupTest(tcu::TestContext &testCtx, const std::string &name, uint32_t x, uint32_t y, uint32_t z,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void SpecializeWorkgroupTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
    if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
        TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}

void SpecializeWorkgroupTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(constant_id = 0) const uint WGX = 1;\n";
    src << "layout(constant_id = 1) const uint WGY = 1;\n";
    src << "layout(constant_id = 2) const uint WGZ = 1;\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
    src << "\n";
    src << "void main() {\n";
    src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * "
           "gl_WorkGroupSize.x + gl_LocalInvocationID.x] = "
           "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
{
    const uint32_t size = (uint32_t)sizeof(uint32_t) * m_x * m_y * m_z;
    return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
}

void AddSpecializeWorkgroupTests(tcu::TestCaseGroup *group,
                                 vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    for (uint32_t z = 1; z <= 8; ++z)
    {
        for (uint32_t y = 1; y <= 8; ++y)
        {
            for (uint32_t x = 1; x <= 8; ++x)
            {
                group->addChild(new SpecializeWorkgroupTest(
                    group->getTestContext(), de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z), x, y, z,
                    computePipelineConstructionType));
            }
        }
    }
}

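// Covers the "repeat_pipeline" cases: the same command buffer is submitted
// repeatedly; on every submission one half of the zero-initialized shared array
// is overwritten from an input buffer, so the other half must still read back
// zero each time.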
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
    RepeatedPipelineInstance(Context &context, uint32_t xSize, uint32_t repeat, uint32_t odd)
        : TestInstance(context)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_xSize;
    uint32_t m_repeat;
    uint32_t m_odd;
};

class RepeatedPipelineTest : public vkt::TestCase
{
public:
    RepeatedPipelineTest(tcu::TestContext &testCtx, const std::string &name, uint32_t xSize, uint32_t repeat,
                         uint32_t odd, const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_xSize(xSize)
        , m_repeat(repeat)
        , m_odd(odd)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_xSize;
    uint32_t m_repeat;
    uint32_t m_odd;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void RepeatedPipelineTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void RepeatedPipelineTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(constant_id = 0) const uint WGX = 1;\n";
    src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
    src << "\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
    src << "\n";
    src << "shared uint wg_mem[WGX][2] = {};\n";
    src << "void main() {\n";
    src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
    src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + "
           "gl_LocalInvocationID.x];\n";
    src << "  }\n";
    src << "  barrier();\n";
    src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = "
           "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

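// Unlike the other cases, this instance builds its pipeline and descriptor set by
// hand (it needs two storage-buffer bindings) and re-submits the identical command
// buffer m_repeat times, re-checking and re-poisoning the output buffer between
// submissions.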
tcu::TestStatus RepeatedPipelineInstance::iterate(void)
{
    Context &context = m_context;
    const uint32_t bufferSize = m_xSize * 2 * (uint32_t)sizeof(uint32_t);
    const uint32_t numBuffers = 2;

    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device = context.getDevice();
    Allocator &allocator = context.getDefaultAllocator();
    tcu::TestLog &log = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffers[numBuffers];
    VkDescriptorBufferInfo bufferDescriptors[numBuffers];

    VkDeviceSize size = bufferSize;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
            vk, device, allocator,
            makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                           VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
            MemoryRequirement::HostVisible | MemoryRequirement::Cached));
        bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
    }

    uint32_t *ptrs[numBuffers];
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        ptrs[i] = (uint32_t *)buffers[i]->getAllocation().getHostPtr();
    }
    for (uint32_t i = 0; i < bufferSize / sizeof(uint32_t); ++i)
    {
        ptrs[1][i] = i;
    }
    deMemset(ptrs[0], 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
    }

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    const uint32_t specData[1] = {
        m_xSize,
    };
    const vk::VkSpecializationMapEntry entries[1] = {
        {0, (uint32_t)(sizeof(uint32_t) * 0), sizeof(uint32_t)},
    };
    const vk::VkSpecializationInfo specInfo = {1, entries, sizeof(specData), specData};

    const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        DE_NULL,
        (VkPipelineLayoutCreateFlags)0,
        1,
        &descriptorSetLayout.get(),
        0u,
        DE_NULL,
    };
    Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;

    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        flushAlloc(vk, device, buffers[i]->getAllocation());
    }

    const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
    const VkPipelineShaderStageCreateInfo shaderInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        DE_NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        *shader,
        "main",
        &specInfo,
    };

    const VkComputePipelineCreateInfo pipelineInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, DE_NULL, 0u, shaderInfo, *pipelineLayout, (VkPipeline)0, 0u,
    };
    Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);

    const VkQueue queue = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                    context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
    }
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

    endCommandBuffer(vk, *cmdBuffer);

    for (uint32_t r = 0; r < m_repeat; ++r)
    {
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        invalidateAlloc(vk, device, buffers[0]->getAllocation());

        for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
        {
            uint32_t expected = (m_odd == (i / m_xSize)) ? i : 0u;
            if (ptrs[0][i] != expected)
            {
                log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected
                    << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
                return tcu::TestStatus::fail("compute failed");
            }
        }

        deMemset(ptrs[0], 0xff, (size_t)size);
        flushAlloc(vk, device, buffers[0]->getAllocation());
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
        setUpdateBuilder.update(vk, device);
    }

    return tcu::TestStatus::pass("compute succeeded");
}

void AddRepeatedPipelineTests(tcu::TestCaseGroup *group,
                              vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    std::vector<uint32_t> xSizes = {4, 16, 32, 64};
    std::vector<uint32_t> odds = {0, 1};
    std::vector<uint32_t> repeats = {2, 4, 8, 16};
    for (uint32_t i = 0; i < xSizes.size(); ++i)
    {
        uint32_t x = xSizes[i];
        for (uint32_t j = 0; j < odds.size(); ++j)
        {
            uint32_t odd = odds[j];
            for (uint32_t k = 0; k < repeats.size(); ++k)
            {
                uint32_t repeat = repeats[k];
                group->addChild(new RepeatedPipelineTest(group->getTestContext(),
                                                         std::string("x_") + de::toString(x) +
                                                             (odd == 1 ? "_odd" : "_even") + "_repeat_" +
                                                             de::toString(repeat),
                                                         x, repeat, odd, computePipelineConstructionType));
            }
        }
    }
}
#ifndef CTS_USES_VULKANSC
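// Amber-script tests covering zero initialization of shared memory blocks for a
// set of representative workgroup sizes.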
void AddSharedMemoryTests(tcu::TestCaseGroup *group)
{
    tcu::TestContext &testCtx = group->getTestContext();
    std::string filePath = "compute/zero_initialize_workgroup_memory";
    std::vector<std::string> requirements;

    std::string testNames[] = {"workgroup_size_128", "workgroup_size_8x8x2", "workgroup_size_8x2x8",
                               "workgroup_size_2x8x8", "workgroup_size_8x4x4", "workgroup_size_4x8x4",
                               "workgroup_size_4x4x8"};

    requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");

    for (const auto &testName : testNames)
    {
        group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(),
                                                       testName + ".amber", requirements));
    }
}
#endif // CTS_USES_VULKANSC

} // namespace

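// Entry point: assembles all zero_initialize_workgroup_memory test groups.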
tcu::TestCaseGroup *createZeroInitializeWorkgroupMemoryTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));

    // Read initialization of max workgroup memory
    tcu::TestCaseGroup *maxWorkgroupMemoryGroup = new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
    AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupMemoryGroup);

    tcu::TestCaseGroup *typeGroup = new tcu::TestCaseGroup(testCtx, "types");
    AddTypeTests(typeGroup, computePipelineConstructionType);
    tests->addChild(typeGroup);

    tcu::TestCaseGroup *compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
    AddCompositeTests(compositeGroup, computePipelineConstructionType);
    tests->addChild(compositeGroup);

    tcu::TestCaseGroup *maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
    AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupsGroup);

    tcu::TestCaseGroup *specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
    AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
    tests->addChild(specializeWorkgroupGroup);

    tcu::TestCaseGroup *repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
    AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
    tests->addChild(repeatPipelineGroup);

#ifndef CTS_USES_VULKANSC
    // These are Amber tests and Amber cannot use shader objects.
    if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
    {
        tcu::TestCaseGroup *subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
        AddSharedMemoryTests(subgroupInvocationGroup);
        tests->addChild(subgroupInvocationGroup);
    }
#endif // CTS_USES_VULKANSC

    return tests.release();
}

} // namespace compute
} // namespace vkt