1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Google LLC.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief VK_KHR_zero_initialize_workgroup_memory tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29
30 #include "vkBufferWithMemory.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkDefs.hpp"
38 #include "vkRef.hpp"
39
40 #include "tcuCommandLine.hpp"
41 #include "tcuTestLog.hpp"
42
43 #include "deRandom.hpp"
44 #include "deStringUtil.hpp"
45 #include "deUniquePtr.hpp"
46
47 #include <algorithm>
48 #include <vector>
49
50 using namespace vk;
51
52 namespace vkt
53 {
54 namespace compute
55 {
56 namespace
57 {
58
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,const std::vector<deUint32> specValues={},deUint32 increment=0)59 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
60 deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
61 const std::vector<deUint32> specValues = {},
62 deUint32 increment = 0)
63 {
64 const DeviceInterface& vk = context.getDeviceInterface();
65 const VkDevice device = context.getDevice();
66 Allocator& allocator = context.getDefaultAllocator();
67 tcu::TestLog& log = context.getTestContext().getLog();
68
69 de::MovePtr<BufferWithMemory> buffer;
70 VkDescriptorBufferInfo bufferDescriptor;
71
72 VkDeviceSize size = bufferSize;
73 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
74 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
75 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
76 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
77
78 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
79 deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
80
81 DescriptorSetLayoutBuilder layoutBuilder;
82 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
83
84 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
85 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
86 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
87 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
88 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
89
90 std::vector<VkSpecializationMapEntry> entries(specValues.size());
91 if (!specValues.empty())
92 {
93 for (deUint32 i = 0; i < specValues.size(); ++i)
94 {
95 entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
96 }
97 }
98 const VkSpecializationInfo specInfo =
99 {
100 (deUint32)specValues.size(),
101 entries.data(),
102 specValues.size() * sizeof(deUint32),
103 specValues.data(),
104 };
105
106 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
107 {
108 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
109 DE_NULL,
110 (VkPipelineLayoutCreateFlags)0,
111 1,
112 &descriptorSetLayout.get(),
113 0u,
114 DE_NULL,
115 };
116 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
117 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
118 flushAlloc(vk, device, buffer->getAllocation());
119
120 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
121 const VkPipelineShaderStageCreateInfo shaderInfo =
122 {
123 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
124 DE_NULL,
125 0,
126 VK_SHADER_STAGE_COMPUTE_BIT,
127 *shader,
128 "main",
129 specValues.empty() ? DE_NULL : &specInfo,
130 };
131
132 const VkComputePipelineCreateInfo pipelineInfo =
133 {
134 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
135 DE_NULL,
136 0u,
137 shaderInfo,
138 *pipelineLayout,
139 (VkPipeline)0,
140 0u,
141 };
142 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
143
144 const VkQueue queue = context.getUniversalQueue();
145 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
146 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
147 context.getUniversalQueueFamilyIndex());
148 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
149
150 DescriptorSetUpdateBuilder setUpdateBuilder;
151 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
152 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
153 setUpdateBuilder.update(vk, device);
154
155 beginCommandBuffer(vk, *cmdBuffer, 0);
156
157 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
158 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
159
160 vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
161
162 endCommandBuffer(vk, *cmdBuffer);
163
164 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
165
166 invalidateAlloc(vk, device, buffer->getAllocation());
167
168 for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
169 {
170 deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
171 if (ptr[i] != expected)
172 {
173 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
174 return tcu::TestStatus::fail("compute failed");
175 }
176 }
177
178 return tcu::TestStatus::pass("compute succeeded");
179 }
180
181 class MaxWorkgroupMemoryInstance : public vkt::TestInstance
182 {
183 public:
MaxWorkgroupMemoryInstance(Context & context,deUint32 numWorkgroups)184 MaxWorkgroupMemoryInstance(Context& context, deUint32 numWorkgroups)
185 : TestInstance(context),
186 m_numWorkgroups(numWorkgroups)
187 {
188 }
189 tcu::TestStatus iterate(void);
190
191 private:
192 deUint32 m_numWorkgroups;
193 };
194
195 class MaxWorkgroupMemoryTest : public vkt::TestCase
196 {
197 public:
MaxWorkgroupMemoryTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,deUint32 numWorkgroups)198 MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
199 const std::string& name,
200 const std::string& description,
201 deUint32 numWorkgroups)
202 : TestCase(testCtx, name, description),
203 m_numWorkgroups(numWorkgroups)
204 {
205 }
206
207 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const208 TestInstance* createInstance(Context& context) const
209 {
210 return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups);
211 }
212 virtual void checkSupport(Context& context) const;
213
214 private:
215 deUint32 m_numWorkgroups;
216 };
217
// All cases in this group require VK_KHR_zero_initialize_workgroup_memory.
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
222
// Builds the "max workgroup memory" compute shader.
//
// The shader declares a zero-initialized (GL_EXT_null_initializer, "= {}")
// shared array of num_elems uvec4s. Workgroup size (constant_ids 0-2) and
// num_elems (constant_id 3) are specialization constants supplied by
// MaxWorkgroupMemoryInstance::iterate; constant_id 4 is declared but not
// referenced by the shader body. Every invocation scans the whole array and,
// for each uvec4 component whose flat index matches its own invocation index
// (invocation 0 additionally covers components beyond the workgroup size),
// atomically adds 1 to the corresponding output word iff the component read
// back as zero. With N workgroups each output word should end up equal to N.
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	// Default: 16384 bytes worth of uvec4s; overridden via constant_id 3.
	src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
	src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
	src << "shared uvec4 wg_mem[num_elems] = {};\n";
	src << "void main() {\n";
	src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
	src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
	src << " uint idx_x = gl_LocalInvocationID.x;\n";
	src << " uint idx = idx_x + idx_y + idx_z;\n";
	src << " uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
	src << " for (uint i = 0; i < num_elems; ++i) {\n";
	src << " for (uint j = 0; j < 4; ++j) {\n";
	src << " uint shared_idx = 4*i + j;\n";
	src << " uint wg_val = wg_mem[i][j];\n";
	src << " if (idx == shared_idx) {\n";
	src << " atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
	src << " } else if (idx == 0 && shared_idx >= wg_size) {\n";
	src << " atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
	src << " }\n";
	src << " }\n";
	src << " }\n";
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
254
iterate(void)255 tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
256 {
257 VkPhysicalDeviceProperties properties;
258 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
259 const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;
260
261 const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
262 deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
263 deUint32 wgy = 1;
264 deUint32 wgz = 1;
265 if (wgx < maxWG)
266 {
267 wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
268 }
269 if ((wgx * wgy) < maxWG)
270 {
271 wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
272 }
273 const deUint32 size = maxMemSize;
274 const deUint32 numElems = maxMemSize / 16;
275
276 return runCompute(m_context, size, m_numWorkgroups, 1, 1, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
277 }
278
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group)279 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group)
280 {
281 std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
282 for (deUint32 i = 0; i < workgroups.size(); ++i) {
283 deUint32 numWG = workgroups[i];
284 group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
285 de::toString(numWG), de::toString(numWG) + " workgroups", numWG));
286 }
287 }
288
// Describes one per-type zero-initialization case.
struct TypeCaseDef
{
	std::string typeName;	// GLSL type name, e.g. "uvec2", "f16mat3x2"
	deUint32 typeSize;		// component size in bytes; >4 selects int64_t conversion in the shader
	deUint32 numElements;	// components per vector / per matrix column (1 for scalars)
	deUint32 numRows;		// number of matrix columns (1 for scalars and vectors)
	deUint32 numVariables;	// count of shared variables; overwritten with a random 1..16 in AddTypeTests
};
297
298 class TypeTestInstance : public vkt::TestInstance
299 {
300 public:
TypeTestInstance(Context & context,const TypeCaseDef & caseDef)301 TypeTestInstance(Context& context, const TypeCaseDef& caseDef)
302 : TestInstance(context),
303 m_caseDef(caseDef)
304 {
305 }
306 tcu::TestStatus iterate(void);
307
308 private:
309 TypeCaseDef m_caseDef;
310 };
311
312 class TypeTest : public vkt::TestCase
313 {
314 public:
TypeTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TypeCaseDef & caseDef)315 TypeTest(tcu::TestContext& testCtx,
316 const std::string& name,
317 const std::string& description,
318 const TypeCaseDef& caseDef)
319 : TestCase(testCtx, name, description),
320 m_caseDef(caseDef)
321 {
322 }
323
324 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const325 TestInstance* createInstance(Context& context) const
326 {
327 return new TypeTestInstance(context, m_caseDef);
328 }
329 virtual void checkSupport(Context& context) const;
330
331 private:
332 TypeCaseDef m_caseDef;
333 };
334
checkSupport(Context & context) const335 void TypeTest::checkSupport(Context& context) const
336 {
337 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
338
339 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
340 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
341 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
342 f16_i8_features.pNext = DE_NULL;
343
344 VkPhysicalDeviceFeatures2 features2;
345 deMemset(&features2, 0, sizeof(features2));
346 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
347 features2.pNext = &f16_i8_features;
348 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
349
350 if (m_caseDef.typeName == "float16_t" ||
351 m_caseDef.typeName == "f16vec2" ||
352 m_caseDef.typeName == "f16vec3" ||
353 m_caseDef.typeName == "f16vec4" ||
354 m_caseDef.typeName == "f16mat2x2" ||
355 m_caseDef.typeName == "f16mat2x3" ||
356 m_caseDef.typeName == "f16mat2x4" ||
357 m_caseDef.typeName == "f16mat3x2" ||
358 m_caseDef.typeName == "f16mat3x3" ||
359 m_caseDef.typeName == "f16mat3x4" ||
360 m_caseDef.typeName == "f16mat4x2" ||
361 m_caseDef.typeName == "f16mat4x3" ||
362 m_caseDef.typeName == "f16mat4x4")
363 {
364 if (f16_i8_features.shaderFloat16 != VK_TRUE)
365 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
366 }
367
368 if (m_caseDef.typeName == "float64_t" ||
369 m_caseDef.typeName == "f64vec2" ||
370 m_caseDef.typeName == "f64vec3" ||
371 m_caseDef.typeName == "f64vec4"||
372 m_caseDef.typeName == "f64mat2x2" ||
373 m_caseDef.typeName == "f64mat2x3" ||
374 m_caseDef.typeName == "f64mat2x4" ||
375 m_caseDef.typeName == "f64mat3x2" ||
376 m_caseDef.typeName == "f64mat3x3" ||
377 m_caseDef.typeName == "f64mat3x4" ||
378 m_caseDef.typeName == "f64mat4x2" ||
379 m_caseDef.typeName == "f64mat4x3" ||
380 m_caseDef.typeName == "f64mat4x4")
381 {
382 if (features2.features.shaderFloat64 != VK_TRUE)
383 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
384 }
385
386 if (m_caseDef.typeName == "int8_t" ||
387 m_caseDef.typeName == "i8vec2" ||
388 m_caseDef.typeName == "i8vec3" ||
389 m_caseDef.typeName == "i8vec4" ||
390 m_caseDef.typeName == "uint8_t" ||
391 m_caseDef.typeName == "u8vec2" ||
392 m_caseDef.typeName == "u8vec3" ||
393 m_caseDef.typeName == "u8vec4")
394 {
395 if (f16_i8_features.shaderInt8 != VK_TRUE)
396 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
397 }
398
399 if (m_caseDef.typeName == "int16_t" ||
400 m_caseDef.typeName == "i16vec2" ||
401 m_caseDef.typeName == "i16vec3" ||
402 m_caseDef.typeName == "i16vec4" ||
403 m_caseDef.typeName == "uint16_t" ||
404 m_caseDef.typeName == "u16vec2" ||
405 m_caseDef.typeName == "u16vec3" ||
406 m_caseDef.typeName == "u16vec4")
407 {
408 if (features2.features.shaderInt16 != VK_TRUE)
409 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
410 }
411
412 if (m_caseDef.typeName == "int64_t" ||
413 m_caseDef.typeName == "i64vec2" ||
414 m_caseDef.typeName == "i64vec3" ||
415 m_caseDef.typeName == "i64vec4" ||
416 m_caseDef.typeName == "uint64_t" ||
417 m_caseDef.typeName == "u64vec2" ||
418 m_caseDef.typeName == "u64vec3" ||
419 m_caseDef.typeName == "u64vec4")
420 {
421 if (features2.features.shaderInt64 != VK_TRUE)
422 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
423 }
424 }
425
// Builds a compute shader with m_caseDef.numVariables shared variables of the
// tested type, each zero-initialized via "= {}" (GL_EXT_null_initializer).
// One invocation per component writes a 0 to the output buffer when the
// component read back zero, otherwise 1; the host expects all zeros.
void TypeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	// One invocation per component of one variable (all variables are checked
	// by each invocation's loop below).
	src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
		src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
	}
	src << "void main() {\n";
	if (m_caseDef.numRows > 1)
	{
		// Matrix case: map the flat invocation index to [row][col] indices.
		src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
		src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
	}
	// 64-bit components need a 64-bit integer conversion before comparing.
	std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
	for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
	{
		if (m_caseDef.numElements == 1)
		{
			// Scalars.
			src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
		}
		else if (m_caseDef.numRows == 1)
		{
			// Vectors.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
		}
		else
		{
			// Matrices.
			src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
		}
	}
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
466
// Each variable contributes numElements*numRows output uints (one per
// component), independent of the tested component type's byte size.
tcu::TestStatus TypeTestInstance::iterate(void)
{
	const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
	return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1);
}
472
AddTypeTests(tcu::TestCaseGroup * group)473 void AddTypeTests(tcu::TestCaseGroup* group)
474 {
475 deRandom rnd;
476 deRandom_init(&rnd, 0);
477 std::vector<TypeCaseDef> cases =
478 {
479 {"bool", 1, 1, 1, 0},
480 {"bvec2", 1, 2, 1, 0},
481 {"bvec3", 1, 3, 1, 0},
482 {"bvec4", 1, 4, 1, 0},
483 {"uint32_t", 4, 1, 1, 0},
484 {"uvec2", 4, 2, 1, 0},
485 {"uvec3", 4, 3, 1, 0},
486 {"uvec4", 4, 4, 1, 0},
487 {"int32_t", 4, 1, 1, 0},
488 {"ivec2", 4, 2, 1, 0},
489 {"ivec3", 4, 3, 1, 0},
490 {"ivec4", 4, 4, 1, 0},
491 {"uint8_t", 1, 1, 1, 0},
492 {"u8vec2", 1, 2, 1, 0},
493 {"u8vec3", 1, 3, 1, 0},
494 {"u8vec4", 1, 4, 1, 0},
495 {"int8_t", 1, 1, 1, 0},
496 {"i8vec2", 1, 2, 1, 0},
497 {"i8vec3", 1, 3, 1, 0},
498 {"i8vec4", 1, 4, 1, 0},
499 {"uint16_t", 2, 1, 1, 0},
500 {"u16vec2", 2, 2, 1, 0},
501 {"u16vec3", 2, 3, 1, 0},
502 {"u16vec4", 2, 4, 1, 0},
503 {"int16_t", 2, 1, 1, 0},
504 {"i16vec2", 2, 2, 1, 0},
505 {"i16vec3", 2, 3, 1, 0},
506 {"i16vec4", 2, 4, 1, 0},
507 {"uint64_t", 8, 1, 1, 0},
508 {"u64vec2", 8, 2, 1, 0},
509 {"u64vec3", 8, 3, 1, 0},
510 {"u64vec4", 8, 4, 1, 0},
511 {"int64_t", 8, 1, 1, 0},
512 {"i64vec2", 8, 2, 1, 0},
513 {"i64vec3", 8, 3, 1, 0},
514 {"i64vec4", 8, 4, 1, 0},
515 {"float32_t", 4, 1, 1, 0},
516 {"f32vec2", 4, 2, 1, 0},
517 {"f32vec3", 4, 3, 1, 0},
518 {"f32vec4", 4, 4, 1, 0},
519 {"f32mat2x2", 4, 2, 2, 0},
520 {"f32mat2x3", 4, 3, 2, 0},
521 {"f32mat2x4", 4, 4, 2, 0},
522 {"f32mat3x2", 4, 2, 3, 0},
523 {"f32mat3x3", 4, 3, 3, 0},
524 {"f32mat3x4", 4, 4, 3, 0},
525 {"f32mat4x2", 4, 2, 4, 0},
526 {"f32mat4x3", 4, 3, 4, 0},
527 {"f32mat4x4", 4, 4, 4, 0},
528 {"float16_t", 2, 1, 1, 0},
529 {"f16vec2", 2, 2, 1, 0},
530 {"f16vec3", 2, 3, 1, 0},
531 {"f16vec4", 2, 4, 1, 0},
532 {"f16mat2x2", 2, 2, 2, 0},
533 {"f16mat2x3", 2, 3, 2, 0},
534 {"f16mat2x4", 2, 4, 2, 0},
535 {"f16mat3x2", 2, 2, 3, 0},
536 {"f16mat3x3", 2, 3, 3, 0},
537 {"f16mat3x4", 2, 4, 3, 0},
538 {"f16mat4x2", 2, 2, 4, 0},
539 {"f16mat4x3", 2, 3, 4, 0},
540 {"f16mat4x4", 2, 4, 4, 0},
541 {"float64_t", 8, 1, 1, 0},
542 {"f64vec2", 8, 2, 1, 0},
543 {"f64vec3", 8, 3, 1, 0},
544 {"f64vec4", 8, 4, 1, 0},
545 {"f64mat2x2", 8, 2, 2, 0},
546 {"f64mat2x3", 8, 3, 2, 0},
547 {"f64mat2x4", 8, 4, 2, 0},
548 {"f64mat3x2", 8, 2, 3, 0},
549 {"f64mat3x3", 8, 3, 3, 0},
550 {"f64mat3x4", 8, 4, 3, 0},
551 {"f64mat4x2", 8, 2, 4, 0},
552 {"f64mat4x3", 8, 3, 4, 0},
553 {"f64mat4x4", 8, 4, 4, 0},
554 };
555
556 for (deUint32 i = 0; i < cases.size(); ++i)
557 {
558 cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
559 group->addChild(
560 new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i].typeName.c_str(), cases[i]));
561 }
562 }
563
// Describes one composite-type (struct/array) zero-initialization case.
struct CompositeCaseDef
{
	deUint32 index;						// bitmask of required features: 0x1 float16, 0x2 float64, 0x4 int8, 0x8 int16, 0x10 int64
	std::string typeDefinition;			// GLSL declarations including the zero-initialized shared variable(s)
	std::string assignment;				// body of main(): writes one uint per checked component into a.a[]
	deUint32 elements;					// number of output uints written/verified
	std::vector<deUint32> specValues;	// values bound to specialization constants specId0..N
};
572
573 class CompositeTestInstance : public vkt::TestInstance
574 {
575 public:
CompositeTestInstance(Context & context,const CompositeCaseDef & caseDef)576 CompositeTestInstance(Context& context, const CompositeCaseDef& caseDef)
577 : TestInstance(context),
578 m_caseDef(caseDef)
579 {
580 }
581 tcu::TestStatus iterate(void);
582 private:
583 CompositeCaseDef m_caseDef;
584 };
585
586 class CompositeTest : public vkt::TestCase
587 {
588 public:
CompositeTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const CompositeCaseDef & caseDef)589 CompositeTest(tcu::TestContext& testCtx,
590 const std::string& name,
591 const std::string& description,
592 const CompositeCaseDef& caseDef)
593 : TestCase(testCtx, name, description),
594 m_caseDef(caseDef)
595 {
596 }
597
598 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const599 TestInstance* createInstance(Context& context) const
600 {
601 return new CompositeTestInstance(context, m_caseDef);
602 }
603 virtual void checkSupport(Context& context) const;
604 private:
605 CompositeCaseDef m_caseDef;
606 };
607
checkSupport(Context & context) const608 void CompositeTest::checkSupport(Context& context) const
609 {
610 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
611
612 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
613 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
614 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
615 f16_i8_features.pNext = DE_NULL;
616
617 VkPhysicalDeviceFeatures2 features2;
618 deMemset(&features2, 0, sizeof(features2));
619 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
620 features2.pNext = &f16_i8_features;
621 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
622
623 bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
624 bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
625 bool needsInt8 = (m_caseDef.index & 0x4) != 0;
626 bool needsInt16 = (m_caseDef.index & 0x8) != 0;
627 bool needsInt64 = (m_caseDef.index & 0x10) != 0;
628
629 if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
630 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
631 if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
632 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
633 if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
634 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
635 if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
636 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
637 if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
638 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
639 }
640
// Assembles the compute shader from the case's snippets: declares one uint
// specialization constant specIdN (constant_id = N) per entry in
// m_caseDef.specValues, then splices in the case's type definition (with its
// zero-initialized shared variable) and the main() body that writes the
// per-component results.
void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
{
	std::ostringstream src;
	src << "#version 450\n";
	src << "#extension GL_EXT_null_initializer : enable\n";
	src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
	src << "\n";
	for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
		src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
	}
	src << "\n";
	src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
	src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
	src << "\n";
	src << m_caseDef.typeDefinition;
	src << "\n";
	src << "void main() {\n";
	src << m_caseDef.assignment;
	src << "}\n";

	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}
663
// Output buffer holds m_caseDef.elements uints; specialization constant
// values come straight from the case definition.
tcu::TestStatus CompositeTestInstance::iterate(void)
{
	const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
	return runCompute(m_context, bufferSize, 1, 1, 1, m_caseDef.specValues);
}
669
// Registers the composite-type cases. Each case pairs a GLSL type definition
// (containing the "= {}" zero-initialized shared variable) with a main() body
// that writes one uint per checked component; cases are named by index.
// The 'index' field is the feature bitmask documented on CompositeCaseDef.
void AddCompositeTests(tcu::TestCaseGroup* group)
{
	std::vector<CompositeCaseDef> cases =
	{
		// Spec-constant-sized uint array.
		{0,
		 "shared uint wg_mem[specId0] = {};\n",

		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[i] = wg_mem[i];\n"
		 "}\n",
		 16,
		 {16},
		},

		// Two-dimensional float array sized by two spec constants.
		{0,
		 "shared float wg_mem[specId0][specId1] = {};\n",

		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " for (uint j = 0; j < specId1; ++j) {\n"
		 " uint idx = i * specId1 + j;\n"
		 " a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
		 " }\n"
		 "}\n",
		 32,
		 {4, 8},
		},

		// Struct mixing uint/float/bool scalars and vectors; every member
		// component checked individually (30 outputs).
		{0,
		 "struct Sa {\n"
		 " uint a;\n"
		 " uvec2 b;\n"
		 " uvec3 c;\n"
		 " uvec4 d;\n"
		 " float e;\n"
		 " vec2 f;\n"
		 " vec3 g;\n"
		 " vec4 h;\n"
		 " bool i;\n"
		 " bvec2 j;\n"
		 " bvec3 k;\n"
		 " bvec4 l;\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint i = 0;\n"
		 "a.a[i++] = wg_mem.a;\n"
		 "a.a[i++] = wg_mem.b.x;\n"
		 "a.a[i++] = wg_mem.b.y;\n"
		 "a.a[i++] = wg_mem.c.x;\n"
		 "a.a[i++] = wg_mem.c.y;\n"
		 "a.a[i++] = wg_mem.c.z;\n"
		 "a.a[i++] = wg_mem.d.x;\n"
		 "a.a[i++] = wg_mem.d.y;\n"
		 "a.a[i++] = wg_mem.d.z;\n"
		 "a.a[i++] = wg_mem.d.w;\n"
		 "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
		 "a.a[i++] = wg_mem.i ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
		 "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
		 30,
		 {},
		},

		// Nested structs with spec-constant-sized member arrays.
		{0,
		 "struct Sa {\n"
		 " uint a;\n"
		 "};\n"
		 "struct Sb {\n"
		 " uvec2 a;\n"
		 "};\n"
		 "struct Sc {\n"
		 " Sa a[specId0];\n"
		 " Sb b[specId1];\n"
		 "};\n"
		 "shared Sc wg_mem[specId2] = {};\n",

		 "uint idx = 0;\n"
		 "for (uint i = 0; i < specId2; ++i) {\n"
		 " for (uint j = 0; j < specId0; ++j) {\n"
		 " a.a[idx++] = wg_mem[i].a[j].a;\n"
		 " }\n"
		 " for (uint j = 0; j < specId1; ++j) {\n"
		 " a.a[idx++] = wg_mem[i].b[j].a.x;\n"
		 " a.a[idx++] = wg_mem[i].b[j].a.y;\n"
		 " }\n"
		 "}\n",
		 32,
		 {2,3,4},
		},

		// float16 members (requires shaderFloat16, bit 0x1).
		{1,
		 "struct Sa {\n"
		 " f16vec2 a;\n"
		 " float16_t b[specId0];\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
		 "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
		 "}\n",
		 18,
		 {16},
		},

		// float64 members (requires shaderFloat64, bit 0x2).
		{2,
		 "struct Sa {\n"
		 " f64vec2 a;\n"
		 " float64_t b[specId0];\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
		 "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
		 "}\n",
		 7,
		 {5},
		},

		// int8 members (requires shaderInt8, bit 0x4).
		{4,
		 "struct Sa {\n"
		 " i8vec2 a;\n"
		 " int8_t b[specId0];\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		 "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		 "}\n",
		 34,
		 {32},
		},

		// int16 members (requires shaderInt16, bit 0x8).
		{8,
		 "struct Sa {\n"
		 " i16vec2 a;\n"
		 " int16_t b[specId0];\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		 "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		 "}\n",
		 122,
		 {120},
		},

		// int64 members (requires shaderInt64, bit 0x10).
		{16,
		 "struct Sa {\n"
		 " i64vec2 a;\n"
		 " int64_t b[specId0];\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
		 "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
		 "for (uint i = 0; i < specId0; ++i) {\n"
		 " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
		 "}\n",
		 63,
		 {61},
		},

		// Struct mixing all extended types (requires all five features, 0x1f).
		{0x1f,
		 "struct Sa {\n"
		 " float16_t a;\n"
		 " float b;\n"
		 " int8_t c;\n"
		 " int16_t d;\n"
		 " int e;\n"
		 " int64_t f;\n"
		 " float64_t g;\n"
		 "};\n"
		 "shared Sa wg_mem = {};\n",

		 "uint idx = 0;\n"
		 "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
		 "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
		 "a.a[idx++] = uint(wg_mem.c);\n"
		 "a.a[idx++] = uint(wg_mem.d);\n"
		 "a.a[idx++] = uint(wg_mem.e);\n"
		 "a.a[idx++] = uint(wg_mem.f);\n"
		 "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
		 7,
		 {},
		},

		// Five levels of struct nesting, each level with a spec-constant-sized
		// array of the level below plus its own scalar (872 outputs total for
		// specIds {6,5,4,3,2}).
		{0,
		 "struct Sa {\n"
		 " uint a;\n"
		 "};\n"
		 "struct Sb {\n"
		 " Sa a[specId0];\n"
		 " uint b;\n"
		 "};\n"
		 "struct Sc {\n"
		 " Sb b[specId1];\n"
		 " uint c;\n"
		 "};\n"
		 "struct Sd {\n"
		 " Sc c[specId2];\n"
		 " uint d;\n"
		 "};\n"
		 "struct Se {\n"
		 " Sd d[specId3];\n"
		 " uint e;\n"
		 "};\n"
		 "shared Se wg_mem[specId4] = {};\n",

		 "uint idx = 0;\n"
		 "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
		 " a.a[idx++] = wg_mem[i1].e;\n"
		 " for (uint i2 = 0; i2 < specId3; ++i2) {\n"
		 " a.a[idx++] = wg_mem[i1].d[i2].d;\n"
		 " for (uint i3 = 0; i3 < specId2; ++i3) {\n"
		 " a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
		 " for (uint i4 = 0; i4 < specId1; ++i4) {\n"
		 " a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
		 " for (uint i5 = 0; i5 < specId0; ++i5) {\n"
		 " a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
		 " }\n"
		 " }\n"
		 " }\n"
		 " }\n"
		 "}\n",
		 872,
		 {6,5,4,3,2},
		},
	};

	for (deUint32 i = 0; i < cases.size(); ++i)
	{
		group->addChild(
			new CompositeTest(group->getTestContext(), de::toString(i), de::toString(i), cases[i]));
	}
}
934
// Selects which dispatch dimension a "max workgroups" test variant exercises.
enum Dim {
	DimX,
	DimY,
	DimZ,
};
940
941 class MaxWorkgroupsInstance : public vkt::TestInstance
942 {
943 public:
MaxWorkgroupsInstance(Context & context,Dim dim)944 MaxWorkgroupsInstance(Context &context, Dim dim)
945 : TestInstance(context),
946 m_dim(dim)
947 {
948 }
949 tcu::TestStatus iterate(void);
950 private:
951 Dim m_dim;
952 };
953
954 class MaxWorkgroupsTest : public vkt::TestCase
955 {
956 public:
MaxWorkgroupsTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,Dim dim)957 MaxWorkgroupsTest(tcu::TestContext& testCtx,
958 const std::string& name,
959 const std::string& description,
960 Dim dim)
961 : TestCase(testCtx, name, description),
962 m_dim(dim)
963 {
964 }
965
966 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const967 TestInstance* createInstance(Context& context) const
968 {
969 return new MaxWorkgroupsInstance(context, m_dim);
970 }
971 virtual void checkSupport(Context& context) const;
972 private:
973 Dim m_dim;
974 };
975
// All cases in this group require VK_KHR_zero_initialize_workgroup_memory.
void MaxWorkgroupsTest::checkSupport(Context& context) const
{
	context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
980
initPrograms(SourceCollections & sourceCollections) const981 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
982 {
983 std::ostringstream src;
984 src << "#version 450\n";
985 src << "#extension GL_EXT_null_initializer : enable\n";
986 src << "\n";
987 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
988 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
989 src << "shared uint wg_mem[2] = {};\n";
990 std::string dim;
991 switch (m_dim) {
992 case DimX:
993 dim = "x";
994 break;
995 case DimY:
996 dim = "y";
997 break;
998 case DimZ:
999 dim = "z";
1000 break;
1001 }
1002 src << "\n";
1003 src << "void main() {\n";
1004 src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1005 src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1006 src << " uint idx_x = gl_LocalInvocationID.x;\n";
1007 src << " uint idx = idx_x + idx_y + idx_z;\n";
1008 src << " if (gl_LocalInvocationID.x == 0) {\n";
1009 src << " wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1010 src << " }\n";
1011 src << " barrier();\n";
1012 src << " atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1013 src << "}\n";
1014
1015 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1016 }
1017
iterate(void)1018 tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
1019 {
1020 VkPhysicalDeviceProperties properties;
1021 deMemset(&properties, 0, sizeof(properties));
1022 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1023
1024 const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1025 deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1026 deUint32 wgy = 1;
1027 deUint32 wgz = 1;
1028 if (wgx < maxWG)
1029 {
1030 wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1031 }
1032 if ((wgx * wgy) < maxWG)
1033 {
1034 wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1035 }
1036 deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1037
1038 deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1039 deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1040 deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1041
1042 return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, {wgx, wgy, wgz}, /*increment*/ 1);
1043 }
1044
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group)1045 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group)
1046 {
1047 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", "max x dim workgroups", DimX));
1048 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", "max y dim workgroups", DimY));
1049 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", "max z dim workgroups", DimZ));
1050 }
1051
// Instance side of the specialized-workgroup test: runs a single workgroup of
// the given size and checks every element of the zero-initialized shared array.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
	// x/y/z: workgroup dimensions fed to the shader as specialization constants.
	SpecializeWorkgroupInstance(Context &context, deUint32 x, deUint32 y, deUint32 z)
		: TestInstance(context),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_x;	// local_size_x
	deUint32 m_y;	// local_size_y
	deUint32 m_z;	// local_size_z
};
1068
// Test-case wrapper: the same specialization constants size both the workgroup
// and the shared array declared in the shader.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
	SpecializeWorkgroupTest(tcu::TestContext& testCtx,
		const std::string& name,
		const std::string& description,
		deUint32 x, deUint32 y, deUint32 z)
		: TestCase(testCtx, name, description),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}

	// Adds the GLSL compute shader to the collection under key "comp".
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z);
	}
	// Throws NotSupportedError when the extension or workgroup size is unsupported.
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_x;	// local_size_x (specialization constant 0)
	deUint32 m_y;	// local_size_y (specialization constant 1)
	deUint32 m_z;	// local_size_z (specialization constant 2)
};
1094
checkSupport(Context & context) const1095 void SpecializeWorkgroupTest::checkSupport(Context& context) const
1096 {
1097 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1098
1099 VkPhysicalDeviceProperties properties;
1100 deMemset(&properties, 0, sizeof(properties));
1101 context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
1102 if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
1103 TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
1104 }
1105
initPrograms(SourceCollections & sourceCollections) const1106 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1107 {
1108 std::ostringstream src;
1109 src << "#version 450\n";
1110 src << "#extension GL_EXT_null_initializer : enable\n";
1111 src << "\n";
1112 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1113 src << "layout(constant_id = 1) const uint WGY = 1;\n";
1114 src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1115 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1116 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1117 src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1118 src << "\n";
1119 src << "void main() {\n";
1120 src << " a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1121 src << "}\n";
1122
1123 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1124 }
1125
iterate(void)1126 tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
1127 {
1128 const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1129 return runCompute(m_context, size, 1, 1, 1, {m_x, m_y, m_z});
1130 }
1131
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group)1132 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group)
1133 {
1134 for (deUint32 z = 1; z <= 8; ++z)
1135 {
1136 for (deUint32 y = 1; y <= 8; ++y)
1137 {
1138 for (deUint32 x = 1; x <= 8; ++x)
1139 {
1140 group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1141 de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1142 de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1143 x, y, z));
1144 }
1145 }
1146 }
1147 }
1148
// Instance side of the repeated-pipeline test: records one dispatch and
// resubmits the identical command buffer several times, checking the output
// after each submission.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
	// xSize: workgroup x dimension; repeat: number of submissions of the same
	// command buffer; odd: which y-row (0 or 1) the shader populates.
	RepeatedPipelineInstance(Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestInstance(context),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_xSize;	// local_size_x, via specialization constant WGX
	deUint32 m_repeat;	// how many times the command buffer is submitted
	deUint32 m_odd;		// y-row expected to carry input data (0 or 1)
};
1165
// Test-case wrapper for the repeated-pipeline test. Note the parameter order:
// (xSize, repeat, odd) — call sites must match it.
class RepeatedPipelineTest : public vkt::TestCase
{
public:
	RepeatedPipelineTest(tcu::TestContext& testCtx,
		const std::string& name,
		const std::string& description,
		deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestCase(testCtx, name, description),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}

	// Adds the GLSL compute shader (m_odd is baked into its source) under "comp".
	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
	}
	// Throws NotSupportedError when the required extension is missing.
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_xSize;	// local_size_x, via specialization constant WGX
	deUint32 m_repeat;	// number of command-buffer submissions
	deUint32 m_odd;		// y-row (0 or 1) loaded from the input buffer
};
1191
checkSupport(Context & context) const1192 void RepeatedPipelineTest::checkSupport(Context& context) const
1193 {
1194 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1195 }
1196
initPrograms(SourceCollections & sourceCollections) const1197 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1198 {
1199 std::ostringstream src;
1200 src << "#version 450\n";
1201 src << "#extension GL_EXT_null_initializer : enable\n";
1202 src << "\n";
1203 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1204 src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1205 src << "\n";
1206 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1207 src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1208 src << "\n";
1209 src << "shared uint wg_mem[WGX][2] = {};\n";
1210 src << "void main() {\n";
1211 src << " if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1212 src << " wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1213 src << " }\n";
1214 src << " barrier();\n";
1215 src << " a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1216 src << "}\n";
1217
1218 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1219 }
1220
iterate(void)1221 tcu::TestStatus RepeatedPipelineInstance::iterate(void)
1222 {
1223 Context& context = m_context;
1224 const deUint32 bufferSize = m_xSize * 2 * (deUint32)sizeof(deUint32);
1225 const deUint32 numBuffers = 2;
1226
1227 const DeviceInterface& vk = context.getDeviceInterface();
1228 const VkDevice device = context.getDevice();
1229 Allocator& allocator = context.getDefaultAllocator();
1230 tcu::TestLog& log = context.getTestContext().getLog();
1231
1232 de::MovePtr<BufferWithMemory> buffers[numBuffers];
1233 VkDescriptorBufferInfo bufferDescriptors[numBuffers];
1234
1235 VkDeviceSize size = bufferSize;
1236 for (deUint32 i = 0; i < numBuffers; ++i)
1237 {
1238 buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1239 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
1240 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1241 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
1242 }
1243
1244 deUint32* ptrs[numBuffers];
1245 for (deUint32 i = 0; i < numBuffers; ++i)
1246 {
1247 ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
1248 }
1249 for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
1250 {
1251 ptrs[1][i] = i;
1252 }
1253 deMemset(ptrs[0], 0xff, (size_t)size);
1254
1255 DescriptorSetLayoutBuilder layoutBuilder;
1256 for (deUint32 i = 0; i < numBuffers; ++i)
1257 {
1258 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
1259 }
1260
1261 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1262 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
1263 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
1264 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1265 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1266
1267 const deUint32 specData[1] =
1268 {
1269 m_xSize,
1270 };
1271 const vk::VkSpecializationMapEntry entries[1] =
1272 {
1273 {0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
1274 };
1275 const vk::VkSpecializationInfo specInfo =
1276 {
1277 1,
1278 entries,
1279 sizeof(specData),
1280 specData
1281 };
1282
1283 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1284 {
1285 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1286 DE_NULL,
1287 (VkPipelineLayoutCreateFlags)0,
1288 1,
1289 &descriptorSetLayout.get(),
1290 0u,
1291 DE_NULL,
1292 };
1293 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1294 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
1295
1296 for (deUint32 i = 0; i < numBuffers; ++i)
1297 {
1298 flushAlloc(vk, device, buffers[i]->getAllocation());
1299 }
1300
1301 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
1302 const VkPipelineShaderStageCreateInfo shaderInfo =
1303 {
1304 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1305 DE_NULL,
1306 0,
1307 VK_SHADER_STAGE_COMPUTE_BIT,
1308 *shader,
1309 "main",
1310 &specInfo,
1311 };
1312
1313 const VkComputePipelineCreateInfo pipelineInfo =
1314 {
1315 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1316 DE_NULL,
1317 0u,
1318 shaderInfo,
1319 *pipelineLayout,
1320 (VkPipeline)0,
1321 0u,
1322 };
1323 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
1324
1325 const VkQueue queue = context.getUniversalQueue();
1326 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
1327 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1328 context.getUniversalQueueFamilyIndex());
1329 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1330
1331 DescriptorSetUpdateBuilder setUpdateBuilder;
1332 for (deUint32 i = 0; i < numBuffers; ++i)
1333 {
1334 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
1335 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
1336 }
1337 setUpdateBuilder.update(vk, device);
1338
1339 beginCommandBuffer(vk, *cmdBuffer, 0);
1340
1341 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1342 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1343
1344 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
1345
1346 endCommandBuffer(vk, *cmdBuffer);
1347
1348 for (deUint32 r = 0; r < m_repeat; ++r)
1349 {
1350 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1351
1352 invalidateAlloc(vk, device, buffers[0]->getAllocation());
1353
1354 for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
1355 {
1356 deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
1357 if (ptrs[0][i] != expected)
1358 {
1359 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
1360 return tcu::TestStatus::fail("compute failed");
1361 }
1362 }
1363
1364 deMemset(ptrs[0], 0xff, (size_t)size);
1365 flushAlloc(vk, device, buffers[0]->getAllocation());
1366 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
1367 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1368 setUpdateBuilder.update(vk, device);
1369 }
1370
1371 return tcu::TestStatus::pass("compute succeeded");
1372 }
1373
AddRepeatedPipelineTests(tcu::TestCaseGroup * group)1374 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group)
1375 {
1376 std::vector<deUint32> xSizes = {4, 16, 32, 64};
1377 std::vector<deUint32> odds = {0, 1};
1378 std::vector<deUint32> repeats = {2, 4, 8, 16};
1379 for (deUint32 i = 0; i < xSizes.size(); ++i)
1380 {
1381 deUint32 x = xSizes[i];
1382 for (deUint32 j = 0; j < odds.size(); ++j)
1383 {
1384 deUint32 odd = odds[j];
1385 for (deUint32 k = 0; k < repeats.size(); ++k)
1386 {
1387 deUint32 repeat = repeats[k];
1388 group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1389 std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1390 std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1391 x, odd, repeat));
1392 }
1393 }
1394 }
1395 }
1396
1397 } // anonymous
1398
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx)1399 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests(tcu::TestContext& testCtx)
1400 {
1401 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory", "VK_KHR_zero_intialize_workgroup_memory tests"));
1402
1403 tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1404 new tcu::TestCaseGroup(testCtx, "max_workgroup_memory", "Read initialization of max workgroup memory");
1405 AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup);
1406 tests->addChild(maxWorkgroupMemoryGroup);
1407
1408 tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types", "basic type tests");
1409 AddTypeTests(typeGroup);
1410 tests->addChild(typeGroup);
1411
1412 tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites", "composite type tests");
1413 AddCompositeTests(compositeGroup);
1414 tests->addChild(compositeGroup);
1415
1416 tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups", "max workgroups");
1417 AddMaxWorkgroupsTests(maxWorkgroupsGroup);
1418 tests->addChild(maxWorkgroupsGroup);
1419
1420 tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup", "specialize workgroup size");
1421 AddSpecializeWorkgroupTests(specializeWorkgroupGroup);
1422 tests->addChild(specializeWorkgroupGroup);
1423
1424 tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline", "repeated pipeline run");
1425 AddRepeatedPipelineTests(repeatPipelineGroup);
1426 tests->addChild(repeatPipelineGroup);
1427
1428 return tests.release();
1429 }
1430
1431 } // compute
1432 } // vkt
1433