1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Google LLC.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief VK_KHR_zero_initialize_workgroup_memory tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktTestCaseUtil.hpp"
28 #include "vktTestGroupUtil.hpp"
29 #include "vktAmberTestCase.hpp"
30
31 #include "vkBufferWithMemory.hpp"
32 #include "vkImageWithMemory.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 #include "vkObjUtil.hpp"
38 #include "vkDefs.hpp"
39 #include "vkRef.hpp"
40
41 #include "tcuCommandLine.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47
48 #include <algorithm>
49 #include <vector>
50
51 using namespace vk;
52
53 namespace vkt
54 {
55 namespace compute
56 {
57 namespace
58 {
59
runCompute(Context & context,deUint32 bufferSize,deUint32 numWGX,deUint32 numWGY,deUint32 numWGZ,const std::vector<deUint32> specValues={},deUint32 increment=0)60 tcu::TestStatus runCompute(Context& context, deUint32 bufferSize,
61 deUint32 numWGX, deUint32 numWGY, deUint32 numWGZ,
62 const std::vector<deUint32> specValues = {},
63 deUint32 increment = 0)
64 {
65 const DeviceInterface& vk = context.getDeviceInterface();
66 const VkDevice device = context.getDevice();
67 Allocator& allocator = context.getDefaultAllocator();
68 tcu::TestLog& log = context.getTestContext().getLog();
69
70 de::MovePtr<BufferWithMemory> buffer;
71 VkDescriptorBufferInfo bufferDescriptor;
72
73 VkDeviceSize size = bufferSize;
74 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
75 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
76 MemoryRequirement::HostVisible));
77 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
78
79 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
80 deMemset(ptr, increment ? 0 : 0xff, (size_t)size);
81
82 DescriptorSetLayoutBuilder layoutBuilder;
83 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
84
85 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
86 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
87 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
88 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
89 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
90
91 std::vector<VkSpecializationMapEntry> entries(specValues.size());
92 if (!specValues.empty())
93 {
94 for (deUint32 i = 0; i < specValues.size(); ++i)
95 {
96 entries[i] = {i, (deUint32)(sizeof(deUint32) * i), sizeof(deUint32)};
97 }
98 }
99 const VkSpecializationInfo specInfo =
100 {
101 (deUint32)specValues.size(),
102 entries.data(),
103 specValues.size() * sizeof(deUint32),
104 specValues.data(),
105 };
106
107 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
108 {
109 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
110 DE_NULL,
111 (VkPipelineLayoutCreateFlags)0,
112 1,
113 &descriptorSetLayout.get(),
114 0u,
115 DE_NULL,
116 };
117 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
118 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
119 flushAlloc(vk, device, buffer->getAllocation());
120
121 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
122 const VkPipelineShaderStageCreateInfo shaderInfo =
123 {
124 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
125 DE_NULL,
126 0,
127 VK_SHADER_STAGE_COMPUTE_BIT,
128 *shader,
129 "main",
130 specValues.empty() ? DE_NULL : &specInfo,
131 };
132
133 const VkComputePipelineCreateInfo pipelineInfo =
134 {
135 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
136 DE_NULL,
137 0u,
138 shaderInfo,
139 *pipelineLayout,
140 (VkPipeline)0,
141 0u,
142 };
143 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
144
145 const VkQueue queue = context.getUniversalQueue();
146 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
147 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
148 context.getUniversalQueueFamilyIndex());
149 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
150
151 DescriptorSetUpdateBuilder setUpdateBuilder;
152 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
153 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
154 setUpdateBuilder.update(vk, device);
155
156 beginCommandBuffer(vk, *cmdBuffer, 0);
157
158 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
159 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
160
161 vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);
162
163 endCommandBuffer(vk, *cmdBuffer);
164
165 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
166
167 invalidateAlloc(vk, device, buffer->getAllocation());
168
169 for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
170 {
171 deUint32 expected = increment ? numWGX * numWGY * numWGZ : 0u;
172 if (ptr[i] != expected)
173 {
174 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
175 return tcu::TestStatus::fail("compute failed");
176 }
177 }
178
179 return tcu::TestStatus::pass("compute succeeded");
180 }
181
182 class MaxWorkgroupMemoryInstance : public vkt::TestInstance
183 {
184 public:
MaxWorkgroupMemoryInstance(Context & context,deUint32 numWorkgroups)185 MaxWorkgroupMemoryInstance(Context& context, deUint32 numWorkgroups)
186 : TestInstance(context),
187 m_numWorkgroups(numWorkgroups)
188 {
189 }
190 tcu::TestStatus iterate(void);
191
192 private:
193 deUint32 m_numWorkgroups;
194 };
195
196 class MaxWorkgroupMemoryTest : public vkt::TestCase
197 {
198 public:
MaxWorkgroupMemoryTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,deUint32 numWorkgroups)199 MaxWorkgroupMemoryTest(tcu::TestContext& testCtx,
200 const std::string& name,
201 const std::string& description,
202 deUint32 numWorkgroups)
203 : TestCase(testCtx, name, description),
204 m_numWorkgroups(numWorkgroups)
205 {
206 }
207
208 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const209 TestInstance* createInstance(Context& context) const
210 {
211 return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups);
212 }
213 virtual void checkSupport(Context& context) const;
214
215 private:
216 deUint32 m_numWorkgroups;
217 };
218
// Zero-initialized ("= {}") shared memory requires the dedicated extension.
void MaxWorkgroupMemoryTest::checkSupport(Context& context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
}
223
initPrograms(SourceCollections & sourceCollections) const224 void MaxWorkgroupMemoryTest::initPrograms(SourceCollections& sourceCollections) const
225 {
226 std::ostringstream src;
227 src << "#version 450\n";
228 src << "#extension GL_EXT_null_initializer : enable\n";
229 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
230 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
231 src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
232 src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
233 src << "shared uvec4 wg_mem[num_elems] = {};\n";
234 src << "void main() {\n";
235 src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
236 src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
237 src << " uint idx_x = gl_LocalInvocationID.x;\n";
238 src << " uint idx = idx_x + idx_y + idx_z;\n";
239 src << " uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
240 src << " for (uint i = 0; i < num_elems; ++i) {\n";
241 src << " for (uint j = 0; j < 4; ++j) {\n";
242 src << " uint shared_idx = 4*i + j;\n";
243 src << " uint wg_val = wg_mem[i][j];\n";
244 src << " if (idx == shared_idx) {\n";
245 src << " atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
246 src << " } else if (idx == 0 && shared_idx >= wg_size) {\n";
247 src << " atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
248 src << " }\n";
249 src << " }\n";
250 src << " }\n";
251 src << "}\n";
252
253 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
254 }
255
iterate(void)256 tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
257 {
258 VkPhysicalDeviceProperties properties;
259 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
260 const deUint32 maxMemSize = properties.limits.maxComputeSharedMemorySize;
261
262 const deUint32 maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
263 deUint32 wgx = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
264 deUint32 wgy = 1;
265 deUint32 wgz = 1;
266 if (wgx < maxWG)
267 {
268 wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
269 }
270 if ((wgx * wgy) < maxWG)
271 {
272 wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
273 }
274 const deUint32 size = maxMemSize;
275 const deUint32 numElems = maxMemSize / 16;
276
277 return runCompute(m_context, size, m_numWorkgroups, 1, 1, {wgx, wgy, wgz, numElems}, /*increment*/ 1);
278 }
279
AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup * group)280 void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup* group)
281 {
282 std::vector<deUint32> workgroups = {1, 2, 4, 16, 64, 128};
283 for (deUint32 i = 0; i < workgroups.size(); ++i) {
284 deUint32 numWG = workgroups[i];
285 group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(),
286 de::toString(numWG), de::toString(numWG) + " workgroups", numWG));
287 }
288 }
289
// Parameters for one per-type zero-initialization case.
struct TypeCaseDef
{
    std::string typeName;     // GLSL type of the shared variable, e.g. "f16mat2x3"
    deUint32 typeSize;        // size in bytes of one scalar component
    deUint32 numElements;     // components per vector / per matrix column (1 for scalars)
    deUint32 numRows;         // second matrix dimension (1 for scalars and vectors);
                              // initPrograms uses it as the first matrix index
    deUint32 numVariables;    // number of shared variables to declare; filled in by AddTypeTests
};
298
299 class TypeTestInstance : public vkt::TestInstance
300 {
301 public:
TypeTestInstance(Context & context,const TypeCaseDef & caseDef)302 TypeTestInstance(Context& context, const TypeCaseDef& caseDef)
303 : TestInstance(context),
304 m_caseDef(caseDef)
305 {
306 }
307 tcu::TestStatus iterate(void);
308
309 private:
310 TypeCaseDef m_caseDef;
311 };
312
313 class TypeTest : public vkt::TestCase
314 {
315 public:
TypeTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const TypeCaseDef & caseDef)316 TypeTest(tcu::TestContext& testCtx,
317 const std::string& name,
318 const std::string& description,
319 const TypeCaseDef& caseDef)
320 : TestCase(testCtx, name, description),
321 m_caseDef(caseDef)
322 {
323 }
324
325 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const326 TestInstance* createInstance(Context& context) const
327 {
328 return new TypeTestInstance(context, m_caseDef);
329 }
330 virtual void checkSupport(Context& context) const;
331
332 private:
333 TypeCaseDef m_caseDef;
334 };
335
checkSupport(Context & context) const336 void TypeTest::checkSupport(Context& context) const
337 {
338 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
339
340 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
341 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
342 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
343 f16_i8_features.pNext = DE_NULL;
344
345 VkPhysicalDeviceFeatures2 features2;
346 deMemset(&features2, 0, sizeof(features2));
347 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
348 features2.pNext = &f16_i8_features;
349 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
350
351 if (m_caseDef.typeName == "float16_t" ||
352 m_caseDef.typeName == "f16vec2" ||
353 m_caseDef.typeName == "f16vec3" ||
354 m_caseDef.typeName == "f16vec4" ||
355 m_caseDef.typeName == "f16mat2x2" ||
356 m_caseDef.typeName == "f16mat2x3" ||
357 m_caseDef.typeName == "f16mat2x4" ||
358 m_caseDef.typeName == "f16mat3x2" ||
359 m_caseDef.typeName == "f16mat3x3" ||
360 m_caseDef.typeName == "f16mat3x4" ||
361 m_caseDef.typeName == "f16mat4x2" ||
362 m_caseDef.typeName == "f16mat4x3" ||
363 m_caseDef.typeName == "f16mat4x4")
364 {
365 if (f16_i8_features.shaderFloat16 != VK_TRUE)
366 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
367 }
368
369 if (m_caseDef.typeName == "float64_t" ||
370 m_caseDef.typeName == "f64vec2" ||
371 m_caseDef.typeName == "f64vec3" ||
372 m_caseDef.typeName == "f64vec4"||
373 m_caseDef.typeName == "f64mat2x2" ||
374 m_caseDef.typeName == "f64mat2x3" ||
375 m_caseDef.typeName == "f64mat2x4" ||
376 m_caseDef.typeName == "f64mat3x2" ||
377 m_caseDef.typeName == "f64mat3x3" ||
378 m_caseDef.typeName == "f64mat3x4" ||
379 m_caseDef.typeName == "f64mat4x2" ||
380 m_caseDef.typeName == "f64mat4x3" ||
381 m_caseDef.typeName == "f64mat4x4")
382 {
383 if (features2.features.shaderFloat64 != VK_TRUE)
384 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
385 }
386
387 if (m_caseDef.typeName == "int8_t" ||
388 m_caseDef.typeName == "i8vec2" ||
389 m_caseDef.typeName == "i8vec3" ||
390 m_caseDef.typeName == "i8vec4" ||
391 m_caseDef.typeName == "uint8_t" ||
392 m_caseDef.typeName == "u8vec2" ||
393 m_caseDef.typeName == "u8vec3" ||
394 m_caseDef.typeName == "u8vec4")
395 {
396 if (f16_i8_features.shaderInt8 != VK_TRUE)
397 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
398 }
399
400 if (m_caseDef.typeName == "int16_t" ||
401 m_caseDef.typeName == "i16vec2" ||
402 m_caseDef.typeName == "i16vec3" ||
403 m_caseDef.typeName == "i16vec4" ||
404 m_caseDef.typeName == "uint16_t" ||
405 m_caseDef.typeName == "u16vec2" ||
406 m_caseDef.typeName == "u16vec3" ||
407 m_caseDef.typeName == "u16vec4")
408 {
409 if (features2.features.shaderInt16 != VK_TRUE)
410 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
411 }
412
413 if (m_caseDef.typeName == "int64_t" ||
414 m_caseDef.typeName == "i64vec2" ||
415 m_caseDef.typeName == "i64vec3" ||
416 m_caseDef.typeName == "i64vec4" ||
417 m_caseDef.typeName == "uint64_t" ||
418 m_caseDef.typeName == "u64vec2" ||
419 m_caseDef.typeName == "u64vec3" ||
420 m_caseDef.typeName == "u64vec4")
421 {
422 if (features2.features.shaderInt64 != VK_TRUE)
423 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
424 }
425 }
426
initPrograms(SourceCollections & sourceCollections) const427 void TypeTest::initPrograms(SourceCollections& sourceCollections) const
428 {
429 std::ostringstream src;
430 src << "#version 450\n";
431 src << "#extension GL_EXT_null_initializer : enable\n";
432 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
433 src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows << ", local_size_y = 1, local_size_z = 1) in;\n";
434 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
435 for (deUint32 i = 0; i < m_caseDef.numVariables; ++i) {
436 src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
437 }
438 src << "void main() {\n";
439 if (m_caseDef.numRows > 1)
440 {
441 src << " uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
442 src << " uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
443 }
444 std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
445 for (deUint32 v = 0; v < m_caseDef.numVariables; ++v)
446 {
447 if (m_caseDef.numElements == 1)
448 {
449 // Scalars.
450 src << " a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") == 0) ? 0 : 1;\n";
451 }
452 else if (m_caseDef.numRows == 1)
453 {
454 // Vectors.
455 src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[gl_LocalInvocationID.x]) == 0) ? 0 : 1;\n";
456 }
457 else
458 {
459 // Matrices.
460 src << " a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv << "(wg_mem" << v << "[row][col]) == 0) ? 0 : 1;\n";
461 }
462 }
463 src << "}\n";
464
465 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
466 }
467
iterate(void)468 tcu::TestStatus TypeTestInstance::iterate(void)
469 {
470 const deUint32 varBytes = m_caseDef.numElements * m_caseDef.numRows * (deUint32)sizeof(deUint32);
471 return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1);
472 }
473
AddTypeTests(tcu::TestCaseGroup * group)474 void AddTypeTests(tcu::TestCaseGroup* group)
475 {
476 deRandom rnd;
477 deRandom_init(&rnd, 0);
478 std::vector<TypeCaseDef> cases =
479 {
480 {"bool", 1, 1, 1, 0},
481 {"bvec2", 1, 2, 1, 0},
482 {"bvec3", 1, 3, 1, 0},
483 {"bvec4", 1, 4, 1, 0},
484 {"uint32_t", 4, 1, 1, 0},
485 {"uvec2", 4, 2, 1, 0},
486 {"uvec3", 4, 3, 1, 0},
487 {"uvec4", 4, 4, 1, 0},
488 {"int32_t", 4, 1, 1, 0},
489 {"ivec2", 4, 2, 1, 0},
490 {"ivec3", 4, 3, 1, 0},
491 {"ivec4", 4, 4, 1, 0},
492 {"uint8_t", 1, 1, 1, 0},
493 {"u8vec2", 1, 2, 1, 0},
494 {"u8vec3", 1, 3, 1, 0},
495 {"u8vec4", 1, 4, 1, 0},
496 {"int8_t", 1, 1, 1, 0},
497 {"i8vec2", 1, 2, 1, 0},
498 {"i8vec3", 1, 3, 1, 0},
499 {"i8vec4", 1, 4, 1, 0},
500 {"uint16_t", 2, 1, 1, 0},
501 {"u16vec2", 2, 2, 1, 0},
502 {"u16vec3", 2, 3, 1, 0},
503 {"u16vec4", 2, 4, 1, 0},
504 {"int16_t", 2, 1, 1, 0},
505 {"i16vec2", 2, 2, 1, 0},
506 {"i16vec3", 2, 3, 1, 0},
507 {"i16vec4", 2, 4, 1, 0},
508 {"uint64_t", 8, 1, 1, 0},
509 {"u64vec2", 8, 2, 1, 0},
510 {"u64vec3", 8, 3, 1, 0},
511 {"u64vec4", 8, 4, 1, 0},
512 {"int64_t", 8, 1, 1, 0},
513 {"i64vec2", 8, 2, 1, 0},
514 {"i64vec3", 8, 3, 1, 0},
515 {"i64vec4", 8, 4, 1, 0},
516 {"float32_t", 4, 1, 1, 0},
517 {"f32vec2", 4, 2, 1, 0},
518 {"f32vec3", 4, 3, 1, 0},
519 {"f32vec4", 4, 4, 1, 0},
520 {"f32mat2x2", 4, 2, 2, 0},
521 {"f32mat2x3", 4, 3, 2, 0},
522 {"f32mat2x4", 4, 4, 2, 0},
523 {"f32mat3x2", 4, 2, 3, 0},
524 {"f32mat3x3", 4, 3, 3, 0},
525 {"f32mat3x4", 4, 4, 3, 0},
526 {"f32mat4x2", 4, 2, 4, 0},
527 {"f32mat4x3", 4, 3, 4, 0},
528 {"f32mat4x4", 4, 4, 4, 0},
529 {"float16_t", 2, 1, 1, 0},
530 {"f16vec2", 2, 2, 1, 0},
531 {"f16vec3", 2, 3, 1, 0},
532 {"f16vec4", 2, 4, 1, 0},
533 {"f16mat2x2", 2, 2, 2, 0},
534 {"f16mat2x3", 2, 3, 2, 0},
535 {"f16mat2x4", 2, 4, 2, 0},
536 {"f16mat3x2", 2, 2, 3, 0},
537 {"f16mat3x3", 2, 3, 3, 0},
538 {"f16mat3x4", 2, 4, 3, 0},
539 {"f16mat4x2", 2, 2, 4, 0},
540 {"f16mat4x3", 2, 3, 4, 0},
541 {"f16mat4x4", 2, 4, 4, 0},
542 {"float64_t", 8, 1, 1, 0},
543 {"f64vec2", 8, 2, 1, 0},
544 {"f64vec3", 8, 3, 1, 0},
545 {"f64vec4", 8, 4, 1, 0},
546 {"f64mat2x2", 8, 2, 2, 0},
547 {"f64mat2x3", 8, 3, 2, 0},
548 {"f64mat2x4", 8, 4, 2, 0},
549 {"f64mat3x2", 8, 2, 3, 0},
550 {"f64mat3x3", 8, 3, 3, 0},
551 {"f64mat3x4", 8, 4, 3, 0},
552 {"f64mat4x2", 8, 2, 4, 0},
553 {"f64mat4x3", 8, 3, 4, 0},
554 {"f64mat4x4", 8, 4, 4, 0},
555 };
556
557 for (deUint32 i = 0; i < cases.size(); ++i)
558 {
559 cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
560 group->addChild(
561 new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i].typeName.c_str(), cases[i]));
562 }
563 }
564
// Parameters for one composite-type zero-initialization case.
struct CompositeCaseDef
{
    deUint32 index;                    // also a bitmask of required features:
                                       // 0x1 f16, 0x2 f64, 0x4 i8, 0x8 i16, 0x10 i64
                                       // (consumed by CompositeTest::checkSupport)
    std::string typeDefinition;        // GLSL declaring the zero-initialized shared composite(s)
    std::string assignment;            // GLSL main() body writing one uint per checked component
    deUint32 elements;                 // number of uints written into the output buffer
    std::vector<deUint32> specValues;  // values for spec constants specId0..specIdN-1

    CompositeCaseDef (uint32_t index_, const std::string& typeDefinition_, const std::string& assignment_, uint32_t elements_, const std::vector<uint32_t>& specValues_)
        : index (index_)
        , typeDefinition (typeDefinition_)
        , assignment (assignment_)
        , elements (elements_)
        , specValues (specValues_)
    {}
};
581
582 class CompositeTestInstance : public vkt::TestInstance
583 {
584 public:
CompositeTestInstance(Context & context,const CompositeCaseDef & caseDef)585 CompositeTestInstance(Context& context, const CompositeCaseDef& caseDef)
586 : TestInstance(context),
587 m_caseDef(caseDef)
588 {
589 }
590 tcu::TestStatus iterate(void);
591 private:
592 CompositeCaseDef m_caseDef;
593 };
594
595 class CompositeTest : public vkt::TestCase
596 {
597 public:
CompositeTest(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const CompositeCaseDef & caseDef)598 CompositeTest(tcu::TestContext& testCtx,
599 const std::string& name,
600 const std::string& description,
601 const CompositeCaseDef& caseDef)
602 : TestCase(testCtx, name, description),
603 m_caseDef(caseDef)
604 {
605 }
606
607 void initPrograms(SourceCollections& sourceCollections) const;
createInstance(Context & context) const608 TestInstance* createInstance(Context& context) const
609 {
610 return new CompositeTestInstance(context, m_caseDef);
611 }
612 virtual void checkSupport(Context& context) const;
613 private:
614 CompositeCaseDef m_caseDef;
615 };
616
checkSupport(Context & context) const617 void CompositeTest::checkSupport(Context& context) const
618 {
619 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
620
621 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
622 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
623 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
624 f16_i8_features.pNext = DE_NULL;
625
626 VkPhysicalDeviceFeatures2 features2;
627 deMemset(&features2, 0, sizeof(features2));
628 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
629 features2.pNext = &f16_i8_features;
630 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
631
632 bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
633 bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
634 bool needsInt8 = (m_caseDef.index & 0x4) != 0;
635 bool needsInt16 = (m_caseDef.index & 0x8) != 0;
636 bool needsInt64 = (m_caseDef.index & 0x10) != 0;
637
638 if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
639 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
640 if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
641 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
642 if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
643 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
644 if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
645 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
646 if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
647 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
648 }
649
initPrograms(SourceCollections & sourceCollections) const650 void CompositeTest::initPrograms(SourceCollections& sourceCollections) const
651 {
652 std::ostringstream src;
653 src << "#version 450\n";
654 src << "#extension GL_EXT_null_initializer : enable\n";
655 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
656 src << "\n";
657 for (deUint32 i = 0; i < m_caseDef.specValues.size(); ++i) {
658 src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
659 }
660 src << "\n";
661 src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
662 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
663 src << "\n";
664 src << m_caseDef.typeDefinition;
665 src << "\n";
666 src << "void main() {\n";
667 src << m_caseDef.assignment;
668 src << "}\n";
669
670 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
671 }
672
iterate(void)673 tcu::TestStatus CompositeTestInstance::iterate(void)
674 {
675 const deUint32 bufferSize = (deUint32)sizeof(deUint32) * m_caseDef.elements;
676 return runCompute(m_context, bufferSize, 1, 1, 1, m_caseDef.specValues);
677 }
678
// Registers the composite-type cases. Each entry supplies the GLSL for a
// zero-initialized ("= {}") shared composite plus a main() body that writes
// one uint per checked component into a.a[]; runCompute() verifies every
// result is zero. Entry fields: {featureMask/index, typeDefinition,
// assignment, elements, specValues}.
void AddCompositeTests(tcu::TestCaseGroup* group)
{
    const std::vector<CompositeCaseDef> cases
    {
        // 1D array of uint, length from a spec constant.
        {0,
        "shared uint wg_mem[specId0] = {};\n",

        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[i] = wg_mem[i];\n"
        "}\n",
        16,
        {16},
        },

        // 2D array of float, both dimensions from spec constants (4*8 == 32).
        {0,
        "shared float wg_mem[specId0][specId1] = {};\n",

        "for (uint i = 0; i < specId0; ++i) {\n"
        " for (uint j = 0; j < specId1; ++j) {\n"
        " uint idx = i * specId1 + j;\n"
        " a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
        " }\n"
        "}\n",
        32,
        {4, 8},
        },

        // Struct mixing uint/float/bool scalars and vectors; 30 components.
        {0,
        "struct Sa {\n"
        " uint a;\n"
        " uvec2 b;\n"
        " uvec3 c;\n"
        " uvec4 d;\n"
        " float e;\n"
        " vec2 f;\n"
        " vec3 g;\n"
        " vec4 h;\n"
        " bool i;\n"
        " bvec2 j;\n"
        " bvec3 k;\n"
        " bvec4 l;\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint i = 0;\n"
        "a.a[i++] = wg_mem.a;\n"
        "a.a[i++] = wg_mem.b.x;\n"
        "a.a[i++] = wg_mem.b.y;\n"
        "a.a[i++] = wg_mem.c.x;\n"
        "a.a[i++] = wg_mem.c.y;\n"
        "a.a[i++] = wg_mem.c.z;\n"
        "a.a[i++] = wg_mem.d.x;\n"
        "a.a[i++] = wg_mem.d.y;\n"
        "a.a[i++] = wg_mem.d.z;\n"
        "a.a[i++] = wg_mem.d.w;\n"
        "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
        "a.a[i++] = wg_mem.i ? 1 : 0;\n"
        "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
        "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
        "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
        "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
        "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
        "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
        "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
        "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
        "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
        30,
        {},
        },

        // Array of structs containing arrays of structs; 4*(2 + 3*2) == 32.
        {0,
        "struct Sa {\n"
        " uint a;\n"
        "};\n"
        "struct Sb {\n"
        " uvec2 a;\n"
        "};\n"
        "struct Sc {\n"
        " Sa a[specId0];\n"
        " Sb b[specId1];\n"
        "};\n"
        "shared Sc wg_mem[specId2] = {};\n",

        "uint idx = 0;\n"
        "for (uint i = 0; i < specId2; ++i) {\n"
        " for (uint j = 0; j < specId0; ++j) {\n"
        " a.a[idx++] = wg_mem[i].a[j].a;\n"
        " }\n"
        " for (uint j = 0; j < specId1; ++j) {\n"
        " a.a[idx++] = wg_mem[i].b[j].a.x;\n"
        " a.a[idx++] = wg_mem[i].b[j].a.y;\n"
        " }\n"
        "}\n",
        32,
        {2,3,4},
        },

        // 16-bit floats (requires shaderFloat16 — mask 0x1).
        {1,
        "struct Sa {\n"
        " f16vec2 a;\n"
        " float16_t b[specId0];\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
        "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
        "}\n",
        18,
        {16},
        },

        // 64-bit floats (requires shaderFloat64 — mask 0x2).
        {2,
        "struct Sa {\n"
        " f64vec2 a;\n"
        " float64_t b[specId0];\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
        "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
        "}\n",
        7,
        {5},
        },

        // 8-bit ints (requires shaderInt8 — mask 0x4).
        {4,
        "struct Sa {\n"
        " i8vec2 a;\n"
        " int8_t b[specId0];\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
        "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
        "}\n",
        34,
        {32},
        },

        // 16-bit ints (requires shaderInt16 — mask 0x8).
        {8,
        "struct Sa {\n"
        " i16vec2 a;\n"
        " int16_t b[specId0];\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
        "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
        "}\n",
        122,
        {120},
        },

        // 64-bit ints (requires shaderInt64 — mask 0x10).
        {16,
        "struct Sa {\n"
        " i64vec2 a;\n"
        " int64_t b[specId0];\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
        "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
        "for (uint i = 0; i < specId0; ++i) {\n"
        " a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
        "}\n",
        63,
        {61},
        },

        // One scalar of every width in a single struct (all features — 0x1f).
        {0x1f,
        "struct Sa {\n"
        " float16_t a;\n"
        " float b;\n"
        " int8_t c;\n"
        " int16_t d;\n"
        " int e;\n"
        " int64_t f;\n"
        " float64_t g;\n"
        "};\n"
        "shared Sa wg_mem = {};\n",

        "uint idx = 0;\n"
        "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
        "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
        "a.a[idx++] = uint(wg_mem.c);\n"
        "a.a[idx++] = uint(wg_mem.d);\n"
        "a.a[idx++] = uint(wg_mem.e);\n"
        "a.a[idx++] = uint(wg_mem.f);\n"
        "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
        7,
        {},
        },

        // Five levels of struct/array nesting; per outer element
        // 1 + 3*(1 + 4*(1 + 5*(1 + 6))) == 436, times 2 outer == 872.
        {0,
        "struct Sa {\n"
        " uint a;\n"
        "};\n"
        "struct Sb {\n"
        " Sa a[specId0];\n"
        " uint b;\n"
        "};\n"
        "struct Sc {\n"
        " Sb b[specId1];\n"
        " uint c;\n"
        "};\n"
        "struct Sd {\n"
        " Sc c[specId2];\n"
        " uint d;\n"
        "};\n"
        "struct Se {\n"
        " Sd d[specId3];\n"
        " uint e;\n"
        "};\n"
        "shared Se wg_mem[specId4] = {};\n",

        "uint idx = 0;\n"
        "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
        " a.a[idx++] = wg_mem[i1].e;\n"
        " for (uint i2 = 0; i2 < specId3; ++i2) {\n"
        " a.a[idx++] = wg_mem[i1].d[i2].d;\n"
        " for (uint i3 = 0; i3 < specId2; ++i3) {\n"
        " a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
        " for (uint i4 = 0; i4 < specId1; ++i4) {\n"
        " a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
        " for (uint i5 = 0; i5 < specId0; ++i5) {\n"
        " a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
        " }\n"
        " }\n"
        " }\n"
        " }\n"
        "}\n",
        872,
        {6,5,4,3,2},
        },
    };

    // Cases are named by their index in the table.
    for (deUint32 i = 0; i < cases.size(); ++i)
    {
        group->addChild(
            new CompositeTest(group->getTestContext(), de::toString(i), de::toString(i), cases[i]));
    }
}
943
// Dispatch dimension exercised by the MaxWorkgroups tests.
enum Dim {
    DimX,
    DimY,
    DimZ,
};
949
950 class MaxWorkgroupsInstance : public vkt::TestInstance
951 {
952 public:
MaxWorkgroupsInstance(Context & context,Dim dim)953 MaxWorkgroupsInstance(Context &context, Dim dim)
954 : TestInstance(context),
955 m_dim(dim)
956 {
957 }
958 tcu::TestStatus iterate(void);
959 private:
960 Dim m_dim;
961 };
962
// Test case for the max_workgroups group: declares the shader, checks for
// the zero-initialize extension and creates a MaxWorkgroupsInstance.
class MaxWorkgroupsTest : public vkt::TestCase
{
public:
	MaxWorkgroupsTest(tcu::TestContext& testCtx,
		const std::string& name,
		const std::string& description,
		Dim dim)
		: TestCase(testCtx, name, description),
		m_dim(dim)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new MaxWorkgroupsInstance(context, m_dim);
	}
	virtual void checkSupport(Context& context) const;
private:
	Dim m_dim;	// Dimension forwarded to the instance.
};
984
checkSupport(Context & context) const985 void MaxWorkgroupsTest::checkSupport(Context& context) const
986 {
987 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
988 }
989
initPrograms(SourceCollections & sourceCollections) const990 void MaxWorkgroupsTest::initPrograms(SourceCollections& sourceCollections) const
991 {
992 std::ostringstream src;
993 src << "#version 450\n";
994 src << "#extension GL_EXT_null_initializer : enable\n";
995 src << "\n";
996 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
997 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
998 src << "shared uint wg_mem[2] = {};\n";
999 std::string dim;
1000 switch (m_dim) {
1001 case DimX:
1002 dim = "x";
1003 break;
1004 case DimY:
1005 dim = "y";
1006 break;
1007 case DimZ:
1008 dim = "z";
1009 break;
1010 }
1011 src << "\n";
1012 src << "void main() {\n";
1013 src << " uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
1014 src << " uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
1015 src << " uint idx_x = gl_LocalInvocationID.x;\n";
1016 src << " uint idx = idx_x + idx_y + idx_z;\n";
1017 src << " if (gl_LocalInvocationID.x == 0) {\n";
1018 src << " wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
1019 src << " }\n";
1020 src << " barrier();\n";
1021 src << " atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
1022 src << "}\n";
1023
1024 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1025 }
1026
iterate(void)1027 tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
1028 {
1029 VkPhysicalDeviceProperties properties;
1030 deMemset(&properties, 0, sizeof(properties));
1031 m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
1032
1033 const deUint32 maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
1034 deUint32 wgx = properties.limits.maxComputeWorkGroupSize[0];
1035 deUint32 wgy = 1;
1036 deUint32 wgz = 1;
1037 if (wgx < maxWG)
1038 {
1039 wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
1040 }
1041 if ((wgx * wgy) < maxWG)
1042 {
1043 wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
1044 }
1045 deUint32 size = (deUint32)sizeof(deUint32) * wgx * wgy * wgz;
1046
1047 deUint32 num_wgx = m_dim == DimX ? 65535 : 1;
1048 deUint32 num_wgy = m_dim == DimY ? 65535 : 1;
1049 deUint32 num_wgz = m_dim == DimZ ? 65535 : 1;
1050
1051 return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, {wgx, wgy, wgz}, /*increment*/ 1);
1052 }
1053
AddMaxWorkgroupsTests(tcu::TestCaseGroup * group)1054 void AddMaxWorkgroupsTests(tcu::TestCaseGroup* group)
1055 {
1056 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", "max x dim workgroups", DimX));
1057 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", "max y dim workgroups", DimY));
1058 group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", "max z dim workgroups", DimZ));
1059 }
1060
// Runs a single workgroup whose dimensions are set via specialization
// constants and verifies the zero-initialized shared array reads back as
// zero for every invocation.
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
	SpecializeWorkgroupInstance(Context &context, deUint32 x, deUint32 y, deUint32 z)
		: TestInstance(context),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}
	tcu::TestStatus iterate(void);
private:
	// Workgroup dimensions, passed as specialization constants 0..2.
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1077
// Test case for the specialize_workgroup group: the workgroup size both
// selects the dispatch shape and sizes the shared array in the shader.
class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
	SpecializeWorkgroupTest(tcu::TestContext& testCtx,
		const std::string& name,
		const std::string& description,
		deUint32 x, deUint32 y, deUint32 z)
		: TestCase(testCtx, name, description),
		m_x(x),
		m_y(y),
		m_z(z)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z);
	}
	virtual void checkSupport(Context& context) const;
private:
	// Workgroup dimensions forwarded to the instance.
	deUint32 m_x;
	deUint32 m_y;
	deUint32 m_z;
};
1103
checkSupport(Context & context) const1104 void SpecializeWorkgroupTest::checkSupport(Context& context) const
1105 {
1106 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1107
1108 VkPhysicalDeviceProperties properties;
1109 deMemset(&properties, 0, sizeof(properties));
1110 context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
1111 if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
1112 TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
1113 }
1114
initPrograms(SourceCollections & sourceCollections) const1115 void SpecializeWorkgroupTest::initPrograms(SourceCollections& sourceCollections) const
1116 {
1117 std::ostringstream src;
1118 src << "#version 450\n";
1119 src << "#extension GL_EXT_null_initializer : enable\n";
1120 src << "\n";
1121 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1122 src << "layout(constant_id = 1) const uint WGY = 1;\n";
1123 src << "layout(constant_id = 2) const uint WGZ = 1;\n";
1124 src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
1125 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1126 src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
1127 src << "\n";
1128 src << "void main() {\n";
1129 src << " a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * gl_WorkGroupSize.x + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
1130 src << "}\n";
1131
1132 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1133 }
1134
iterate(void)1135 tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
1136 {
1137 const deUint32 size = (deUint32)sizeof(deUint32) * m_x * m_y * m_z;
1138 return runCompute(m_context, size, 1, 1, 1, {m_x, m_y, m_z});
1139 }
1140
AddSpecializeWorkgroupTests(tcu::TestCaseGroup * group)1141 void AddSpecializeWorkgroupTests(tcu::TestCaseGroup* group)
1142 {
1143 for (deUint32 z = 1; z <= 8; ++z)
1144 {
1145 for (deUint32 y = 1; y <= 8; ++y)
1146 {
1147 for (deUint32 x = 1; x <= 8; ++x)
1148 {
1149 group->addChild(new SpecializeWorkgroupTest(group->getTestContext(),
1150 de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1151 de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z),
1152 x, y, z));
1153 }
1154 }
1155 }
1156 }
1157
// Submits the same recorded dispatch m_repeat times and verifies between
// submissions that shared memory is freshly zero-initialized on every run.
class RepeatedPipelineInstance : public vkt::TestInstance
{
public:
	RepeatedPipelineInstance(Context& context, deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestInstance(context),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}
	tcu::TestStatus iterate(void);
private:
	deUint32 m_xSize;	// Workgroup x dimension (spec constant 0); y is fixed at 2.
	deUint32 m_repeat;	// Number of times the command buffer is resubmitted.
	deUint32 m_odd;		// Which y row (0 or 1) copies input into shared memory.
};
1174
// Test case for the repeat_pipeline group; note the constructor parameter
// order is (xSize, repeat, odd).
class RepeatedPipelineTest : public vkt::TestCase
{
public:
	RepeatedPipelineTest(tcu::TestContext& testCtx,
		const std::string& name,
		const std::string& description,
		deUint32 xSize, deUint32 repeat, deUint32 odd)
		: TestCase(testCtx, name, description),
		m_xSize(xSize),
		m_repeat(repeat),
		m_odd(odd)
	{
	}

	void initPrograms(SourceCollections& sourceCollections) const;
	TestInstance* createInstance(Context& context) const
	{
		return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
	}
	virtual void checkSupport(Context& context) const;
private:
	deUint32 m_xSize;	// Workgroup x dimension (spec constant 0).
	deUint32 m_repeat;	// Resubmission count.
	deUint32 m_odd;		// Selected y row, baked into the shader text.
};
1200
checkSupport(Context & context) const1201 void RepeatedPipelineTest::checkSupport(Context& context) const
1202 {
1203 context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1204 }
1205
initPrograms(SourceCollections & sourceCollections) const1206 void RepeatedPipelineTest::initPrograms(SourceCollections& sourceCollections) const
1207 {
1208 std::ostringstream src;
1209 src << "#version 450\n";
1210 src << "#extension GL_EXT_null_initializer : enable\n";
1211 src << "\n";
1212 src << "layout(constant_id = 0) const uint WGX = 1;\n";
1213 src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1214 src << "\n";
1215 src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1216 src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1217 src << "\n";
1218 src << "shared uint wg_mem[WGX][2] = {};\n";
1219 src << "void main() {\n";
1220 src << " if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1221 src << " wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x];\n";
1222 src << " }\n";
1223 src << " barrier();\n";
1224 src << " a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1225 src << "}\n";
1226
1227 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1228 }
1229
iterate(void)1230 tcu::TestStatus RepeatedPipelineInstance::iterate(void)
1231 {
1232 Context& context = m_context;
1233 const deUint32 bufferSize = m_xSize * 2 * (deUint32)sizeof(deUint32);
1234 const deUint32 numBuffers = 2;
1235
1236 const DeviceInterface& vk = context.getDeviceInterface();
1237 const VkDevice device = context.getDevice();
1238 Allocator& allocator = context.getDefaultAllocator();
1239 tcu::TestLog& log = context.getTestContext().getLog();
1240
1241 de::MovePtr<BufferWithMemory> buffers[numBuffers];
1242 VkDescriptorBufferInfo bufferDescriptors[numBuffers];
1243
1244 VkDeviceSize size = bufferSize;
1245 for (deUint32 i = 0; i < numBuffers; ++i)
1246 {
1247 buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1248 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
1249 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1250 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
1251 }
1252
1253 deUint32* ptrs[numBuffers];
1254 for (deUint32 i = 0; i < numBuffers; ++i)
1255 {
1256 ptrs[i] = (deUint32*)buffers[i]->getAllocation().getHostPtr();
1257 }
1258 for (deUint32 i = 0; i < bufferSize / sizeof(deUint32); ++i)
1259 {
1260 ptrs[1][i] = i;
1261 }
1262 deMemset(ptrs[0], 0xff, (size_t)size);
1263
1264 DescriptorSetLayoutBuilder layoutBuilder;
1265 for (deUint32 i = 0; i < numBuffers; ++i)
1266 {
1267 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
1268 }
1269
1270 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1271 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
1272 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
1273 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1274 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1275
1276 const deUint32 specData[1] =
1277 {
1278 m_xSize,
1279 };
1280 const vk::VkSpecializationMapEntry entries[1] =
1281 {
1282 {0, (deUint32)(sizeof(deUint32) * 0), sizeof(deUint32)},
1283 };
1284 const vk::VkSpecializationInfo specInfo =
1285 {
1286 1,
1287 entries,
1288 sizeof(specData),
1289 specData
1290 };
1291
1292 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1293 {
1294 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1295 DE_NULL,
1296 (VkPipelineLayoutCreateFlags)0,
1297 1,
1298 &descriptorSetLayout.get(),
1299 0u,
1300 DE_NULL,
1301 };
1302 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1303 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
1304
1305 for (deUint32 i = 0; i < numBuffers; ++i)
1306 {
1307 flushAlloc(vk, device, buffers[i]->getAllocation());
1308 }
1309
1310 const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
1311 const VkPipelineShaderStageCreateInfo shaderInfo =
1312 {
1313 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1314 DE_NULL,
1315 0,
1316 VK_SHADER_STAGE_COMPUTE_BIT,
1317 *shader,
1318 "main",
1319 &specInfo,
1320 };
1321
1322 const VkComputePipelineCreateInfo pipelineInfo =
1323 {
1324 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1325 DE_NULL,
1326 0u,
1327 shaderInfo,
1328 *pipelineLayout,
1329 (VkPipeline)0,
1330 0u,
1331 };
1332 Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, NULL);
1333
1334 const VkQueue queue = context.getUniversalQueue();
1335 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
1336 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1337 context.getUniversalQueueFamilyIndex());
1338 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1339
1340 DescriptorSetUpdateBuilder setUpdateBuilder;
1341 for (deUint32 i = 0; i < numBuffers; ++i)
1342 {
1343 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
1344 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
1345 }
1346 setUpdateBuilder.update(vk, device);
1347
1348 beginCommandBuffer(vk, *cmdBuffer, 0);
1349
1350 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1351 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1352
1353 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
1354
1355 endCommandBuffer(vk, *cmdBuffer);
1356
1357 for (deUint32 r = 0; r < m_repeat; ++r)
1358 {
1359 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1360
1361 invalidateAlloc(vk, device, buffers[0]->getAllocation());
1362
1363 for (deUint32 i = 0; i < (deUint32)size / sizeof(deUint32); ++i)
1364 {
1365 deUint32 expected = (m_odd == (i / m_xSize)) ? i : 0u;
1366 if (ptrs[0][i] != expected)
1367 {
1368 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
1369 return tcu::TestStatus::fail("compute failed");
1370 }
1371 }
1372
1373 deMemset(ptrs[0], 0xff, (size_t)size);
1374 flushAlloc(vk, device, buffers[0]->getAllocation());
1375 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
1376 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1377 setUpdateBuilder.update(vk, device);
1378 }
1379
1380 return tcu::TestStatus::pass("compute succeeded");
1381 }
1382
AddRepeatedPipelineTests(tcu::TestCaseGroup * group)1383 void AddRepeatedPipelineTests(tcu::TestCaseGroup* group)
1384 {
1385 std::vector<deUint32> xSizes = {4, 16, 32, 64};
1386 std::vector<deUint32> odds = {0, 1};
1387 std::vector<deUint32> repeats = {2, 4, 8, 16};
1388 for (deUint32 i = 0; i < xSizes.size(); ++i)
1389 {
1390 deUint32 x = xSizes[i];
1391 for (deUint32 j = 0; j < odds.size(); ++j)
1392 {
1393 deUint32 odd = odds[j];
1394 for (deUint32 k = 0; k < repeats.size(); ++k)
1395 {
1396 deUint32 repeat = repeats[k];
1397 group->addChild(new RepeatedPipelineTest(group->getTestContext(),
1398 std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1399 std::string("x_") + de::toString(x) + (odd == 1 ? "_odd" : "_even") + "_repeat_" + de::toString(repeat),
1400 x, odd, repeat));
1401 }
1402 }
1403 }
1404 }
1405 #ifndef CTS_USES_VULKANSC
AddSharedMemoryTests(tcu::TestCaseGroup * group)1406 void AddSharedMemoryTests (tcu::TestCaseGroup* group)
1407 {
1408 tcu::TestContext& testCtx = group->getTestContext();
1409 std::string filePath = "compute/zero_initialize_workgroup_memory";
1410 std::vector<std::string> requirements;
1411
1412 std::string testNames[] =
1413 {
1414 "workgroup_size_128",
1415 "workgroup_size_8x8x2",
1416 "workgroup_size_8x2x8",
1417 "workgroup_size_2x8x8",
1418 "workgroup_size_8x4x4",
1419 "workgroup_size_4x8x4",
1420 "workgroup_size_4x4x8"
1421 };
1422
1423 requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");
1424
1425 for (const auto& testName : testNames)
1426 {
1427 group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(), testName + ".amber", requirements));
1428 }
1429 }
1430 #endif // CTS_USES_VULKANSC
1431
1432 } // anonymous
1433
createZeroInitializeWorkgroupMemoryTests(tcu::TestContext & testCtx)1434 tcu::TestCaseGroup* createZeroInitializeWorkgroupMemoryTests(tcu::TestContext& testCtx)
1435 {
1436 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory", "VK_KHR_zero_intialize_workgroup_memory tests"));
1437
1438 tcu::TestCaseGroup* maxWorkgroupMemoryGroup =
1439 new tcu::TestCaseGroup(testCtx, "max_workgroup_memory", "Read initialization of max workgroup memory");
1440 AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup);
1441 tests->addChild(maxWorkgroupMemoryGroup);
1442
1443 tcu::TestCaseGroup* typeGroup = new tcu::TestCaseGroup(testCtx, "types", "basic type tests");
1444 AddTypeTests(typeGroup);
1445 tests->addChild(typeGroup);
1446
1447 tcu::TestCaseGroup* compositeGroup = new tcu::TestCaseGroup(testCtx, "composites", "composite type tests");
1448 AddCompositeTests(compositeGroup);
1449 tests->addChild(compositeGroup);
1450
1451 tcu::TestCaseGroup* maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups", "max workgroups");
1452 AddMaxWorkgroupsTests(maxWorkgroupsGroup);
1453 tests->addChild(maxWorkgroupsGroup);
1454
1455 tcu::TestCaseGroup* specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup", "specialize workgroup size");
1456 AddSpecializeWorkgroupTests(specializeWorkgroupGroup);
1457 tests->addChild(specializeWorkgroupGroup);
1458
1459 tcu::TestCaseGroup* repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline", "repeated pipeline run");
1460 AddRepeatedPipelineTests(repeatPipelineGroup);
1461 tests->addChild(repeatPipelineGroup);
1462
1463 #ifndef CTS_USES_VULKANSC
1464 tcu::TestCaseGroup* subgroupInvocationGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks", "shared memory tests");
1465 AddSharedMemoryTests(subgroupInvocationGroup);
1466 tests->addChild(subgroupInvocationGroup);
1467 #endif // CTS_USES_VULKANSC
1468
1469 return tests.release();
1470 }
1471
1472 } // compute
1473 } // vkt
1474