/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2020 The Khronos Group Inc.
 * Copyright (c) 2020 Google LLC.
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief VK_KHR_zero_initialize_workgroup_memory tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeZeroInitializeWorkgroupMemoryTests.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktAmberTestCase.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkDefs.hpp"
#include "vkRef.hpp"

#include "tcuCommandLine.hpp"
#include "tcuTestLog.hpp"

#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <algorithm>
#include <vector>

using namespace vk;

namespace vkt
{
namespace compute
{
namespace
{

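// Dispatches the compute shader registered as "comp" against a single
// host-visible storage buffer of 'bufferSize' bytes, then maps the buffer back
// and checks every 32-bit word. With increment == 0 the buffer is pre-filled
// with 0xff and every word must read back as zero; with a non-zero 'increment'
// the buffer starts zeroed and every word must equal numWGX * numWGY * numWGZ,
// i.e. each workgroup atomically added exactly one.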
tcu::TestStatus runCompute(Context &context, uint32_t bufferSize, uint32_t numWGX, uint32_t numWGY, uint32_t numWGZ,
                           vk::ComputePipelineConstructionType m_computePipelineConstructionType,
                           const std::vector<uint32_t> specValues = {}, uint32_t increment = 0)
{
    const DeviceInterface &vk = context.getDeviceInterface();
    const VkDevice device     = context.getDevice();
    Allocator &allocator      = context.getDefaultAllocator();
    tcu::TestLog &log         = context.getTestContext().getLog();

    de::MovePtr<BufferWithMemory> buffer;
    VkDescriptorBufferInfo bufferDescriptor;

    VkDeviceSize size = bufferSize;
    buffer            = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
        vk, device, allocator,
        makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
        MemoryRequirement::HostVisible));
    bufferDescriptor  = makeDescriptorBufferInfo(**buffer, 0, size);

    uint32_t *ptr = (uint32_t *)buffer->getAllocation().getHostPtr();
    deMemset(ptr, increment ? 0 : 0xff, (size_t)size);

    DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);

    Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
    Unique<VkDescriptorPool> descriptorPool(
        DescriptorPoolBuilder()
            .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
            .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

    std::vector<VkSpecializationMapEntry> entries(specValues.size());
    if (!specValues.empty())
    {
        for (uint32_t i = 0; i < specValues.size(); ++i)
        {
            entries[i] = {i, (uint32_t)(sizeof(uint32_t) * i), sizeof(uint32_t)};
        }
    }
    const VkSpecializationInfo specInfo = {
        (uint32_t)specValues.size(),
        entries.data(),
        specValues.size() * sizeof(uint32_t),
        specValues.data(),
    };
    VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
    flushAlloc(vk, device, buffer->getAllocation());

    ComputePipelineWrapper pipeline(vk, device, m_computePipelineConstructionType,
                                    context.getBinaryCollection().get("comp"));
    pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    pipeline.setSpecializationInfo(specInfo);
    pipeline.buildPipeline();

    const VkQueue queue             = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                        context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
    setUpdateBuilder.update(vk, device);

    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
    pipeline.bind(*cmdBuffer);

    vk.cmdDispatch(*cmdBuffer, numWGX, numWGY, numWGZ);

    endCommandBuffer(vk, *cmdBuffer);

    submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

    invalidateAlloc(vk, device, buffer->getAllocation());

    for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
    {
        uint32_t expected = increment ? numWGX * numWGY * numWGZ : 0u;
        if (ptr[i] != expected)
        {
            log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i]
                << tcu::TestLog::EndMessage;
            return tcu::TestStatus::fail("compute failed");
        }
    }

    return tcu::TestStatus::pass("compute succeeded");
}

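// Exercises zero-initialization of the largest shared memory block the device
// allows (maxComputeSharedMemorySize), dispatched over a varying number of
// workgroups.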
class MaxWorkgroupMemoryInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupMemoryInstance(Context &context, uint32_t numWorkgroups,
                               const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupMemoryTest : public vkt::TestCase
{
public:
    MaxWorkgroupMemoryTest(tcu::TestContext &testCtx, const std::string &name, uint32_t numWorkgroups,
                           const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_numWorkgroups(numWorkgroups)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupMemoryInstance(context, m_numWorkgroups, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_numWorkgroups;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupMemoryTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

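// The generated shader declares a zero-initialized shared array sized by spec
// constants and reports, via atomics on the output buffer, whether each
// element reads back as zero. With GL_EXT_null_initializer the
// zero-initialization is spelled:
//
//   shared uvec4 wg_mem[num_elems] = {};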
void MaxWorkgroupMemoryTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "layout(constant_id = 3) const uint num_elems = " << 16384 / 16 << ";\n";
    src << "layout(constant_id = 4) const uint num_wgs = 0;\n";
    src << "shared uvec4 wg_mem[num_elems] = {};\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  uint wg_size = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;\n";
    src << "  for (uint i = 0; i < num_elems; ++i) {\n";
    src << "    for (uint j = 0; j < 4; ++j) {\n";
    src << "      uint shared_idx = 4*i + j;\n";
    src << "      uint wg_val = wg_mem[i][j];\n";
    src << "      if (idx == shared_idx) {\n";
    src << "        atomicAdd(a.a[idx], wg_val == 0 ? 1 : 0);\n";
    src << "      } else if (idx == 0 && shared_idx >= wg_size) {\n";
    src << "        atomicAdd(a.a[shared_idx], wg_val == 0 ? 1 : 0);\n";
    src << "      }\n";
    src << "    }\n";
    src << "  }\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

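// Workgroup dimensions are rounded down to multiples of 13 (presumably to
// exercise non-power-of-two sizes) and capped by the device's per-dimension
// limits; the shared array is sized to fill maxComputeSharedMemorySize exactly.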
tcu::TestStatus MaxWorkgroupMemoryInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
    const uint32_t maxMemSize = properties.limits.maxComputeSharedMemorySize;

    const uint32_t maxWG = std::min(247u, (properties.limits.maxComputeWorkGroupInvocations / 13) * 13);
    uint32_t wgx         = (properties.limits.maxComputeWorkGroupSize[0] / 13) * 13;
    uint32_t wgy         = 1;
    uint32_t wgz         = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, (properties.limits.maxComputeWorkGroupSize[1] / 13) * 13);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, (properties.limits.maxComputeWorkGroupSize[2] / 13) * 13);
    }
    const uint32_t size     = maxMemSize;
    const uint32_t numElems = maxMemSize / 16;

    return runCompute(m_context, size, m_numWorkgroups, 1, 1, m_computePipelineConstructionType,
                      {wgx, wgy, wgz, numElems}, /*increment*/ 1);
}

void AddMaxWorkgroupMemoryTests(tcu::TestCaseGroup *group,
                                vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    std::vector<uint32_t> workgroups = {1, 2, 4, 16, 64, 128};
    for (uint32_t i = 0; i < workgroups.size(); ++i)
    {
        uint32_t numWG = workgroups[i];
        group->addChild(new MaxWorkgroupMemoryTest(group->getTestContext(), de::toString(numWG), numWG,
                                                   computePipelineConstructionType));
    }
}

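// Describes one shared-memory variable type to test: the GLSL type name, the
// byte size of one scalar component, and the component-grid dimensions
// (numElements x numRows gives the total component count; numRows > 1 marks a
// matrix type). numVariables is filled in later with a randomized count of
// shared variables to declare.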
struct TypeCaseDef
{
    std::string typeName;
    uint32_t typeSize;
    uint32_t numElements;
    uint32_t numRows;
    uint32_t numVariables;
};

class TypeTestInstance : public vkt::TestInstance
{
public:
    TypeTestInstance(Context &context, const TypeCaseDef &caseDef,
                     const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class TypeTest : public vkt::TestCase
{
public:
    TypeTest(tcu::TestContext &testCtx, const std::string &name, const TypeCaseDef &caseDef,
             const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new TypeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    TypeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void TypeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    if (m_caseDef.typeName == "float16_t" || m_caseDef.typeName == "f16vec2" || m_caseDef.typeName == "f16vec3" ||
        m_caseDef.typeName == "f16vec4" || m_caseDef.typeName == "f16mat2x2" || m_caseDef.typeName == "f16mat2x3" ||
        m_caseDef.typeName == "f16mat2x4" || m_caseDef.typeName == "f16mat3x2" || m_caseDef.typeName == "f16mat3x3" ||
        m_caseDef.typeName == "f16mat3x4" || m_caseDef.typeName == "f16mat4x2" || m_caseDef.typeName == "f16mat4x3" ||
        m_caseDef.typeName == "f16mat4x4")
    {
        if (f16_i8_features.shaderFloat16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    }

    if (m_caseDef.typeName == "float64_t" || m_caseDef.typeName == "f64vec2" || m_caseDef.typeName == "f64vec3" ||
        m_caseDef.typeName == "f64vec4" || m_caseDef.typeName == "f64mat2x2" || m_caseDef.typeName == "f64mat2x3" ||
        m_caseDef.typeName == "f64mat2x4" || m_caseDef.typeName == "f64mat3x2" || m_caseDef.typeName == "f64mat3x3" ||
        m_caseDef.typeName == "f64mat3x4" || m_caseDef.typeName == "f64mat4x2" || m_caseDef.typeName == "f64mat4x3" ||
        m_caseDef.typeName == "f64mat4x4")
    {
        if (features2.features.shaderFloat64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    }

    if (m_caseDef.typeName == "int8_t" || m_caseDef.typeName == "i8vec2" || m_caseDef.typeName == "i8vec3" ||
        m_caseDef.typeName == "i8vec4" || m_caseDef.typeName == "uint8_t" || m_caseDef.typeName == "u8vec2" ||
        m_caseDef.typeName == "u8vec3" || m_caseDef.typeName == "u8vec4")
    {
        if (f16_i8_features.shaderInt8 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    }

    if (m_caseDef.typeName == "int16_t" || m_caseDef.typeName == "i16vec2" || m_caseDef.typeName == "i16vec3" ||
        m_caseDef.typeName == "i16vec4" || m_caseDef.typeName == "uint16_t" || m_caseDef.typeName == "u16vec2" ||
        m_caseDef.typeName == "u16vec3" || m_caseDef.typeName == "u16vec4")
    {
        if (features2.features.shaderInt16 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    }

    if (m_caseDef.typeName == "int64_t" || m_caseDef.typeName == "i64vec2" || m_caseDef.typeName == "i64vec3" ||
        m_caseDef.typeName == "i64vec4" || m_caseDef.typeName == "uint64_t" || m_caseDef.typeName == "u64vec2" ||
        m_caseDef.typeName == "u64vec3" || m_caseDef.typeName == "u64vec4")
    {
        if (features2.features.shaderInt64 != VK_TRUE)
            TCU_THROW(NotSupportedError, "shaderInt64 not supported");
    }
}

void TypeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "layout(local_size_x = " << m_caseDef.numElements * m_caseDef.numRows
        << ", local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A  { uint a[]; } a;\n";
    for (uint32_t i = 0; i < m_caseDef.numVariables; ++i)
    {
        src << "shared " << m_caseDef.typeName << " wg_mem" << i << " = {};\n";
    }
    src << "void main() {\n";
    if (m_caseDef.numRows > 1)
    {
        src << "  uint row = gl_LocalInvocationID.x % " << m_caseDef.numRows << ";\n";
        src << "  uint col = gl_LocalInvocationID.x / " << m_caseDef.numRows << ";\n";
    }
    std::string conv = m_caseDef.typeSize > 4 ? "int64_t" : "int";
    for (uint32_t v = 0; v < m_caseDef.numVariables; ++v)
    {
        if (m_caseDef.numElements == 1)
        {
            // Scalars.
            src << "  a.a[" << v << "] = (" << conv << "(wg_mem" << v << ") ==  0) ? 0 : 1;\n";
        }
        else if (m_caseDef.numRows == 1)
        {
            // Vectors.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[gl_LocalInvocationID.x]) ==  0) ? 0 : 1;\n";
        }
        else
        {
            // Matrices.
            src << "  a.a[" << v * m_caseDef.numRows * m_caseDef.numElements << " + gl_LocalInvocationID.x] = (" << conv
                << "(wg_mem" << v << "[row][col]) ==  0) ? 0 : 1;\n";
        }
    }
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus TypeTestInstance::iterate(void)
{
    const uint32_t varBytes = m_caseDef.numElements * m_caseDef.numRows * (uint32_t)sizeof(uint32_t);
    return runCompute(m_context, varBytes * m_caseDef.numVariables, 1, 1, 1, m_computePipelineConstructionType);
}

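// Instantiates one TypeTest per GLSL type; numVariables is randomized in
// [1, 16] with a fixed seed so the generated shaders are reproducible from
// run to run.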
void AddTypeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    deRandom rnd;
    deRandom_init(&rnd, 0);
    std::vector<TypeCaseDef> cases = {
        {"bool", 1, 1, 1, 0},      {"bvec2", 1, 2, 1, 0},     {"bvec3", 1, 3, 1, 0},     {"bvec4", 1, 4, 1, 0},
        {"uint32_t", 4, 1, 1, 0},  {"uvec2", 4, 2, 1, 0},     {"uvec3", 4, 3, 1, 0},     {"uvec4", 4, 4, 1, 0},
        {"int32_t", 4, 1, 1, 0},   {"ivec2", 4, 2, 1, 0},     {"ivec3", 4, 3, 1, 0},     {"ivec4", 4, 4, 1, 0},
        {"uint8_t", 1, 1, 1, 0},   {"u8vec2", 1, 2, 1, 0},    {"u8vec3", 1, 3, 1, 0},    {"u8vec4", 1, 4, 1, 0},
        {"int8_t", 1, 1, 1, 0},    {"i8vec2", 1, 2, 1, 0},    {"i8vec3", 1, 3, 1, 0},    {"i8vec4", 1, 4, 1, 0},
        {"uint16_t", 2, 1, 1, 0},  {"u16vec2", 2, 2, 1, 0},   {"u16vec3", 2, 3, 1, 0},   {"u16vec4", 2, 4, 1, 0},
        {"int16_t", 2, 1, 1, 0},   {"i16vec2", 2, 2, 1, 0},   {"i16vec3", 2, 3, 1, 0},   {"i16vec4", 2, 4, 1, 0},
        {"uint64_t", 8, 1, 1, 0},  {"u64vec2", 8, 2, 1, 0},   {"u64vec3", 8, 3, 1, 0},   {"u64vec4", 8, 4, 1, 0},
        {"int64_t", 8, 1, 1, 0},   {"i64vec2", 8, 2, 1, 0},   {"i64vec3", 8, 3, 1, 0},   {"i64vec4", 8, 4, 1, 0},
        {"float32_t", 4, 1, 1, 0}, {"f32vec2", 4, 2, 1, 0},   {"f32vec3", 4, 3, 1, 0},   {"f32vec4", 4, 4, 1, 0},
        {"f32mat2x2", 4, 2, 2, 0}, {"f32mat2x3", 4, 3, 2, 0}, {"f32mat2x4", 4, 4, 2, 0}, {"f32mat3x2", 4, 2, 3, 0},
        {"f32mat3x3", 4, 3, 3, 0}, {"f32mat3x4", 4, 4, 3, 0}, {"f32mat4x2", 4, 2, 4, 0}, {"f32mat4x3", 4, 3, 4, 0},
        {"f32mat4x4", 4, 4, 4, 0}, {"float16_t", 2, 1, 1, 0}, {"f16vec2", 2, 2, 1, 0},   {"f16vec3", 2, 3, 1, 0},
        {"f16vec4", 2, 4, 1, 0},   {"f16mat2x2", 2, 2, 2, 0}, {"f16mat2x3", 2, 3, 2, 0}, {"f16mat2x4", 2, 4, 2, 0},
        {"f16mat3x2", 2, 2, 3, 0}, {"f16mat3x3", 2, 3, 3, 0}, {"f16mat3x4", 2, 4, 3, 0}, {"f16mat4x2", 2, 2, 4, 0},
        {"f16mat4x3", 2, 3, 4, 0}, {"f16mat4x4", 2, 4, 4, 0}, {"float64_t", 8, 1, 1, 0}, {"f64vec2", 8, 2, 1, 0},
        {"f64vec3", 8, 3, 1, 0},   {"f64vec4", 8, 4, 1, 0},   {"f64mat2x2", 8, 2, 2, 0}, {"f64mat2x3", 8, 3, 2, 0},
        {"f64mat2x4", 8, 4, 2, 0}, {"f64mat3x2", 8, 2, 3, 0}, {"f64mat3x3", 8, 3, 3, 0}, {"f64mat3x4", 8, 4, 3, 0},
        {"f64mat4x2", 8, 2, 4, 0}, {"f64mat4x3", 8, 3, 4, 0}, {"f64mat4x4", 8, 4, 4, 0},
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        cases[i].numVariables = (deRandom_getUint32(&rnd) % 16) + 1;
        group->addChild(new TypeTest(group->getTestContext(), cases[i].typeName.c_str(), cases[i],
                                     computePipelineConstructionType));
    }
}

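// A composite case bundles a shared-memory type definition, the main() body
// that reads it back, the number of uints the output buffer must hold, and
// the spec-constant values used to size arrays. 'index' is a bitmask of the
// 16-bit float / 64-bit float / 8-, 16- and 64-bit integer features the
// shader needs (decoded in CompositeTest::checkSupport).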
struct CompositeCaseDef
{
    uint32_t index;
    std::string typeDefinition;
    std::string assignment;
    uint32_t elements;
    std::vector<uint32_t> specValues;

    CompositeCaseDef(uint32_t index_, const std::string &typeDefinition_, const std::string &assignment_,
                     uint32_t elements_, const std::vector<uint32_t> &specValues_)
        : index(index_)
        , typeDefinition(typeDefinition_)
        , assignment(assignment_)
        , elements(elements_)
        , specValues(specValues_)
    {
    }
};

class CompositeTestInstance : public vkt::TestInstance
{
public:
    CompositeTestInstance(Context &context, const CompositeCaseDef &caseDef,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class CompositeTest : public vkt::TestCase
{
public:
    CompositeTest(tcu::TestContext &testCtx, const std::string &name, const CompositeCaseDef &caseDef,
                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_caseDef(caseDef)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new CompositeTestInstance(context, m_caseDef, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    CompositeCaseDef m_caseDef;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void CompositeTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
    deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
    f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
    f16_i8_features.pNext = DE_NULL;

    VkPhysicalDeviceFeatures2 features2;
    deMemset(&features2, 0, sizeof(features2));
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &f16_i8_features;
    context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);

    bool needsFloat16 = (m_caseDef.index & 0x1) != 0;
    bool needsFloat64 = (m_caseDef.index & 0x2) != 0;
    bool needsInt8    = (m_caseDef.index & 0x4) != 0;
    bool needsInt16   = (m_caseDef.index & 0x8) != 0;
    bool needsInt64   = (m_caseDef.index & 0x10) != 0;

    if (needsFloat16 && f16_i8_features.shaderFloat16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
    if (needsFloat64 && features2.features.shaderFloat64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
    if (needsInt8 && f16_i8_features.shaderInt8 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt8 not supported");
    if (needsInt16 && features2.features.shaderInt16 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt16 not supported");
    if (needsInt64 && features2.features.shaderInt64 != VK_TRUE)
        TCU_THROW(NotSupportedError, "shaderInt64 not supported");
}

void CompositeTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
    src << "\n";
    for (uint32_t i = 0; i < m_caseDef.specValues.size(); ++i)
    {
        src << "layout(constant_id = " << i << ") const uint specId" << i << " = 1;\n";
    }
    src << "\n";
    src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "\n";
    src << m_caseDef.typeDefinition;
    src << "\n";
    src << "void main() {\n";
    src << m_caseDef.assignment;
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus CompositeTestInstance::iterate(void)
{
    const uint32_t bufferSize = (uint32_t)sizeof(uint32_t) * m_caseDef.elements;
    return runCompute(m_context, bufferSize, 1, 1, 1, m_computePipelineConstructionType, m_caseDef.specValues);
}

void AddCompositeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    const std::vector<CompositeCaseDef> cases{
        {
            0,
            "shared uint wg_mem[specId0] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[i] = wg_mem[i];\n"
            "}\n",
            16,
            {16},
        },

        {
            0,
            "shared float wg_mem[specId0][specId1] = {};\n",

            "for (uint i = 0; i < specId0; ++i) {\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    uint idx = i * specId1 + j;\n"
            "    a.a[idx] = wg_mem[i][j] == 0.0f ? 0 : 1;\n"
            "  }\n"
            "}\n",
            32,
            {4, 8},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "  uvec2 b;\n"
            "  uvec3 c;\n"
            "  uvec4 d;\n"
            "  float e;\n"
            "  vec2 f;\n"
            "  vec3 g;\n"
            "  vec4 h;\n"
            "  bool i;\n"
            "  bvec2 j;\n"
            "  bvec3 k;\n"
            "  bvec4 l;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint i = 0;\n"
            "a.a[i++] = wg_mem.a;\n"
            "a.a[i++] = wg_mem.b.x;\n"
            "a.a[i++] = wg_mem.b.y;\n"
            "a.a[i++] = wg_mem.c.x;\n"
            "a.a[i++] = wg_mem.c.y;\n"
            "a.a[i++] = wg_mem.c.z;\n"
            "a.a[i++] = wg_mem.d.x;\n"
            "a.a[i++] = wg_mem.d.y;\n"
            "a.a[i++] = wg_mem.d.z;\n"
            "a.a[i++] = wg_mem.d.w;\n"
            "a.a[i++] = wg_mem.e == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.f.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.g.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.x == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.y == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.z == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.h.w == 0.0f ? 0 : 1;\n"
            "a.a[i++] = wg_mem.i ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.j.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.k.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.x ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.y ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.z ? 1 : 0;\n"
            "a.a[i++] = wg_mem.l.w ? 1 : 0;\n",
            30,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  uvec2 a;\n"
            "};\n"
            "struct Sc {\n"
            "  Sa a[specId0];\n"
            "  Sb b[specId1];\n"
            "};\n"
            "shared Sc wg_mem[specId2] = {};\n",

            "uint idx = 0;\n"
            "for (uint i = 0; i < specId2; ++i) {\n"
            "  for (uint j = 0; j < specId0; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].a[j].a;\n"
            "  }\n"
            "  for (uint j = 0; j < specId1; ++j) {\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.x;\n"
            "    a.a[idx++] = wg_mem[i].b[j].a.y;\n"
            "  }\n"
            "}\n",
            32,
            {2, 3, 4},
        },

        {
            1,
            "struct Sa {\n"
            "  f16vec2 a;\n"
            "  float16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.x) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a.y) == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = floatBitsToUint(wg_mem.b[i]) == 0 ? 0 : 1;\n"
            "}\n",
            18,
            {16},
        },

        {
            2,
            "struct Sa {\n"
            "  f64vec2 a;\n"
            "  float64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0.0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0.0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0.0 ? 0 : 1;\n"
            "}\n",
            7,
            {5},
        },

        {
            4,
            "struct Sa {\n"
            "  i8vec2 a;\n"
            "  int8_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            34,
            {32},
        },

        {
            8,
            "struct Sa {\n"
            "  i16vec2 a;\n"
            "  int16_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            122,
            {120},
        },

        {
            16,
            "struct Sa {\n"
            "  i64vec2 a;\n"
            "  int64_t b[specId0];\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = wg_mem.a.x == 0 ? 0 : 1;\n"
            "a.a[idx++] = wg_mem.a.y == 0 ? 0 : 1;\n"
            "for (uint i = 0; i < specId0; ++i) {\n"
            "  a.a[idx++] = wg_mem.b[i] == 0 ? 0 : 1;\n"
            "}\n",
            63,
            {61},
        },

        {
            0x1f,
            "struct Sa {\n"
            "  float16_t a;\n"
            "  float b;\n"
            "  int8_t c;\n"
            "  int16_t d;\n"
            "  int e;\n"
            "  int64_t f;\n"
            "  float64_t g;\n"
            "};\n"
            "shared Sa wg_mem = {};\n",

            "uint idx = 0;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.a) == 0 ? 0 : 1;\n"
            "a.a[idx++] = floatBitsToUint(wg_mem.b) == 0 ? 0 : 1;\n"
            "a.a[idx++] = uint(wg_mem.c);\n"
            "a.a[idx++] = uint(wg_mem.d);\n"
            "a.a[idx++] = uint(wg_mem.e);\n"
            "a.a[idx++] = uint(wg_mem.f);\n"
            "a.a[idx++] = wg_mem.g == 0.0 ? 0 : 1;\n",
            7,
            {},
        },

        {
            0,
            "struct Sa {\n"
            "  uint a;\n"
            "};\n"
            "struct Sb {\n"
            "  Sa a[specId0];\n"
            "  uint b;\n"
            "};\n"
            "struct Sc {\n"
            "  Sb b[specId1];\n"
            "  uint c;\n"
            "};\n"
            "struct Sd {\n"
            "  Sc c[specId2];\n"
            "  uint d;\n"
            "};\n"
            "struct Se {\n"
            "  Sd d[specId3];\n"
            "  uint e;\n"
            "};\n"
            "shared Se wg_mem[specId4] = {};\n",

            "uint idx = 0;\n"
            "for (uint i1 = 0; i1 < specId4; ++i1) {\n"
            "  a.a[idx++] = wg_mem[i1].e;\n"
            "  for (uint i2 = 0; i2 < specId3; ++i2) {\n"
            "    a.a[idx++] = wg_mem[i1].d[i2].d;\n"
            "    for (uint i3 = 0; i3 < specId2; ++i3) {\n"
            "      a.a[idx++] = wg_mem[i1].d[i2].c[i3].c;\n"
            "      for (uint i4 = 0; i4 < specId1; ++i4) {\n"
            "        a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].b;\n"
            "        for (uint i5 = 0; i5 < specId0; ++i5) {\n"
            "          a.a[idx++] = wg_mem[i1].d[i2].c[i3].b[i4].a[i5].a;\n"
            "        }\n"
            "      }\n"
            "    }\n"
            "  }\n"
            "}\n",
            872,
            {6, 5, 4, 3, 2},
        },
    };

    for (uint32_t i = 0; i < cases.size(); ++i)
    {
        group->addChild(
            new CompositeTest(group->getTestContext(), de::toString(i), cases[i], computePipelineConstructionType));
    }
}

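// Dispatches 65535 workgroups along a single axis (x, y, or z) over a small
// zero-initialized shared array; every invocation of every workgroup must
// observe zeroes, both directly and after an atomicExchange shuffle by
// invocation 0.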
enum Dim
{
    DimX,
    DimY,
    DimZ,
};

class MaxWorkgroupsInstance : public vkt::TestInstance
{
public:
    MaxWorkgroupsInstance(Context &context, Dim dim,
                          const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class MaxWorkgroupsTest : public vkt::TestCase
{
public:
    MaxWorkgroupsTest(tcu::TestContext &testCtx, const std::string &name, Dim dim,
                      const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_dim(dim)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new MaxWorkgroupsInstance(context, m_dim, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    Dim m_dim;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void MaxWorkgroupsTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);
}

void MaxWorkgroupsTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[2] = {};\n";
    std::string dim;
    switch (m_dim)
    {
    case DimX:
        dim = "x";
        break;
    case DimY:
        dim = "y";
        break;
    case DimZ:
        dim = "z";
        break;
    }
    src << "\n";
    src << "void main() {\n";
    src << "  uint idx_z = gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y;\n";
    src << "  uint idx_y = gl_LocalInvocationID.y * gl_WorkGroupSize.x;\n";
    src << "  uint idx_x = gl_LocalInvocationID.x;\n";
    src << "  uint idx = idx_x + idx_y + idx_z;\n";
    src << "  if (gl_LocalInvocationID.x == 0) {\n";
    src << "    wg_mem[0] = atomicExchange(wg_mem[1], wg_mem[0]);\n";
    src << "  }\n";
    src << "  barrier();\n";
    src << "  atomicAdd(a.a[idx], wg_mem[idx_x % 2] == 0 ? 1 : 0);\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus MaxWorkgroupsInstance::iterate(void)
{
    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);

    const uint32_t maxWG = std::min(2048u, properties.limits.maxComputeWorkGroupInvocations);
    uint32_t wgx         = properties.limits.maxComputeWorkGroupSize[0];
    uint32_t wgy         = 1;
    uint32_t wgz         = 1;
    if (wgx < maxWG)
    {
        wgy = std::min(maxWG / wgx, properties.limits.maxComputeWorkGroupSize[1]);
    }
    if ((wgx * wgy) < maxWG)
    {
        wgz = std::min(maxWG / wgx / wgy, properties.limits.maxComputeWorkGroupSize[2]);
    }
    uint32_t size = (uint32_t)sizeof(uint32_t) * wgx * wgy * wgz;

    uint32_t num_wgx = m_dim == DimX ? 65535 : 1;
    uint32_t num_wgy = m_dim == DimY ? 65535 : 1;
    uint32_t num_wgz = m_dim == DimZ ? 65535 : 1;

    return runCompute(m_context, size, num_wgx, num_wgy, num_wgz, m_computePipelineConstructionType, {wgx, wgy, wgz},
                      /*increment*/ 1);
}

void AddMaxWorkgroupsTests(tcu::TestCaseGroup *group,
                           vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "x", DimX, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "y", DimY, computePipelineConstructionType));
    group->addChild(new MaxWorkgroupsTest(group->getTestContext(), "z", DimZ, computePipelineConstructionType));
}

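// Verifies that a shared array whose dimensions come from specialization
// constants (shared uint wg_mem[WGX][WGY][WGZ] = {};) is zero-initialized for
// every workgroup size from 1x1x1 up to 8x8x8.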
class SpecializeWorkgroupInstance : public vkt::TestInstance
{
public:
    SpecializeWorkgroupInstance(Context &context, uint32_t x, uint32_t y, uint32_t z,
                                const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestInstance(context)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }
    tcu::TestStatus iterate(void);

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

class SpecializeWorkgroupTest : public vkt::TestCase
{
public:
    SpecializeWorkgroupTest(tcu::TestContext &testCtx, const std::string &name, uint32_t x, uint32_t y, uint32_t z,
                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : TestCase(testCtx, name)
        , m_x(x)
        , m_y(y)
        , m_z(z)
        , m_computePipelineConstructionType(computePipelineConstructionType)
    {
    }

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const
    {
        return new SpecializeWorkgroupInstance(context, m_x, m_y, m_z, m_computePipelineConstructionType);
    }
    virtual void checkSupport(Context &context) const;

private:
    uint32_t m_x;
    uint32_t m_y;
    uint32_t m_z;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;
};

void SpecializeWorkgroupTest::checkSupport(Context &context) const
{
    context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
                                  m_computePipelineConstructionType);

    VkPhysicalDeviceProperties properties;
    deMemset(&properties, 0, sizeof(properties));
    context.getInstanceInterface().getPhysicalDeviceProperties(context.getPhysicalDevice(), &properties);
    if (m_x * m_y * m_z > properties.limits.maxComputeWorkGroupInvocations)
        TCU_THROW(NotSupportedError, "Workgroup size exceeds limits");
}

void SpecializeWorkgroupTest::initPrograms(SourceCollections &sourceCollections) const
{
    std::ostringstream src;
    src << "#version 450\n";
    src << "#extension GL_EXT_null_initializer : enable\n";
    src << "\n";
    src << "layout(constant_id = 0) const uint WGX = 1;\n";
    src << "layout(constant_id = 1) const uint WGY = 1;\n";
    src << "layout(constant_id = 2) const uint WGZ = 1;\n";
    src << "layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;\n";
    src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
    src << "shared uint wg_mem[WGX][WGY][WGZ] = {};\n";
    src << "\n";
    src << "void main() {\n";
    src << "  a.a[gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + gl_LocalInvocationID.y * "
           "gl_WorkGroupSize.x + gl_LocalInvocationID.x] = "
           "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y][gl_LocalInvocationID.z];\n";
    src << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

tcu::TestStatus SpecializeWorkgroupInstance::iterate(void)
{
    const uint32_t size = (uint32_t)sizeof(uint32_t) * m_x * m_y * m_z;
    return runCompute(m_context, size, 1, 1, 1, m_computePipelineConstructionType, {m_x, m_y, m_z});
}

void AddSpecializeWorkgroupTests(tcu::TestCaseGroup *group,
                                 vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    for (uint32_t z = 1; z <= 8; ++z)
    {
        for (uint32_t y = 1; y <= 8; ++y)
        {
            for (uint32_t x = 1; x <= 8; ++x)
            {
                group->addChild(new SpecializeWorkgroupTest(
                    group->getTestContext(), de::toString(x) + "_" + de::toString(y) + "_" + de::toString(z), x, y, z,
                    computePipelineConstructionType));
            }
        }
    }
}

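// Builds the pipeline once, then repeatedly submits the same command buffer.
// After each submission only the row copied from buffer B may be non-zero and
// the output buffer is re-poisoned with 0xff before the next round, presumably
// to catch implementations that zero shared memory only on the first dispatch.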
1100 class RepeatedPipelineInstance : public vkt::TestInstance
1101 {
1102 public:
RepeatedPipelineInstance(Context & context,uint32_t xSize,uint32_t repeat,uint32_t odd)1103     RepeatedPipelineInstance(Context &context, uint32_t xSize, uint32_t repeat, uint32_t odd)
1104         : TestInstance(context)
1105         , m_xSize(xSize)
1106         , m_repeat(repeat)
1107         , m_odd(odd)
1108     {
1109     }
1110     tcu::TestStatus iterate(void);
1111 
1112 private:
1113     uint32_t m_xSize;
1114     uint32_t m_repeat;
1115     uint32_t m_odd;
1116 };
1117 
1118 class RepeatedPipelineTest : public vkt::TestCase
1119 {
1120 public:
RepeatedPipelineTest(tcu::TestContext & testCtx,const std::string & name,uint32_t xSize,uint32_t repeat,uint32_t odd,const vk::ComputePipelineConstructionType computePipelineConstructionType)1121     RepeatedPipelineTest(tcu::TestContext &testCtx, const std::string &name, uint32_t xSize, uint32_t repeat,
1122                          uint32_t odd, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1123         : TestCase(testCtx, name)
1124         , m_xSize(xSize)
1125         , m_repeat(repeat)
1126         , m_odd(odd)
1127         , m_computePipelineConstructionType(computePipelineConstructionType)
1128     {
1129     }
1130 
1131     void initPrograms(SourceCollections &sourceCollections) const;
createInstance(Context & context) const1132     TestInstance *createInstance(Context &context) const
1133     {
1134         return new RepeatedPipelineInstance(context, m_xSize, m_repeat, m_odd);
1135     }
1136     virtual void checkSupport(Context &context) const;
1137 
1138 private:
1139     uint32_t m_xSize;
1140     uint32_t m_repeat;
1141     uint32_t m_odd;
1142     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1143 };
1144 
checkSupport(Context & context) const1145 void RepeatedPipelineTest::checkSupport(Context &context) const
1146 {
1147     context.requireDeviceFunctionality("VK_KHR_zero_initialize_workgroup_memory");
1148     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1149                                   m_computePipelineConstructionType);
1150 }
1151 
initPrograms(SourceCollections & sourceCollections) const1152 void RepeatedPipelineTest::initPrograms(SourceCollections &sourceCollections) const
1153 {
1154     std::ostringstream src;
1155     src << "#version 450\n";
1156     src << "#extension GL_EXT_null_initializer : enable\n";
1157     src << "\n";
1158     src << "layout(constant_id = 0) const uint WGX = 1;\n";
1159     src << "layout(local_size_x_id = 0, local_size_y = 2, local_size_z = 1) in;\n";
1160     src << "\n";
1161     src << "layout(set = 0, binding = 0) buffer A { uint a[]; } a;\n";
1162     src << "layout(set = 0, binding = 1) buffer B { uint b[]; } b;\n";
1163     src << "\n";
1164     src << "shared uint wg_mem[WGX][2] = {};\n";
1165     src << "void main() {\n";
1166     src << "  if (gl_LocalInvocationID.y == " << m_odd << ") {\n";
1167     src << "    wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y] = b.b[gl_LocalInvocationID.y * WGX + "
1168            "gl_LocalInvocationID.x];\n";
1169     src << "  }\n";
1170     src << "  barrier();\n";
1171     src << "  a.a[gl_LocalInvocationID.y * WGX + gl_LocalInvocationID.x] = "
1172            "wg_mem[gl_LocalInvocationID.x][gl_LocalInvocationID.y];\n";
1173     src << "}\n";
1174 
1175     sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
1176 }
1177 
iterate(void)1178 tcu::TestStatus RepeatedPipelineInstance::iterate(void)
1179 {
1180     Context &context          = m_context;
1181     const uint32_t bufferSize = m_xSize * 2 * (uint32_t)sizeof(uint32_t);
1182     const uint32_t numBuffers = 2;
1183 
1184     const DeviceInterface &vk = context.getDeviceInterface();
1185     const VkDevice device     = context.getDevice();
1186     Allocator &allocator      = context.getDefaultAllocator();
1187     tcu::TestLog &log         = context.getTestContext().getLog();
1188 
1189     de::MovePtr<BufferWithMemory> buffers[numBuffers];
1190     VkDescriptorBufferInfo bufferDescriptors[numBuffers];
1191 
1192     VkDeviceSize size = bufferSize;
1193     for (uint32_t i = 0; i < numBuffers; ++i)
1194     {
1195         buffers[i]           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1196             vk, device, allocator,
1197             makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1198                                                      VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
1199             MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1200         bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, size);
1201     }
1202 
1203     uint32_t *ptrs[numBuffers];
1204     for (uint32_t i = 0; i < numBuffers; ++i)
1205     {
1206         ptrs[i] = (uint32_t *)buffers[i]->getAllocation().getHostPtr();
1207     }
1208     for (uint32_t i = 0; i < bufferSize / sizeof(uint32_t); ++i)
1209     {
1210         ptrs[1][i] = i;
1211     }
1212     deMemset(ptrs[0], 0xff, (size_t)size);
1213 
1214     DescriptorSetLayoutBuilder layoutBuilder;
1215     for (uint32_t i = 0; i < numBuffers; ++i)
1216     {
1217         layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
1218     }
1219 
1220     Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1221     Unique<VkDescriptorPool> descriptorPool(
1222         DescriptorPoolBuilder()
1223             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBuffers)
1224             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1225     Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1226 
1227     const uint32_t specData[1] = {
1228         m_xSize,
1229     };
1230     const vk::VkSpecializationMapEntry entries[1] = {
1231         {0, (uint32_t)(sizeof(uint32_t) * 0), sizeof(uint32_t)},
1232     };
1233     const vk::VkSpecializationInfo specInfo = {1, entries, sizeof(specData), specData};
1234 
1235     const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = {
1236         VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1237         DE_NULL,
1238         (VkPipelineLayoutCreateFlags)0,
1239         1,
1240         &descriptorSetLayout.get(),
1241         0u,
1242         DE_NULL,
1243     };
1244     Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1245     VkPipelineBindPoint bindPoint         = VK_PIPELINE_BIND_POINT_COMPUTE;
1246 
1247     for (uint32_t i = 0; i < numBuffers; ++i)
1248     {
1249         flushAlloc(vk, device, buffers[i]->getAllocation());
1250     }
1251 
    const Unique<VkShaderModule> shader(createShaderModule(vk, device, context.getBinaryCollection().get("comp"), 0));
    const VkPipelineShaderStageCreateInfo shaderInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        DE_NULL,
        0,
        VK_SHADER_STAGE_COMPUTE_BIT,
        *shader,
        "main",
        &specInfo,
    };

    const VkComputePipelineCreateInfo pipelineInfo = {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, DE_NULL, 0u, shaderInfo, *pipelineLayout, (VkPipeline)0, 0u,
    };
    Move<VkPipeline> pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineInfo, DE_NULL);

    const VkQueue queue             = context.getUniversalQueue();
    Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
                                                        context.getUniversalQueueFamilyIndex());
    Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

    DescriptorSetUpdateBuilder setUpdateBuilder;
    for (uint32_t i = 0; i < numBuffers; ++i)
    {
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(i),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[i]);
    }
    setUpdateBuilder.update(vk, device);

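    // Record the command buffer a single time; it is resubmitted m_repeat
    // times in the loop below.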
    beginCommandBuffer(vk, *cmdBuffer, 0);

    vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
    vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

    vk.cmdDispatch(*cmdBuffer, 1, 1, 1);

    endCommandBuffer(vk, *cmdBuffer);

    for (uint32_t r = 0; r < m_repeat; ++r)
    {
        submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

        invalidateAlloc(vk, device, buffers[0]->getAllocation());

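        // The half of the output selected by m_odd must contain the copied
        // input values; the other half must read back as zero, which should
        // only be the case if the workgroup memory backing it was
        // zero-initialized rather than left with data from the previous run.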
        for (uint32_t i = 0; i < (uint32_t)size / sizeof(uint32_t); ++i)
        {
            uint32_t expected = (m_odd == (i / m_xSize)) ? i : 0u;
            if (ptrs[0][i] != expected)
            {
                log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected
                    << ", got: " << ptrs[0][i] << tcu::TestLog::EndMessage;
                return tcu::TestStatus::fail("compute failed");
            }
        }

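        // Re-poison the output buffer and rewrite its descriptor so the
        // next submission starts from known garbage values.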
        deMemset(ptrs[0], 0xff, (size_t)size);
        flushAlloc(vk, device, buffers[0]->getAllocation());
        setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
                                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
        setUpdateBuilder.update(vk, device);
    }

    return tcu::TestStatus::pass("compute succeeded");
}

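// Registers one test per combination of workgroup x-size, odd/even output
// half and repeat count.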
void AddRepeatedPipelineTests(tcu::TestCaseGroup *group,
                              vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    const std::vector<uint32_t> xSizes  = {4, 16, 32, 64};
    const std::vector<uint32_t> odds    = {0, 1};
    const std::vector<uint32_t> repeats = {2, 4, 8, 16};
    for (uint32_t x : xSizes)
    {
        for (uint32_t odd : odds)
        {
            for (uint32_t repeat : repeats)
            {
                group->addChild(new RepeatedPipelineTest(group->getTestContext(),
                                                         std::string("x_") + de::toString(x) +
                                                             (odd == 1 ? "_odd" : "_even") + "_repeat_" +
                                                             de::toString(repeat),
                                                         x, odd, repeat, computePipelineConstructionType));
            }
        }
    }
}
#ifndef CTS_USES_VULKANSC
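// Amber-based cases covering several workgroup sizes; each one requires
// VK_KHR_zero_initialize_workgroup_memory.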
void AddSharedMemoryTests(tcu::TestCaseGroup *group)
{
    tcu::TestContext &testCtx = group->getTestContext();
    std::string filePath      = "compute/zero_initialize_workgroup_memory";
    std::vector<std::string> requirements;

    std::string testNames[] = {"workgroup_size_128",   "workgroup_size_8x8x2", "workgroup_size_8x2x8",
                               "workgroup_size_2x8x8", "workgroup_size_8x4x4", "workgroup_size_4x8x4",
                               "workgroup_size_4x4x8"};

    requirements.push_back("VK_KHR_zero_initialize_workgroup_memory");

    for (const auto &testName : testNames)
    {
        group->addChild(cts_amber::createAmberTestCase(testCtx, testName.c_str(), "", filePath.c_str(),
                                                       testName + ".amber", requirements));
    }
}
#endif // CTS_USES_VULKANSC

} // namespace

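// Assembles all of the subgroups above into the top-level
// "zero_initialize_workgroup_memory" group.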
tcu::TestCaseGroup *createZeroInitializeWorkgroupMemoryTests(
    tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "zero_initialize_workgroup_memory"));

    // Read initialization of max workgroup memory
    tcu::TestCaseGroup *maxWorkgroupMemoryGroup = new tcu::TestCaseGroup(testCtx, "max_workgroup_memory");
    AddMaxWorkgroupMemoryTests(maxWorkgroupMemoryGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupMemoryGroup);

    tcu::TestCaseGroup *typeGroup = new tcu::TestCaseGroup(testCtx, "types");
    AddTypeTests(typeGroup, computePipelineConstructionType);
    tests->addChild(typeGroup);

    tcu::TestCaseGroup *compositeGroup = new tcu::TestCaseGroup(testCtx, "composites");
    AddCompositeTests(compositeGroup, computePipelineConstructionType);
    tests->addChild(compositeGroup);

    tcu::TestCaseGroup *maxWorkgroupsGroup = new tcu::TestCaseGroup(testCtx, "max_workgroups");
    AddMaxWorkgroupsTests(maxWorkgroupsGroup, computePipelineConstructionType);
    tests->addChild(maxWorkgroupsGroup);

    tcu::TestCaseGroup *specializeWorkgroupGroup = new tcu::TestCaseGroup(testCtx, "specialize_workgroup");
    AddSpecializeWorkgroupTests(specializeWorkgroupGroup, computePipelineConstructionType);
    tests->addChild(specializeWorkgroupGroup);

    tcu::TestCaseGroup *repeatPipelineGroup = new tcu::TestCaseGroup(testCtx, "repeat_pipeline");
    AddRepeatedPipelineTests(repeatPipelineGroup, computePipelineConstructionType);
    tests->addChild(repeatPipelineGroup);

#ifndef CTS_USES_VULKANSC
    // These are Amber tests and Amber cannot use shader objects.
    if (!isComputePipelineConstructionTypeShaderObject(computePipelineConstructionType))
    {
        tcu::TestCaseGroup *sharedMemoryGroup = new tcu::TestCaseGroup(testCtx, "shared_memory_blocks");
        AddSharedMemoryTests(sharedMemoryGroup);
        tests->addChild(sharedMemoryGroup);
    }
#endif // CTS_USES_VULKANSC

    return tests.release();
}

} // namespace compute
} // namespace vkt