• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Pipeline Library Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingPipelineLibraryTests.hpp"
25 
#include <limits>
#include <list>
#include <memory>
#include <utility>
#include <vector>
28 
29 #include "vkDefs.hpp"
30 
31 #include "vktTestCase.hpp"
32 #include "vktTestGroupUtil.hpp"
33 #include "vktCustomInstancesDevices.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkObjUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 #include "vkBufferWithMemory.hpp"
39 #include "vkImageWithMemory.hpp"
40 #include "vkTypeUtil.hpp"
41 
42 #include "vkRayTracingUtil.hpp"
43 
44 #include "tcuCommandLine.hpp"
45 
46 namespace vkt
47 {
48 namespace RayTracing
49 {
50 namespace
51 {
52 using namespace vk;
53 using namespace vkt;
54 
55 static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
56                                               VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
57                                               VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
58 
59 static const uint32_t RTPL_DEFAULT_SIZE          = 8u;
60 static const uint32_t RTPL_MAX_CHIT_SHADER_COUNT = 16;
61 
62 struct LibraryConfiguration
63 {
64     int32_t pipelineShaders;
65     std::vector<tcu::IVec2> pipelineLibraries; // IVec2 = ( parentID, shaderCount )
66 };
67 
// Selects which additional shader group handle checks a test variant performs.
// Every value other than DEFAULT makes checkSupport() require VK_EXT_pipeline_library_group_handles.
enum class TestType
{
    DEFAULT                      = 0,
    CHECK_GROUP_HANDLES          = 1,
    CHECK_CAPTURE_REPLAY_HANDLES = 2, // Counts as a capture/replay variant (see TestParams::includesCaptureReplay).
    CHECK_ALL_HANDLES            = 3, // Counts as a capture/replay variant (see TestParams::includesCaptureReplay).
};
75 
76 struct TestParams
77 {
78     LibraryConfiguration libraryConfiguration;
79     bool multithreadedCompilation;
80     bool pipelinesCreatedUsingDHO;
81     TestType testType;
82     bool useAABBs;
83     bool useMaintenance5;
84     bool useLinkTimeOptimizations;
85     bool retainLinkTimeOptimizations;
86     uint32_t width;
87     uint32_t height;
88 
getPixelCountvkt::RayTracing::__anonb6d8bd5c0111::TestParams89     uint32_t getPixelCount(void) const
90     {
91         return width * height;
92     }
93 
getHitGroupCountvkt::RayTracing::__anonb6d8bd5c0111::TestParams94     uint32_t getHitGroupCount(void) const
95     {
96         uint32_t numShadersUsed = libraryConfiguration.pipelineShaders;
97         for (const auto &lib : libraryConfiguration.pipelineLibraries)
98             numShadersUsed += lib.y();
99         return numShadersUsed;
100     }
101 
includesCaptureReplayvkt::RayTracing::__anonb6d8bd5c0111::TestParams102     bool includesCaptureReplay(void) const
103     {
104         return (testType == TestType::CHECK_CAPTURE_REPLAY_HANDLES || testType == TestType::CHECK_ALL_HANDLES);
105     }
106 };
107 
108 // This class will help verify shader group handles in libraries by maintaining information of the library tree and being able to
109 // calculate the offset of the handles for each pipeline in the "flattened" array of shader group handles.
110 class PipelineTree
111 {
112 protected:
113     // Each node represents a pipeline.
114     class Node
115     {
116     public:
Node(int64_t parent,uint32_t groupCount)117         Node(int64_t parent, uint32_t groupCount)
118             : m_parent(parent)
119             , m_groupCount(groupCount)
120             , m_children()
121             , m_frozen(false)
122             , m_flatOffset(std::numeric_limits<uint32_t>::max())
123         {
124         }
125 
appendChild(Node * child)126         void appendChild(Node *child)
127         {
128             m_children.push_back(child);
129         }
getOffset(void) const130         uint32_t getOffset(void) const
131         {
132             return m_flatOffset;
133         }
freeze(void)134         void freeze(void)
135         {
136             m_frozen = true;
137         }
calcOffsetRecursively(uint32_t currentOffset)138         uint32_t calcOffsetRecursively(uint32_t currentOffset) // Returns the next offset.
139         {
140             DE_ASSERT(m_frozen);
141             m_flatOffset       = currentOffset;
142             uint32_t newOffset = currentOffset + m_groupCount;
143             for (auto &node : m_children)
144                 newOffset = node->calcOffsetRecursively(newOffset);
145             return newOffset;
146         }
147 
148     protected:
149         const int64_t m_parent; // Parent pipeline (-1 for the root node).
150         const uint32_t
151             m_groupCount; // Shader group count in pipeline. Related to LibraryConfiguration::pipelineLibraries[1].
152         std::vector<Node *>
153             m_children;        // How many child pipelines. Related to LibraryConfiguration::pipelineLibraries[0]
154         bool m_frozen;         // No sense to calculate offsets before the tree structure is fully constructed.
155         uint32_t m_flatOffset; // Calculated offset in the flattened array.
156     };
157 
158 public:
PipelineTree()159     PipelineTree() : m_nodes(), m_root(nullptr), m_frozen(false), m_offsetsCalculated(false)
160     {
161     }
162 
163     // See LibraryConfiguration::pipelineLibraries.
addNode(int64_t parent,uint32_t groupCount)164     void addNode(int64_t parent, uint32_t groupCount)
165     {
166         DE_ASSERT(m_nodes.size() < static_cast<size_t>(std::numeric_limits<uint32_t>::max()));
167 
168         if (parent < 0)
169         {
170             DE_ASSERT(!m_root);
171             m_nodes.emplace_back(new Node(parent, groupCount));
172             m_root = m_nodes.back().get();
173         }
174         else
175         {
176             DE_ASSERT(parent < static_cast<int64_t>(m_nodes.size()));
177             m_nodes.emplace_back(new Node(parent, groupCount));
178             m_nodes.at(static_cast<size_t>(parent))->appendChild(m_nodes.back().get());
179         }
180     }
181 
182     // Confirms we will not be adding more nodes to the tree.
freeze(void)183     void freeze(void)
184     {
185         for (auto &node : m_nodes)
186             node->freeze();
187         m_frozen = true;
188     }
189 
190     // When obtaining shader group handles from the root pipeline, we get a vector of handles in which some of those handles come from pipeline libraries.
191     // This method returns, for each pipeline, the offset of its shader group handles in that vector as the number of shader groups (not bytes).
getGroupOffsets(void)192     std::vector<uint32_t> getGroupOffsets(void)
193     {
194         DE_ASSERT(m_frozen);
195 
196         if (!m_offsetsCalculated)
197         {
198             calcOffsets();
199             m_offsetsCalculated = true;
200         }
201 
202         std::vector<uint32_t> offsets;
203         offsets.reserve(m_nodes.size());
204 
205         for (const auto &node : m_nodes)
206             offsets.push_back(node->getOffset());
207 
208         return offsets;
209     }
210 
211 protected:
calcOffsets(void)212     void calcOffsets(void)
213     {
214         DE_ASSERT(m_frozen);
215         if (m_root)
216         {
217             m_root->calcOffsetRecursively(0);
218         }
219     }
220 
221     std::vector<std::unique_ptr<Node>> m_nodes;
222     Node *m_root;
223     bool m_frozen;
224     bool m_offsetsCalculated;
225 };
226 
makeImageCreateInfo(uint32_t width,uint32_t height,VkFormat format)227 VkImageCreateInfo makeImageCreateInfo(uint32_t width, uint32_t height, VkFormat format)
228 {
229     const VkImageCreateInfo imageCreateInfo = {
230         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
231         DE_NULL,                             // const void* pNext;
232         (VkImageCreateFlags)0u,              // VkImageCreateFlags flags;
233         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
234         format,                              // VkFormat format;
235         makeExtent3D(width, height, 1),      // VkExtent3D extent;
236         1u,                                  // uint32_t mipLevels;
237         1u,                                  // uint32_t arrayLayers;
238         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
239         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
240         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
241             VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
242         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
243         0u,                                  // uint32_t queueFamilyIndexCount;
244         DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
245         VK_IMAGE_LAYOUT_UNDEFINED            // VkImageLayout initialLayout;
246     };
247 
248     return imageCreateInfo;
249 }
250 
251 class RayTracingPipelineLibraryTestCase : public TestCase
252 {
253 public:
254     RayTracingPipelineLibraryTestCase(tcu::TestContext &context, const char *name, const TestParams data);
255     ~RayTracingPipelineLibraryTestCase(void);
256 
257     virtual void checkSupport(Context &context) const;
258     virtual void initPrograms(SourceCollections &programCollection) const;
259     virtual TestInstance *createInstance(Context &context) const;
260 
261 private:
262     TestParams m_data;
263 };
264 
265 class RayTracingPipelineLibraryTestInstance : public TestInstance
266 {
267 public:
268     RayTracingPipelineLibraryTestInstance(Context &context, const TestParams &data);
269     ~RayTracingPipelineLibraryTestInstance(void);
270     tcu::TestStatus iterate(void);
271 
272 protected:
273     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> initBottomAccelerationStructures(
274         VkCommandBuffer cmdBuffer);
275     de::MovePtr<TopLevelAccelerationStructure> initTopAccelerationStructure(
276         VkCommandBuffer cmdBuffer,
277         std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures);
278     std::vector<uint32_t> runTest(bool replay = false);
279 
280 private:
281     TestParams m_data;
282     PipelineTree m_pipelineTree;
283     std::vector<uint8_t> m_captureReplayHandles;
284 };
285 
RayTracingPipelineLibraryTestCase(tcu::TestContext & context,const char * name,const TestParams data)286 RayTracingPipelineLibraryTestCase::RayTracingPipelineLibraryTestCase(tcu::TestContext &context, const char *name,
287                                                                      const TestParams data)
288     : vkt::TestCase(context, name)
289     , m_data(data)
290 {
291 }
292 
~RayTracingPipelineLibraryTestCase(void)293 RayTracingPipelineLibraryTestCase::~RayTracingPipelineLibraryTestCase(void)
294 {
295 }
296 
checkSupport(Context & context) const297 void RayTracingPipelineLibraryTestCase::checkSupport(Context &context) const
298 {
299     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
300     context.requireDeviceFunctionality("VK_KHR_pipeline_library");
301 
302     if (m_data.testType != TestType::DEFAULT)
303         context.requireDeviceFunctionality("VK_EXT_pipeline_library_group_handles");
304 
305     if (m_data.useLinkTimeOptimizations)
306         context.requireDeviceFunctionality("VK_EXT_graphics_pipeline_library");
307 
308     if (m_data.useMaintenance5)
309         context.requireDeviceFunctionality("VK_KHR_maintenance5");
310 
311     if (m_data.includesCaptureReplay())
312     {
313         const auto &rtFeatures = context.getRayTracingPipelineFeatures();
314         if (!rtFeatures.rayTracingPipelineShaderGroupHandleCaptureReplay)
315             TCU_THROW(NotSupportedError, "rayTracingPipelineShaderGroupHandleCaptureReplay not supported");
316     }
317 }
318 
initPrograms(SourceCollections & programCollection) const319 void RayTracingPipelineLibraryTestCase::initPrograms(SourceCollections &programCollection) const
320 {
321     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
322 
323     {
324         std::stringstream css;
325         css << "#version 460 core\n"
326                "#extension GL_EXT_ray_tracing : require\n"
327                "layout(location = 0) rayPayloadEXT uvec4 hitValue;\n"
328                "layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
329                "layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
330                "\n"
331                "void main()\n"
332                "{\n"
333                "  float tmin     = 0.0;\n"
334                "  float tmax     = 1.0;\n"
335                "  vec3  origin   = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, "
336                "float(gl_LaunchIDEXT.z + 0.5f));\n"
337                "  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
338                "  hitValue       = uvec4("
339             << RTPL_MAX_CHIT_SHADER_COUNT + 1
340             << ",0,0,0);\n"
341                "  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
342                "  imageStore(result, ivec2(gl_LaunchIDEXT.xy), hitValue);\n"
343                "}\n";
344         programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
345     }
346 
347     {
348         std::stringstream css;
349         css << "#version 460 core\n"
350                "#extension GL_EXT_ray_tracing : require\n"
351                "layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
352                "void main()\n"
353                "{\n"
354                "  hitValue = uvec4("
355             << RTPL_MAX_CHIT_SHADER_COUNT
356             << ",0,0,1);\n"
357                "}\n";
358 
359         programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
360     }
361 
362     if (m_data.useAABBs)
363     {
364         std::ostringstream isec;
365         isec << "#version 460 core\n"
366              << "#extension GL_EXT_ray_tracing : require\n"
367              << "void main()\n"
368              << "{\n"
369              << "  reportIntersectionEXT(gl_RayTminEXT, 0);\n"
370              << "}\n";
371         programCollection.glslSources.add("isec")
372             << glu::IntersectionSource(updateRayTracingGLSL(isec.str())) << buildOptions;
373     }
374 
375     for (uint32_t i = 0; i < RTPL_MAX_CHIT_SHADER_COUNT; ++i)
376     {
377         std::stringstream css;
378         css << "#version 460 core\n"
379                "#extension GL_EXT_ray_tracing : require\n"
380                "layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
381                "void main()\n"
382                "{\n"
383                "  hitValue = uvec4("
384             << i
385             << ",0,0,1);\n"
386                "}\n";
387         std::stringstream csname;
388         csname << "chit" << i;
389         programCollection.glslSources.add(csname.str())
390             << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
391     }
392 }
393 
createInstance(Context & context) const394 TestInstance *RayTracingPipelineLibraryTestCase::createInstance(Context &context) const
395 {
396     return new RayTracingPipelineLibraryTestInstance(context, m_data);
397 }
398 
RayTracingPipelineLibraryTestInstance(Context & context,const TestParams & data)399 RayTracingPipelineLibraryTestInstance::RayTracingPipelineLibraryTestInstance(Context &context, const TestParams &data)
400     : vkt::TestInstance(context)
401     , m_data(data)
402     , m_pipelineTree()
403 {
404     // Build the helper pipeline tree, which helps for some tests.
405     m_pipelineTree.addNode(-1, static_cast<uint32_t>(m_data.libraryConfiguration.pipelineShaders +
406                                                      2 /*rgen and miss for the root pipeline*/));
407 
408     for (const auto &lib : m_data.libraryConfiguration.pipelineLibraries)
409         m_pipelineTree.addNode(lib.x(), static_cast<uint32_t>(lib.y()));
410 
411     m_pipelineTree.freeze();
412 }
413 
~RayTracingPipelineLibraryTestInstance(void)414 RayTracingPipelineLibraryTestInstance::~RayTracingPipelineLibraryTestInstance(void)
415 {
416 }
417 
418 std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> RayTracingPipelineLibraryTestInstance::
initBottomAccelerationStructures(VkCommandBuffer cmdBuffer)419     initBottomAccelerationStructures(VkCommandBuffer cmdBuffer)
420 {
421     const auto &vkd   = m_context.getDeviceInterface();
422     const auto device = m_context.getDevice();
423     auto &allocator   = m_context.getDefaultAllocator();
424     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> result;
425 
426     tcu::Vec3 v0(0.0, 1.0, 0.0);
427     tcu::Vec3 v1(0.0, 0.0, 0.0);
428     tcu::Vec3 v2(1.0, 1.0, 0.0);
429     tcu::Vec3 v3(1.0, 0.0, 0.0);
430 
431     for (uint32_t y = 0; y < m_data.height; ++y)
432         for (uint32_t x = 0; x < m_data.width; ++x)
433         {
434             // let's build a 3D chessboard of geometries
435             if (((x + y) % 2) == 0)
436                 continue;
437             tcu::Vec3 xyz((float)x, (float)y, 0.0f);
438             std::vector<tcu::Vec3> geometryData;
439 
440             de::MovePtr<BottomLevelAccelerationStructure> bottomLevelAccelerationStructure =
441                 makeBottomLevelAccelerationStructure();
442             bottomLevelAccelerationStructure->setGeometryCount(1u);
443 
444             if (m_data.useAABBs)
445             {
446                 geometryData.push_back(xyz + v1);
447                 geometryData.push_back(xyz + v2);
448             }
449             else
450             {
451                 geometryData.push_back(xyz + v0);
452                 geometryData.push_back(xyz + v1);
453                 geometryData.push_back(xyz + v2);
454                 geometryData.push_back(xyz + v2);
455                 geometryData.push_back(xyz + v1);
456                 geometryData.push_back(xyz + v3);
457             }
458 
459             bottomLevelAccelerationStructure->addGeometry(geometryData, !m_data.useAABBs /*triangles*/);
460             bottomLevelAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
461             result.push_back(
462                 de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
463         }
464 
465     return result;
466 }
467 
initTopAccelerationStructure(VkCommandBuffer cmdBuffer,std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> & bottomLevelAccelerationStructures)468 de::MovePtr<TopLevelAccelerationStructure> RayTracingPipelineLibraryTestInstance::initTopAccelerationStructure(
469     VkCommandBuffer cmdBuffer,
470     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> &bottomLevelAccelerationStructures)
471 {
472     const auto &vkd   = m_context.getDeviceInterface();
473     const auto device = m_context.getDevice();
474     auto &allocator   = m_context.getDefaultAllocator();
475 
476     uint32_t instanceCount = m_data.width * m_data.height / 2;
477 
478     de::MovePtr<TopLevelAccelerationStructure> result = makeTopLevelAccelerationStructure();
479     result->setInstanceCount(instanceCount);
480 
481     uint32_t currentInstanceIndex = 0;
482     uint32_t numShadersUsed       = m_data.getHitGroupCount();
483 
484     for (uint32_t y = 0; y < m_data.height; ++y)
485         for (uint32_t x = 0; x < m_data.width; ++x)
486         {
487             if (((x + y) % 2) == 0)
488                 continue;
489 
490             result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex], identityMatrix3x4, 0, 0xFF,
491                                 currentInstanceIndex % numShadersUsed, 0U);
492             currentInstanceIndex++;
493         }
494     result->createAndBuild(vkd, device, cmdBuffer, allocator);
495 
496     return result;
497 }
498 
compileShaders(Context & context,de::SharedPtr<de::MovePtr<RayTracingPipeline>> & pipeline,const std::vector<std::tuple<std::string,VkShaderStageFlagBits>> & shaderData,const Move<VkShaderModule> & isecMod)499 void compileShaders(Context &context, de::SharedPtr<de::MovePtr<RayTracingPipeline>> &pipeline,
500                     const std::vector<std::tuple<std::string, VkShaderStageFlagBits>> &shaderData,
501                     const Move<VkShaderModule> &isecMod)
502 {
503     const auto &vkd      = context.getDeviceInterface();
504     const auto device    = context.getDevice();
505     const auto &binaries = context.getBinaryCollection();
506     const bool hasISec   = static_cast<bool>(isecMod);
507 
508     for (uint32_t i = 0; i < shaderData.size(); ++i)
509     {
510         std::string shaderName;
511         VkShaderStageFlagBits shaderStage;
512         std::tie(shaderName, shaderStage) = shaderData[i];
513 
514         auto pipelinePtr = pipeline->get();
515         pipelinePtr->addShader(shaderStage, createShaderModule(vkd, device, binaries.get(shaderName)), i);
516         if (hasISec && shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
517             pipelinePtr->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, isecMod.get(), i);
518     }
519 }
520 
521 struct CompileShadersMultithreadData
522 {
523     Context &context;
524     de::SharedPtr<de::MovePtr<RayTracingPipeline>> &pipeline;
525     const std::vector<std::tuple<std::string, VkShaderStageFlagBits>> &shaderData;
526     const Move<VkShaderModule> &isecMod;
527 };
528 
compileShadersThread(void * param)529 void compileShadersThread(void *param)
530 {
531     CompileShadersMultithreadData *csmd = (CompileShadersMultithreadData *)param;
532     compileShaders(csmd->context, csmd->pipeline, csmd->shaderData, csmd->isecMod);
533 }
534 
getAllGroupCounts(const std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> & rayTracingPipelines)535 std::vector<uint32_t> getAllGroupCounts(
536     const std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> &rayTracingPipelines)
537 {
538     std::vector<uint32_t> allGroupCounts;
539     allGroupCounts.reserve(rayTracingPipelines.size());
540     std::transform(begin(rayTracingPipelines), end(rayTracingPipelines), std::back_inserter(allGroupCounts),
541                    [](const de::SharedPtr<de::MovePtr<RayTracingPipeline>> &rtPipeline)
542                    { return rtPipeline->get()->getFullShaderGroupCount(); });
543 
544     return allGroupCounts;
545 }
546 
547 // Sometimes we want to obtain shader group handles and do checks on them, and the processing we do is the same for normal handles
548 // and for capture/replay handles. Yet their sizes can be different, and the function to get them also changes. The type below
549 // provides a small abstraction so we only have to choose the right class to instantiate, and the rest of the code is the same.
550 class HandleGetter
551 {
552 public:
HandleGetter(const uint32_t handleSize)553     HandleGetter(const uint32_t handleSize) : m_handleSize(handleSize)
554     {
555     }
~HandleGetter()556     virtual ~HandleGetter()
557     {
558     }
559 
560     virtual std::vector<uint8_t> getShaderGroupHandlesVector(const RayTracingPipeline *rtPipeline,
561                                                              const DeviceInterface &vkd, const VkDevice device,
562                                                              const VkPipeline pipeline, const uint32_t firstGroup,
563                                                              const uint32_t groupCount) const = 0;
564 
565 protected:
566     const uint32_t m_handleSize;
567 };
568 
569 class NormalHandleGetter : public HandleGetter
570 {
571 public:
NormalHandleGetter(const uint32_t shaderGroupHandleSize)572     NormalHandleGetter(const uint32_t shaderGroupHandleSize) : HandleGetter(shaderGroupHandleSize)
573     {
574     }
~NormalHandleGetter()575     virtual ~NormalHandleGetter()
576     {
577     }
578 
getShaderGroupHandlesVector(const RayTracingPipeline * rtPipeline,const DeviceInterface & vkd,const VkDevice device,const VkPipeline pipeline,const uint32_t firstGroup,const uint32_t groupCount) const579     std::vector<uint8_t> getShaderGroupHandlesVector(const RayTracingPipeline *rtPipeline, const DeviceInterface &vkd,
580                                                      const VkDevice device, const VkPipeline pipeline,
581                                                      const uint32_t firstGroup,
582                                                      const uint32_t groupCount) const override
583     {
584         return rtPipeline->getShaderGroupHandles(vkd, device, pipeline, m_handleSize, firstGroup, groupCount);
585     }
586 };
587 
588 class CaptureReplayHandleGetter : public HandleGetter
589 {
590 public:
CaptureReplayHandleGetter(const uint32_t shaderGroupHandleCaptureReplaySize)591     CaptureReplayHandleGetter(const uint32_t shaderGroupHandleCaptureReplaySize)
592         : HandleGetter(shaderGroupHandleCaptureReplaySize)
593     {
594     }
~CaptureReplayHandleGetter()595     virtual ~CaptureReplayHandleGetter()
596     {
597     }
598 
getShaderGroupHandlesVector(const RayTracingPipeline * rtPipeline,const DeviceInterface & vkd,const VkDevice device,const VkPipeline pipeline,const uint32_t firstGroup,const uint32_t groupCount) const599     std::vector<uint8_t> getShaderGroupHandlesVector(const RayTracingPipeline *rtPipeline, const DeviceInterface &vkd,
600                                                      const VkDevice device, const VkPipeline pipeline,
601                                                      const uint32_t firstGroup,
602                                                      const uint32_t groupCount) const override
603     {
604         return rtPipeline->getShaderGroupReplayHandles(vkd, device, pipeline, m_handleSize, firstGroup, groupCount);
605     }
606 };
607 
runTest(bool replay)608 std::vector<uint32_t> RayTracingPipelineLibraryTestInstance::runTest(bool replay)
609 {
610     const InstanceInterface &vki               = m_context.getInstanceInterface();
611     const VkPhysicalDevice physicalDevice      = m_context.getPhysicalDevice();
612     const auto &vkd                            = m_context.getDeviceInterface();
613     const auto device                          = m_context.getDevice();
614     const auto queueFamilyIndex                = m_context.getUniversalQueueFamilyIndex();
615     const auto queue                           = m_context.getUniversalQueue();
616     auto &allocator                            = m_context.getDefaultAllocator();
617     const auto pixelCount                      = m_data.getPixelCount();
618     const auto hitGroupCount                   = m_data.getHitGroupCount();
619     const auto rayTracingProperties            = makeRayTracingProperties(vki, physicalDevice);
620     const uint32_t shaderGroupHandleSize       = rayTracingProperties->getShaderGroupHandleSize();
621     const uint32_t shaderGroupBaseAlignment    = rayTracingProperties->getShaderGroupBaseAlignment();
622     const uint32_t shaderGroupHandleReplaySize = rayTracingProperties->getShaderGroupHandleCaptureReplaySize();
623     const auto allGroupOffsets                 = m_pipelineTree.getGroupOffsets();
624 
625     // Make sure we only replay in CAPTURE_REPLAY handles mode.
626     // When checking capture/replay handles, the first iteration will save the handles to m_captureReplayHandles.
627     // In the second iteration, the replay argument will be true and we'll use the saved m_captureReplayHandles when creating pipelines.
628     if (replay)
629         DE_ASSERT(m_data.includesCaptureReplay());
630 
631     const Move<VkDescriptorSetLayout> descriptorSetLayout =
632         DescriptorSetLayoutBuilder()
633             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
634             .addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
635             .build(vkd, device);
636     const Move<VkDescriptorPool> descriptorPool =
637         DescriptorPoolBuilder()
638             .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
639             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
640             .build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
641     const Move<VkDescriptorSet> descriptorSet   = makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
642     const Move<VkPipelineLayout> pipelineLayout = makePipelineLayout(vkd, device, descriptorSetLayout.get());
643 
644     // sort pipeline library configurations ( including main pipeline )
645     std::vector<std::tuple<int, uint32_t, uint32_t>> pipelineInfoList;
646     {
647         // push main pipeline on the list
648         uint32_t shaderOffset = 0U;
649         pipelineInfoList.push_back(std::make_tuple(-1, shaderOffset, m_data.libraryConfiguration.pipelineShaders));
650         shaderOffset += m_data.libraryConfiguration.pipelineShaders;
651 
652         for (size_t i = 0; i < m_data.libraryConfiguration.pipelineLibraries.size(); ++i)
653         {
654             int parentIndex      = m_data.libraryConfiguration.pipelineLibraries[i].x();
655             uint32_t shaderCount = uint32_t(m_data.libraryConfiguration.pipelineLibraries[i].y());
656             if (parentIndex < 0 || parentIndex >= int(pipelineInfoList.size()))
657                 TCU_THROW(InternalError, "Wrong library tree definition");
658             pipelineInfoList.push_back(std::make_tuple(parentIndex, shaderOffset, shaderCount));
659             shaderOffset += shaderCount;
660         }
661     }
662 
663     // create pipeline libraries and build a pipeline tree.
664     std::vector<de::SharedPtr<de::MovePtr<RayTracingPipeline>>> rtPipelines(pipelineInfoList.size());
665     std::vector<std::vector<std::tuple<std::string, VkShaderStageFlagBits>>> pipelineShaders(pipelineInfoList.size());
666     for (size_t idx = 0; idx < pipelineInfoList.size(); ++idx)
667     {
668         int parentIndex;
669         uint32_t shaderCount, shaderOffset;
670         std::tie(parentIndex, shaderOffset, shaderCount) = pipelineInfoList[idx];
671 
672         // create pipeline objects
673         de::SharedPtr<de::MovePtr<RayTracingPipeline>> rtPipeline =
674             makeVkSharedPtr(de::MovePtr<RayTracingPipeline>(new RayTracingPipeline));
675 
676         (*rtPipeline)->setDeferredOperation(m_data.pipelinesCreatedUsingDHO);
677 
678         VkPipelineCreateFlags creationFlags = 0u;
679 
680         // all pipelines are pipeline libraries, except for the main pipeline
681         if (idx > 0)
682             creationFlags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR;
683 
684         // Sometimes we need capture/replay handles.
685         if (m_data.includesCaptureReplay())
686             creationFlags |= VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
687 
688         if (m_data.useLinkTimeOptimizations)
689         {
690             if (m_data.retainLinkTimeOptimizations)
691                 creationFlags |= VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
692             else
693                 creationFlags |= VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT;
694         }
695 
696         rtPipeline->get()->setCreateFlags(creationFlags);
697         if (m_data.useMaintenance5)
698             rtPipeline->get()->setCreateFlags2(translateCreateFlag(creationFlags));
699 
700         rtPipeline->get()->setMaxPayloadSize(
701             16U); // because rayPayloadInEXT is uvec4 ( = 16 bytes ) for all chit shaders
702         rtPipelines[idx] = rtPipeline;
703 
704         // prepare all shader names for all pipelines
705         if (idx == 0)
706         {
707             pipelineShaders[0].push_back(std::make_tuple("rgen", VK_SHADER_STAGE_RAYGEN_BIT_KHR));
708             pipelineShaders[0].push_back(std::make_tuple("miss", VK_SHADER_STAGE_MISS_BIT_KHR));
709         }
710         for (uint32_t i = 0; i < shaderCount; ++i)
711         {
712             std::stringstream csname;
713             csname << "chit" << shaderOffset + i;
714             pipelineShaders[idx].push_back(std::make_tuple(csname.str(), VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR));
715         }
716     }
717 
718     const auto isecMod =
719         (m_data.useAABBs ? createShaderModule(vkd, device, m_context.getBinaryCollection().get("isec")) :
720                            Move<VkShaderModule>());
721 
722     // singlethreaded / multithreaded compilation of all shaders
723     if (m_data.multithreadedCompilation)
724     {
725         std::vector<CompileShadersMultithreadData> csmds;
726         for (uint32_t i = 0; i < rtPipelines.size(); ++i)
727             csmds.push_back(CompileShadersMultithreadData{m_context, rtPipelines[i], pipelineShaders[i], isecMod});
728 
729         std::vector<deThread> threads;
730         for (uint32_t i = 0; i < csmds.size(); ++i)
731             threads.push_back(deThread_create(compileShadersThread, (void *)&csmds[i], DE_NULL));
732 
733         for (uint32_t i = 0; i < threads.size(); ++i)
734         {
735             deThread_join(threads[i]);
736             deThread_destroy(threads[i]);
737         }
738     }
739     else // m_data.multithreadedCompilation == false
740     {
741         for (uint32_t i = 0; i < rtPipelines.size(); ++i)
742             compileShaders(m_context, rtPipelines[i], pipelineShaders[i], isecMod);
743     }
744 
745     // connect libraries into a tree structure
746     for (size_t idx = 0; idx < pipelineInfoList.size(); ++idx)
747     {
748         int parentIndex;
749         uint32_t shaderCount, shaderOffset;
750         std::tie(parentIndex, shaderOffset, shaderCount) = pipelineInfoList[idx];
751         if (parentIndex != -1)
752             rtPipelines[parentIndex]->get()->addLibrary(rtPipelines[idx]);
753     }
754 
755     // Add the saved capture/replay handles when in replay mode.
756     if (replay)
757     {
758         for (size_t pipelineIdx = 0; pipelineIdx < rtPipelines.size(); ++pipelineIdx)
759         {
760             const auto pipelineOffsetBytes = allGroupOffsets.at(pipelineIdx) * shaderGroupHandleReplaySize;
761             for (size_t groupIdx = 0; groupIdx < pipelineShaders.at(pipelineIdx).size(); ++groupIdx)
762             {
763                 const auto groupOffsetBytes = pipelineOffsetBytes + groupIdx * shaderGroupHandleReplaySize;
764                 rtPipelines[pipelineIdx]->get()->setGroupCaptureReplayHandle(
765                     static_cast<uint32_t>(groupIdx), &m_captureReplayHandles.at(groupOffsetBytes));
766             }
767         }
768     }
769 
770     // build main pipeline and all pipeline libraries that it depends on
771     const auto firstRTPipeline = rtPipelines.at(0)->get();
772     std::vector<de::SharedPtr<Move<VkPipeline>>> pipelines =
773         firstRTPipeline->createPipelineWithLibraries(vkd, device, *pipelineLayout);
774     const VkPipeline pipeline = pipelines.at(0)->get();
775 
776     // Obtain and verify shader group handles.
777     if (m_data.testType != TestType::DEFAULT)
778     {
779         // When checking all handles, we'll do two iterations, checking the normal handles first and the capture/replay handles later.
780         const bool checkAllHandles = (m_data.testType == TestType::CHECK_ALL_HANDLES);
781         const uint32_t iterations  = (checkAllHandles ? 2u : 1u);
782 
783         for (uint32_t iter = 0u; iter < iterations; ++iter)
784         {
785             const bool normalHandles = (iter == 0u && m_data.testType != TestType::CHECK_CAPTURE_REPLAY_HANDLES);
786             const auto handleSize    = (normalHandles ? shaderGroupHandleSize : shaderGroupHandleReplaySize);
787             de::MovePtr<HandleGetter> handleGetter(
788                 normalHandles ? static_cast<HandleGetter *>(new NormalHandleGetter(handleSize)) :
789                                 static_cast<HandleGetter *>(new CaptureReplayHandleGetter(handleSize)));
790 
791             const auto allHandles = handleGetter->getShaderGroupHandlesVector(
792                 firstRTPipeline, vkd, device, pipeline, 0u, firstRTPipeline->getFullShaderGroupCount());
793             const auto allGroupCounts = getAllGroupCounts(rtPipelines);
794 
795             DE_ASSERT(allGroupOffsets.size() == rtPipelines.size());
796             DE_ASSERT(allGroupCounts.size() == rtPipelines.size());
797             DE_ASSERT(rtPipelines.size() == pipelines.size());
798 
799             for (size_t idx = 0; idx < rtPipelines.size(); ++idx)
800             {
801                 const auto curRTPipeline   = rtPipelines[idx]->get();
802                 const auto &curPipeline    = pipelines[idx]->get();
803                 const auto &curGroupOffset = allGroupOffsets[idx];
804                 const auto &curGroupCount  = allGroupCounts[idx];
805                 const auto curHandles      = handleGetter->getShaderGroupHandlesVector(curRTPipeline, vkd, device,
806                                                                                        curPipeline, 0u, curGroupCount);
807 
808                 const auto rangeStart = curGroupOffset * shaderGroupHandleSize;
809                 const auto rangeEnd   = (curGroupOffset + curGroupCount) * shaderGroupHandleSize;
810 
811                 const std::vector<uint8_t> handleRange(allHandles.begin() + rangeStart, allHandles.begin() + rangeEnd);
812                 if (handleRange != curHandles)
813                 {
814                     std::ostringstream msg;
815                     msg << (normalHandles ? "" : "Capture Replay ")
816                         << "Shader Group Handle verification failed for pipeline " << idx;
817                     TCU_FAIL(msg.str());
818                 }
819             }
820 
821             // Save or check capture/replay handles.
822             if (!normalHandles)
823             {
824                 if (replay)
825                 {
826                     // Check saved handles.
827                     if (allHandles != m_captureReplayHandles)
828                         TCU_FAIL(
829                             "Capture Replay Shader Group Handles do not match creation handles for top-level pipeline");
830                 }
831                 else
832                 {
833                     // Save handles for the replay phase.
834                     m_captureReplayHandles = allHandles;
835                 }
836             }
837         }
838     }
839 
840     // build shader binding tables
841     const de::MovePtr<BufferWithMemory> raygenShaderBindingTable = firstRTPipeline->createShaderBindingTable(
842         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
843     const de::MovePtr<BufferWithMemory> missShaderBindingTable = firstRTPipeline->createShaderBindingTable(
844         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
845     const de::MovePtr<BufferWithMemory> hitShaderBindingTable = firstRTPipeline->createShaderBindingTable(
846         vkd, device, pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, hitGroupCount);
847     const VkStridedDeviceAddressRegionKHR raygenShaderBindingTableRegion =
848         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0),
849                                           shaderGroupHandleSize, shaderGroupHandleSize);
850     const VkStridedDeviceAddressRegionKHR missShaderBindingTableRegion =
851         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0),
852                                           shaderGroupHandleSize, shaderGroupHandleSize);
853     const VkStridedDeviceAddressRegionKHR hitShaderBindingTableRegion =
854         makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0),
855                                           shaderGroupHandleSize, hitGroupCount * shaderGroupHandleSize);
856     const VkStridedDeviceAddressRegionKHR callableShaderBindingTableRegion =
857         makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
858 
859     const VkFormat imageFormat              = VK_FORMAT_R32_UINT;
860     const VkImageCreateInfo imageCreateInfo = makeImageCreateInfo(m_data.width, m_data.height, imageFormat);
861     const VkImageSubresourceRange imageSubresourceRange =
862         makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
863     const de::MovePtr<ImageWithMemory> image = de::MovePtr<ImageWithMemory>(
864         new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
865     const Move<VkImageView> imageView =
866         makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, imageFormat, imageSubresourceRange);
867 
868     const VkBufferCreateInfo resultBufferCreateInfo =
869         makeBufferCreateInfo(pixelCount * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
870     const VkImageSubresourceLayers resultBufferImageSubresourceLayers =
871         makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
872     const VkBufferImageCopy resultBufferImageRegion =
873         makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1), resultBufferImageSubresourceLayers);
874     de::MovePtr<BufferWithMemory> resultBuffer = de::MovePtr<BufferWithMemory>(
875         new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
876     auto &resultBufferAlloc = resultBuffer->getAllocation();
877 
878     const VkDescriptorImageInfo descriptorImageInfo =
879         makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
880 
881     const Move<VkCommandPool> cmdPool = createCommandPool(vkd, device, 0, queueFamilyIndex);
882     const Move<VkCommandBuffer> cmdBuffer =
883         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
884 
885     std::vector<de::SharedPtr<BottomLevelAccelerationStructure>> bottomLevelAccelerationStructures;
886     de::MovePtr<TopLevelAccelerationStructure> topLevelAccelerationStructure;
887 
888     beginCommandBuffer(vkd, *cmdBuffer, 0u);
889     {
890         const VkImageMemoryBarrier preImageBarrier =
891             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
892                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, **image, imageSubresourceRange);
893         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
894                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
895 
896         const VkClearValue clearValue = makeClearValueColorU32(0xFF, 0u, 0u, 0u);
897         vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1,
898                                &imageSubresourceRange);
899 
900         const VkImageMemoryBarrier postImageBarrier = makeImageMemoryBarrier(
901             VK_ACCESS_TRANSFER_WRITE_BIT,
902             VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
903             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, **image, imageSubresourceRange);
904         cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
905                                       VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
906 
907         bottomLevelAccelerationStructures = initBottomAccelerationStructures(*cmdBuffer);
908         topLevelAccelerationStructure     = initTopAccelerationStructure(*cmdBuffer, bottomLevelAccelerationStructures);
909 
910         const TopLevelAccelerationStructure *topLevelAccelerationStructurePtr = topLevelAccelerationStructure.get();
911         VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
912             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
913             DE_NULL,                                                           //  const void* pNext;
914             1u,                                                                //  uint32_t accelerationStructureCount;
915             topLevelAccelerationStructurePtr->getPtr(), //  const VkAccelerationStructureKHR* pAccelerationStructures;
916         };
917 
918         DescriptorSetUpdateBuilder()
919             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
920                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
921             .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u),
922                          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
923             .update(vkd, device);
924 
925         vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1,
926                                   &descriptorSet.get(), 0, DE_NULL);
927 
928         vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline);
929 
930         cmdTraceRays(vkd, *cmdBuffer, &raygenShaderBindingTableRegion, &missShaderBindingTableRegion,
931                      &hitShaderBindingTableRegion, &callableShaderBindingTableRegion, m_data.width, m_data.height, 1);
932 
933         const VkMemoryBarrier postTraceMemoryBarrier =
934             makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
935         const VkMemoryBarrier postCopyMemoryBarrier =
936             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
937         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
938                                  VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
939 
940         vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u,
941                                  &resultBufferImageRegion);
942 
943         cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
944                                  &postCopyMemoryBarrier);
945     }
946     endCommandBuffer(vkd, *cmdBuffer);
947 
948     submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
949 
950     invalidateAlloc(vkd, device, resultBufferAlloc);
951 
952     std::vector<uint32_t> resultVector(pixelCount);
953     deMemcpy(resultVector.data(), resultBufferAlloc.getHostPtr(), de::dataSize(resultVector));
954 
955     return resultVector;
956 }
957 
iterate(void)958 tcu::TestStatus RayTracingPipelineLibraryTestInstance::iterate(void)
959 {
960     // run test using arrays of pointers
961     const auto numShadersUsed = m_data.getHitGroupCount();
962     const auto bufferVec      = runTest();
963 
964     if (m_data.includesCaptureReplay())
965     {
966         const auto replayResults = runTest(true /*replay*/);
967         if (bufferVec != replayResults)
968             return tcu::TestStatus::fail("Replay results differ from original results");
969     }
970 
971     uint32_t failures  = 0;
972     uint32_t pos       = 0;
973     uint32_t shaderIdx = 0;
974 
975     // Verify results.
976     for (uint32_t y = 0; y < m_data.height; ++y)
977         for (uint32_t x = 0; x < m_data.width; ++x)
978         {
979             uint32_t expectedResult;
980             if ((x + y) % 2)
981             {
982                 expectedResult = shaderIdx % numShadersUsed;
983                 ++shaderIdx;
984             }
985             else
986                 expectedResult = RTPL_MAX_CHIT_SHADER_COUNT;
987 
988             if (bufferVec.at(pos) != expectedResult)
989                 failures++;
990 
991             ++pos;
992         }
993 
994     if (failures == 0)
995         return tcu::TestStatus::pass("Pass");
996     else
997         return tcu::TestStatus::fail("failures=" + de::toString(failures));
998 }
999 
1000 } // namespace
1001 
addPipelineLibraryConfigurationsTests(tcu::TestCaseGroup * group)1002 void addPipelineLibraryConfigurationsTests(tcu::TestCaseGroup *group)
1003 {
1004     struct ThreadData
1005     {
1006         bool multithreaded;
1007         bool pipelinesCreatedUsingDHO;
1008         const char *name;
1009     } threadData[] = {
1010         {false, false, "singlethreaded_compilation"},
1011         {true, false, "multithreaded_compilation"},
1012         {true, true, "multithreaded_compilation_dho"},
1013     };
1014 
1015     struct LibraryConfigurationData
1016     {
1017         LibraryConfiguration libraryConfiguration;
1018         const char *name;
1019     } libraryConfigurationData[] = {
1020         {{0, {{0, 1}}}, "s0_l1"},          // 0 shaders in a main pipeline. 1 pipeline library with 1 shader
1021         {{1, {{0, 1}}}, "s1_l1"},          // 1 shader  in a main pipeline. 1 pipeline library with 1 shader
1022         {{0, {{0, 1}, {0, 1}}}, "s0_l11"}, // 0 shaders in a main pipeline. 2 pipeline libraries with 1 shader each
1023         {{3, {{0, 1}, {0, 1}}}, "s3_l11"}, // 3 shaders in a main pipeline. 2 pipeline libraries with 1 shader each
1024         {{0, {{0, 2}, {0, 3}}},
1025          "s0_l23"}, // 0 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively
1026         {{2, {{0, 2}, {0, 3}}},
1027          "s2_l23"}, // 2 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively
1028         {{0, {{0, 1}, {1, 1}}},
1029          "s0_l1_l1"}, // 0 shaders in a main pipeline. 2 pipeline libraries with 1 shader each. Second library is a child of a first library
1030         {{1, {{0, 1}, {1, 1}}},
1031          "s1_l1_l1"}, // 1 shader  in a main pipeline. 2 pipeline libraries with 1 shader each. Second library is a child of a first library
1032         {{0, {{0, 2}, {1, 3}}},
1033          "s0_l2_l3"}, // 0 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively. Second library is a child of a first library
1034         {{3, {{0, 2}, {1, 3}}},
1035          "s3_l2_l3"}, // 3 shaders in a main pipeline. 2 pipeline libraries with 2 and 3 shaders respectively. Second library is a child of a first library
1036         {{3, {{0, 2}, {0, 3}, {0, 2}}},
1037          "s3_l232"}, // 3 shaders in a main pipeline. 3 pipeline libraries with 2, 3 and 2 shaders respectively.
1038         {{3, {{0, 2}, {1, 2}, {1, 2}, {0, 2}}},
1039          "s3_l22_l22"}, // 3 shaders in a main pipeline. 4 pipeline libraries with 2 shaders each. Second and third library is a child of a first library
1040     };
1041 
1042     struct
1043     {
1044         const TestType testType;
1045         const char *suffix;
1046     } testTypeCases[] = {
1047         {TestType::DEFAULT, ""},
1048         {TestType::CHECK_GROUP_HANDLES, "_check_group_handles"},
1049         {TestType::CHECK_CAPTURE_REPLAY_HANDLES, "_check_capture_replay_handles"},
1050         {TestType::CHECK_ALL_HANDLES, "_check_all_handles"},
1051     };
1052 
1053     struct
1054     {
1055         const bool useAABBs;
1056         const char *suffix;
1057     } geometryTypeCases[] = {
1058         {false, ""},
1059         {true, "_aabbs"},
1060     };
1061 
1062     for (size_t threadNdx = 0; threadNdx < DE_LENGTH_OF_ARRAY(threadData); ++threadNdx)
1063     {
1064         de::MovePtr<tcu::TestCaseGroup> threadGroup(
1065             new tcu::TestCaseGroup(group->getTestContext(), threadData[threadNdx].name));
1066 
1067         for (size_t libConfigNdx = 0; libConfigNdx < DE_LENGTH_OF_ARRAY(libraryConfigurationData); ++libConfigNdx)
1068         {
1069             for (const auto &testTypeCase : testTypeCases)
1070             {
1071                 for (const auto &geometryCase : geometryTypeCases)
1072                 {
1073                     TestParams testParams{libraryConfigurationData[libConfigNdx].libraryConfiguration,
1074                                           threadData[threadNdx].multithreaded,
1075                                           threadData[threadNdx].pipelinesCreatedUsingDHO,
1076                                           testTypeCase.testType,
1077                                           geometryCase.useAABBs,
1078                                           false,
1079                                           false,
1080                                           false,
1081                                           RTPL_DEFAULT_SIZE,
1082                                           RTPL_DEFAULT_SIZE};
1083 
1084                     const std::string testName = std::string(libraryConfigurationData[libConfigNdx].name) +
1085                                                  geometryCase.suffix + testTypeCase.suffix;
1086                     threadGroup->addChild(
1087                         new RayTracingPipelineLibraryTestCase(group->getTestContext(), testName.c_str(), testParams));
1088                 }
1089             }
1090         }
1091         group->addChild(threadGroup.release());
1092     }
1093 
1094     {
1095         de::MovePtr<tcu::TestCaseGroup> miscGroup(new tcu::TestCaseGroup(group->getTestContext(), "misc"));
1096 
1097         TestParams testParamsMaintenance5{libraryConfigurationData[1].libraryConfiguration,
1098                                           false,
1099                                           false,
1100                                           TestType::CHECK_CAPTURE_REPLAY_HANDLES,
1101                                           false,
1102                                           true,
1103                                           false,
1104                                           true,
1105                                           RTPL_DEFAULT_SIZE,
1106                                           RTPL_DEFAULT_SIZE};
1107         miscGroup->addChild(
1108             new RayTracingPipelineLibraryTestCase(group->getTestContext(), "maintenance5", testParamsMaintenance5));
1109 
1110         TestParams testParamsUseLinkTimeOpt{libraryConfigurationData[5].libraryConfiguration,
1111                                             false,
1112                                             false,
1113                                             TestType::DEFAULT,
1114                                             true,
1115                                             true,
1116                                             false,
1117                                             false,
1118                                             RTPL_DEFAULT_SIZE,
1119                                             RTPL_DEFAULT_SIZE};
1120         miscGroup->addChild(new RayTracingPipelineLibraryTestCase(
1121             group->getTestContext(), "use_link_time_optimizations", testParamsUseLinkTimeOpt));
1122 
1123         TestParams testParamsRetainLinkTimeOpt{libraryConfigurationData[5].libraryConfiguration,
1124                                                false,
1125                                                false,
1126                                                TestType::DEFAULT,
1127                                                true,
1128                                                true,
1129                                                true,
1130                                                false,
1131                                                RTPL_DEFAULT_SIZE,
1132                                                RTPL_DEFAULT_SIZE};
1133         miscGroup->addChild(new RayTracingPipelineLibraryTestCase(
1134             group->getTestContext(), "retain_link_time_optimizations", testParamsRetainLinkTimeOpt));
1135 
1136         group->addChild(miscGroup.release());
1137     }
1138 }
1139 
createPipelineLibraryTests(tcu::TestContext & testCtx)1140 tcu::TestCaseGroup *createPipelineLibraryTests(tcu::TestContext &testCtx)
1141 {
1142     de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "pipeline_library"));
1143 
1144     addTestGroup(group.get(), "configurations", addPipelineLibraryConfigurationsTests);
1145 
1146     return group.release();
1147 }
1148 
1149 } // namespace RayTracing
1150 
1151 } // namespace vkt
1152