• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Misc tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingMiscTests.hpp"
25 #include "vktTestCaseUtil.hpp"
26 
27 #include "vkDefs.hpp"
28 
29 #include "vktTestCase.hpp"
30 #include "vkCmdUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkBufferWithMemory.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "vkRayTracingUtil.hpp"
39 
40 #include "tcuImageCompare.hpp"
41 #include "deRandom.hpp"
42 #include <algorithm>
43 #include <memory>
44 #include <sstream>
45 
46 namespace vkt
47 {
48 namespace RayTracing
49 {
50 namespace
51 {
52 using namespace vk;
53 using namespace std;
54 
/* Component base types, grouped by kind. UNKNOWN is the trailing sentinel value. */
enum class BaseType
{
    /* Floating-point */
    F32, F64,

    /* Signed integers */
    I8, I16, I32, I64,

    /* Unsigned integers */
    U8, U16, U32, U64,

    UNKNOWN
};
70 
/* Primitive type(s) used to build acceleration structure geometry.
 *
 * FIRST and COUNT delimit the single-primitive-type values. AABB_AND_TRIANGLES is declared
 * after COUNT, so it sits outside that range.
 */
enum class GeometryType
{
    FIRST = 0,

    AABB      = FIRST,
    TRIANGLES = 1,

    COUNT = 2,

    /* Only compatible with ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES AS layout. */
    AABB_AND_TRIANGLES = 3,
};
82 
/* Matrix major order selector. UNKNOWN is the trailing sentinel value. */
enum class MatrixMajorOrder
{
    COLUMN_MAJOR = 0,
    ROW_MAJOR    = 1,

    UNKNOWN = 2
};
90 
/* Shader group indices. The ray generation group comes first; FIRST_CALLABLE_GROUP is the
 * first index following the hit group.
 */
enum class ShaderGroups
{
    FIRST_GROUP  = 0,
    RAYGEN_GROUP = FIRST_GROUP,
    MISS_GROUP   = 1,
    HIT_GROUP    = 2,

    FIRST_CALLABLE_GROUP = 3,
};
100 
/* Identifiers of the individual test variants. COUNT is the trailing sentinel and equals the
 * number of test types declared before it.
 *
 * NOTE: The declaration order determines the underlying values, so numbered families are kept
 *       contiguous and in ascending order.
 */
enum class TestType
{
    AABBS_AND_TRIS_IN_ONE_TL,
    AS_STRESS_TEST,
    CALLABLE_SHADER_STRESS_DYNAMIC_TEST,
    CALLABLE_SHADER_STRESS_TEST,
    CULL_MASK,
    MAX_RAY_HIT_ATTRIBUTE_SIZE,
    MAX_RT_INVOCATIONS_SUPPORTED,
    CULL_MASK_EXTRA_BITS,
    NO_DUPLICATE_ANY_HIT,
    REPORT_INTERSECTION_RESULT,
    RAY_PAYLOAD_IN,

    /* RECURSIVE_TRACES_<n>, n = 0..29 */
    RECURSIVE_TRACES_0,  RECURSIVE_TRACES_1,  RECURSIVE_TRACES_2,  RECURSIVE_TRACES_3,  RECURSIVE_TRACES_4,
    RECURSIVE_TRACES_5,  RECURSIVE_TRACES_6,  RECURSIVE_TRACES_7,  RECURSIVE_TRACES_8,  RECURSIVE_TRACES_9,
    RECURSIVE_TRACES_10, RECURSIVE_TRACES_11, RECURSIVE_TRACES_12, RECURSIVE_TRACES_13, RECURSIVE_TRACES_14,
    RECURSIVE_TRACES_15, RECURSIVE_TRACES_16, RECURSIVE_TRACES_17, RECURSIVE_TRACES_18, RECURSIVE_TRACES_19,
    RECURSIVE_TRACES_20, RECURSIVE_TRACES_21, RECURSIVE_TRACES_22, RECURSIVE_TRACES_23, RECURSIVE_TRACES_24,
    RECURSIVE_TRACES_25, RECURSIVE_TRACES_26, RECURSIVE_TRACES_27, RECURSIVE_TRACES_28, RECURSIVE_TRACES_29,

    /* SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_<n>, n = 1..6 */
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1, SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3, SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4,
    SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5, SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6,

    /* SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_<n>, n = 1..6 */
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1, SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3, SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4,
    SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5, SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6,

    /* SHADER_RECORD_BLOCK_SCALAR_<n>, n = 1..6 */
    SHADER_RECORD_BLOCK_SCALAR_1, SHADER_RECORD_BLOCK_SCALAR_2, SHADER_RECORD_BLOCK_SCALAR_3,
    SHADER_RECORD_BLOCK_SCALAR_4, SHADER_RECORD_BLOCK_SCALAR_5, SHADER_RECORD_BLOCK_SCALAR_6,

    /* SHADER_RECORD_BLOCK_STD430_<n>, n = 1..6 */
    SHADER_RECORD_BLOCK_STD430_1, SHADER_RECORD_BLOCK_STD430_2, SHADER_RECORD_BLOCK_STD430_3,
    SHADER_RECORD_BLOCK_STD430_4, SHADER_RECORD_BLOCK_STD430_5, SHADER_RECORD_BLOCK_STD430_6,

    IGNORE_ANY_HIT_STATICALLY,
    IGNORE_ANY_HIT_DYNAMICALLY,
    TERMINATE_ANY_HIT_STATICALLY,
    TERMINATE_ANY_HIT_DYNAMICALLY,
    TERMINATE_INTERSECTION_STATICALLY,
    TERMINATE_INTERSECTION_DYNAMICALLY,
    USE_MEMORY_ACCESS,

    COUNT
};
178 
/* Variable types, one type family per row.
 *
 * FIRST aliases the first enumerator (FLOAT). UNKNOWN follows the last real type and COUNT
 * aliases it, so COUNT equals the number of real types.
 *
 * NOTE: The declaration order determines the underlying values.
 */
enum class VariableType
{
    FIRST,

    /* float scalar / vectors */
    FLOAT = FIRST, VEC2, VEC3, VEC4,

    /* float matrices */
    MAT2, MAT2X2, MAT2X3, MAT2X4,
    MAT3, MAT3X2, MAT3X3, MAT3X4,
    MAT4, MAT4X2, MAT4X3, MAT4X4,

    /* signed integers */
    INT,   IVEC2,   IVEC3,   IVEC4,
    INT8,  I8VEC2,  I8VEC3,  I8VEC4,
    INT16, I16VEC2, I16VEC3, I16VEC4,
    INT64, I64VEC2, I64VEC3, I64VEC4,

    /* unsigned integers */
    UINT,   UVEC2,   UVEC3,   UVEC4,
    UINT16, U16VEC2, U16VEC3, U16VEC4,
    UINT64, U64VEC2, U64VEC3, U64VEC4,
    UINT8,  U8VEC2,  U8VEC3,  U8VEC4,

    /* double scalar / vectors */
    DOUBLE, DVEC2, DVEC3, DVEC4,

    /* double matrices */
    DMAT2, DMAT2X2, DMAT2X3, DMAT2X4,
    DMAT3, DMAT3X2, DMAT3X3, DMAT3X4,
    DMAT4, DMAT4X2, DMAT4X3, DMAT4X4,

    UNKNOWN,
    COUNT = UNKNOWN,
};
262 
/* Describes how geometry is distributed across bottom-level acceleration structures (BL)
 * and geometries under a single top-level acceleration structure (TL).
 *
 * FIRST and COUNT delimit the regular layouts. The varying-primitive-types layout is
 * declared after COUNT, so it sits outside that range.
 */
enum class AccelerationStructureLayout
{
    FIRST = 0,

    ONE_TL_ONE_BL_ONE_GEOMETRY      = FIRST,
    ONE_TL_ONE_BL_MANY_GEOMETRIES   = 1,
    ONE_TL_MANY_BLS_ONE_GEOMETRY    = 2,
    ONE_TL_MANY_BLS_MANY_GEOMETRIES = 3,

    COUNT = 4,

    ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES = 5
};
276 
277 static const VkFlags ALL_RAY_TRACING_STAGES = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
278                                               VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR | VK_SHADER_STAGE_MISS_BIT_KHR |
279                                               VK_SHADER_STAGE_INTERSECTION_BIT_KHR | VK_SHADER_STAGE_CALLABLE_BIT_KHR;
280 
281 struct CaseDef
282 {
283     TestType type;
284     GeometryType geometryType;
285     AccelerationStructureLayout asLayout;
286 
CaseDefvkt::RayTracing::__anon2e56165f0111::CaseDef287     CaseDef() : type(TestType::COUNT), geometryType(GeometryType::COUNT), asLayout(AccelerationStructureLayout::COUNT)
288     {
289         /* Stub */
290     }
291 
CaseDefvkt::RayTracing::__anon2e56165f0111::CaseDef292     CaseDef(const TestType &inType)
293         : type(inType)
294         , geometryType(GeometryType::COUNT)
295         , asLayout(AccelerationStructureLayout::COUNT)
296     {
297         /* Stub */
298     }
299 
CaseDefvkt::RayTracing::__anon2e56165f0111::CaseDef300     CaseDef(const TestType &inType, const GeometryType &inGeometryType, const AccelerationStructureLayout &inAsLayout)
301         : type(inType)
302         , geometryType(inGeometryType)
303         , asLayout(inAsLayout)
304     {
305         /* Stub */
306     }
307 };
308 
309 /* Helper global functions */
getSuffixForASLayout(const AccelerationStructureLayout & layout)310 static const char *getSuffixForASLayout(const AccelerationStructureLayout &layout)
311 {
312     const char *result = "?!";
313 
314     switch (layout)
315     {
316     case AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY:
317         result = "1TL1BL1G";
318         break;
319     case AccelerationStructureLayout::ONE_TL_ONE_BL_MANY_GEOMETRIES:
320         result = "1TL1BLnG";
321         break;
322     case AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY:
323         result = "1TLnBL1G";
324         break;
325     case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES:
326         result = "1TLnBLnG";
327         break;
328 
329     default:
330     {
331         deAssertFail("This should never happen", __FILE__, __LINE__);
332     }
333     }
334 
335     return result;
336 }
337 
getSuffixForGeometryType(const GeometryType & type)338 static const char *getSuffixForGeometryType(const GeometryType &type)
339 {
340     const char *result = "?!";
341 
342     switch (type)
343     {
344     case GeometryType::AABB:
345         result = "AABB";
346         break;
347     case GeometryType::TRIANGLES:
348         result = "tri";
349         break;
350 
351     default:
352     {
353         deAssertFail("This should never happen", __FILE__, __LINE__);
354     }
355     }
356 
357     return result;
358 }
359 
/* Instances and primitives in acceleration structures can have additional information assigned.
 *
 * Tests customize the ASes generated by AS providers by overriding the functions of interest
 * in this class. The base implementations return a cull mask of 0xFF and a custom index of 0
 * for every instance.
 */
class ASPropertyProvider
{
public:
    virtual ~ASPropertyProvider() = default;

    /* Returns the cull mask to use for instance @param nInstance of bottom-level AS @param nBL. */
    virtual uint8_t getCullMask(const uint32_t & /* nBL */, const uint32_t & /* nInstance */) const
    {
        return 0xFF;
    }

    /* Returns the custom index to use for instance @param nInstance of bottom-level AS @param nBL. */
    virtual uint32_t getInstanceCustomIndex(const uint32_t & /* nBL */, const uint32_t & /* nInstance */) const
    {
        return 0;
    }
};
387 
388 class IGridASFeedback
389 {
390 public:
~IGridASFeedback()391     virtual ~IGridASFeedback()
392     {
393         /* Stub */
394     }
395 
396     virtual void onCullMaskAssignedToCell(const tcu::UVec3 &cellLocation, const uint8_t &cullMaskAssigned) = 0;
397     virtual void onInstanceCustomIndexAssignedToCell(const tcu::UVec3 &cellLocation,
398                                                      const uint32_t &customIndexAssigned)                  = 0;
399 };
400 
401 /* Acceleration structure data providers.
402  *
403  * These are expected to be reused across different test cases.
404  **/
405 class ASProviderBase
406 {
407 public:
~ASProviderBase()408     virtual ~ASProviderBase()
409     {
410         /* Stub */
411     }
412 
413     virtual std::unique_ptr<TopLevelAccelerationStructure> createTLAS(
414         Context &context, const AccelerationStructureLayout &asLayout, VkCommandBuffer cmdBuffer,
415         const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
416         const ASPropertyProvider *optAsPropertyProviderPtr = nullptr,
417         IGridASFeedback *optASFeedbackPtr                  = nullptr) const = 0;
418     virtual uint32_t getNPrimitives() const                = 0;
419 };
420 
421 /* A 3D grid built of primitives. Size and distribution of the geometry can be configured both at creation time and at a later time. */
422 class GridASProvider : public ASProviderBase
423 {
424 public:
GridASProvider(const tcu::Vec3 & gridStartXYZ,const tcu::Vec3 & gridCellSizeXYZ,const tcu::UVec3 & gridSizeXYZ,const tcu::Vec3 & gridInterCellDeltaXYZ,const GeometryType & geometryType)425     GridASProvider(const tcu::Vec3 &gridStartXYZ, const tcu::Vec3 &gridCellSizeXYZ, const tcu::UVec3 &gridSizeXYZ,
426                    const tcu::Vec3 &gridInterCellDeltaXYZ, const GeometryType &geometryType)
427         : m_geometryType(geometryType)
428         , m_gridCellSizeXYZ(gridCellSizeXYZ)
429         , m_gridInterCellDeltaXYZ(gridInterCellDeltaXYZ)
430         , m_gridSizeXYZ(gridSizeXYZ)
431         , m_gridStartXYZ(gridStartXYZ)
432     {
433         fillVertexVec();
434     }
435 
createTLAS(Context & context,const AccelerationStructureLayout & asLayout,VkCommandBuffer cmdBuffer,const VkGeometryFlagsKHR & bottomLevelGeometryFlags,const ASPropertyProvider * optASPropertyProviderPtr,IGridASFeedback * optASFeedbackPtr) const436     std::unique_ptr<TopLevelAccelerationStructure> createTLAS(Context &context,
437                                                               const AccelerationStructureLayout &asLayout,
438                                                               VkCommandBuffer cmdBuffer,
439                                                               const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
440                                                               const ASPropertyProvider *optASPropertyProviderPtr,
441                                                               IGridASFeedback *optASFeedbackPtr) const final
442     {
443         Allocator &allocator                   = context.getDefaultAllocator();
444         const DeviceInterface &deviceInterface = context.getDeviceInterface();
445         const VkDevice deviceVk                = context.getDevice();
446         const auto nCells                      = m_gridSizeXYZ.x() * m_gridSizeXYZ.y() * m_gridSizeXYZ.z();
447         std::unique_ptr<TopLevelAccelerationStructure> resultPtr;
448         de::MovePtr<TopLevelAccelerationStructure> tlPtr = makeTopLevelAccelerationStructure();
449 
450         DE_ASSERT(((asLayout == AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES) &&
451                    (m_geometryType == GeometryType::AABB_AND_TRIANGLES)) ||
452                   ((asLayout != AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES) &&
453                    (m_geometryType != GeometryType::AABB_AND_TRIANGLES)));
454 
455         switch (asLayout)
456         {
457         case AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY:
458         {
459             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
460 
461             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
462             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
463                                                                           static_cast<uint8_t>(0xFF);
464             const auto instanceCustomIndex =
465                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
466 
467             tlPtr->setInstanceCount(1);
468 
469             {
470                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
471 
472                 blPtr->setGeometryCount(1u);
473                 blPtr->addGeometry(vertexVec, (m_geometryType == GeometryType::TRIANGLES), bottomLevelGeometryFlags);
474 
475                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
476 
477                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
478                                    instanceCustomIndex, cullMask);
479             }
480 
481             if (optASFeedbackPtr != nullptr)
482             {
483                 for (auto nCell = 0u; nCell < nCells; nCell++)
484                 {
485                     const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
486                     const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
487                     const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
488 
489                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
490                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
491                                                                           instanceCustomIndex);
492                 }
493             }
494 
495             break;
496         }
497 
498         case AccelerationStructureLayout::ONE_TL_ONE_BL_MANY_GEOMETRIES:
499         {
500             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
501 
502             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
503             const auto nVerticesPerPrimitive =
504                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
505             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
506                                                                           static_cast<uint8_t>(0xFF);
507             const auto instanceCustomIndex =
508                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
509 
510             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
511 
512             tlPtr->setInstanceCount(1);
513 
514             {
515                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
516                 const auto nGeometries                              = vertexVec.size() / nVerticesPerPrimitive;
517 
518                 blPtr->setGeometryCount(nGeometries);
519 
520                 for (uint32_t nGeometry = 0; nGeometry < nGeometries; ++nGeometry)
521                 {
522                     std::vector<tcu::Vec3> currentGeometry(nVerticesPerPrimitive);
523 
524                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
525                     {
526                         currentGeometry.at(nVertex) = vertexVec.at(nGeometry * nVerticesPerPrimitive + nVertex);
527                     }
528 
529                     blPtr->addGeometry(currentGeometry, (m_geometryType == GeometryType::TRIANGLES),
530                                        bottomLevelGeometryFlags);
531                 }
532 
533                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
534 
535                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
536                                    instanceCustomIndex, cullMask);
537             }
538 
539             if (optASFeedbackPtr != nullptr)
540             {
541                 for (auto nCell = 0u; nCell < nCells; nCell++)
542                 {
543                     const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
544                     const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
545                     const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
546 
547                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
548                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
549                                                                           instanceCustomIndex);
550                 }
551             }
552 
553             break;
554         }
555 
556         case AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY:
557         {
558             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
559 
560             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
561             const auto nVerticesPerPrimitive =
562                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
563             const auto nInstances = vertexVec.size() / nVerticesPerPrimitive;
564 
565             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
566 
567             tlPtr->setInstanceCount(nInstances);
568 
569             for (uint32_t nInstance = 0; nInstance < nInstances; nInstance++)
570             {
571                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
572                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
573                                                                           optASPropertyProviderPtr->getCullMask(0, nInstance) :
574                                                                           static_cast<uint8_t>(0xFF);
575                 std::vector<tcu::Vec3> currentInstanceVertexVec;
576                 const auto instanceCustomIndex = (optASPropertyProviderPtr != nullptr) ?
577                                                      optASPropertyProviderPtr->getInstanceCustomIndex(0, nInstance) :
578                                                      0;
579 
580                 for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
581                 {
582                     currentInstanceVertexVec.push_back(vertexVec.at(nInstance * nVerticesPerPrimitive + nVertex));
583                 }
584 
585                 blPtr->setGeometryCount(1u);
586                 blPtr->addGeometry(currentInstanceVertexVec, (m_geometryType == GeometryType::TRIANGLES),
587                                    bottomLevelGeometryFlags);
588 
589                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
590 
591                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
592                                    instanceCustomIndex, cullMask);
593 
594                 if (optASFeedbackPtr != nullptr)
595                 {
596                     const auto cellX = (((nInstance) % m_gridSizeXYZ.x()));
597                     const auto cellY = (((nInstance / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
598                     const auto cellZ = (((nInstance / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
599 
600                     optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
601                     optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
602                                                                           instanceCustomIndex);
603                 }
604             }
605 
606             break;
607         }
608 
609         case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES:
610         {
611             DE_ASSERT((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::TRIANGLES));
612 
613             const auto &vertexVec = (m_geometryType == GeometryType::AABB) ? m_aabbVertexVec : m_triVertexVec;
614             const auto nVerticesPerPrimitive =
615                 (m_geometryType == GeometryType::AABB) ? 2u : 12u /* tris */ * 3 /* verts */;
616             const auto nPrimitivesDefined = static_cast<uint32_t>(vertexVec.size() / nVerticesPerPrimitive);
617             const auto nPrimitivesPerBLAS = 4;
618             const auto nBottomLevelASes   = nPrimitivesDefined / nPrimitivesPerBLAS;
619 
620             DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
621             DE_ASSERT((nPrimitivesDefined % nPrimitivesPerBLAS) == 0);
622 
623             tlPtr->setInstanceCount(nBottomLevelASes);
624 
625             for (uint32_t nBottomLevelAS = 0; nBottomLevelAS < nBottomLevelASes; nBottomLevelAS++)
626             {
627                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
628                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
629                                                                           optASPropertyProviderPtr->getCullMask(nBottomLevelAS, 0) :
630                                                                           static_cast<uint8_t>(0xFF);
631                 const auto instanceCustomIndex =
632                     (optASPropertyProviderPtr != nullptr) ?
633                         optASPropertyProviderPtr->getInstanceCustomIndex(nBottomLevelAS, 0) :
634                         0;
635 
636                 blPtr->setGeometryCount(nPrimitivesPerBLAS);
637 
638                 for (uint32_t nGeometry = 0; nGeometry < nPrimitivesPerBLAS; nGeometry++)
639                 {
640                     std::vector<tcu::Vec3> currentVertexVec;
641 
642                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
643                     {
644                         currentVertexVec.push_back(vertexVec.at(
645                             (nBottomLevelAS * nPrimitivesPerBLAS + nGeometry) * nVerticesPerPrimitive + nVertex));
646                     }
647 
648                     blPtr->addGeometry(currentVertexVec, (m_geometryType == GeometryType::TRIANGLES),
649                                        bottomLevelGeometryFlags);
650                 }
651 
652                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
653                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
654                                    instanceCustomIndex, cullMask);
655 
656                 if (optASFeedbackPtr != nullptr)
657                 {
658                     for (uint32_t cellIndex = nPrimitivesPerBLAS * nBottomLevelAS;
659                          cellIndex < nPrimitivesPerBLAS * (nBottomLevelAS + 1); cellIndex++)
660                     {
661                         const auto cellX = (((cellIndex) % m_gridSizeXYZ.x()));
662                         const auto cellY = (((cellIndex / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
663                         const auto cellZ = (((cellIndex / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
664 
665                         optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
666                         optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
667                                                                               instanceCustomIndex);
668                     }
669                 }
670             }
671 
672             break;
673         }
674 
675         case AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES:
676         {
677             DE_ASSERT(m_geometryType == GeometryType::AABB_AND_TRIANGLES);
678 
679             const auto nCellsDefined      = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
680             const auto nPrimitivesPerBLAS = 1;
681             const auto nBottomLevelASes   = nCellsDefined / nPrimitivesPerBLAS;
682 
683             DE_ASSERT((nCellsDefined % nPrimitivesPerBLAS) == 0);
684 
685             tlPtr->setInstanceCount(nBottomLevelASes);
686 
687             for (uint32_t nBottomLevelAS = 0; nBottomLevelAS < nBottomLevelASes; nBottomLevelAS++)
688             {
689                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
690                 const auto cullMask                                 = (optASPropertyProviderPtr != nullptr) ?
691                                                                           optASPropertyProviderPtr->getCullMask(nBottomLevelAS, 0) :
692                                                                           static_cast<uint8_t>(0xFF);
693                 const auto instanceCustomIndex =
694                     (optASPropertyProviderPtr != nullptr) ?
695                         optASPropertyProviderPtr->getInstanceCustomIndex(nBottomLevelAS, 0) :
696                         0;
697                 const bool usesAABB              = (nBottomLevelAS % 2) == 0;
698                 const auto &vertexVec            = (usesAABB) ? m_aabbVertexVec : m_triVertexVec;
699                 const auto nVerticesPerPrimitive = (usesAABB) ? 2u : 12u /* tris */ * 3 /* verts */;
700 
701                 // For this case, AABBs use the first shader group and triangles use the second shader group in the table.
702                 const auto instanceSBTOffset = (usesAABB ? 0u : 1u);
703 
704                 blPtr->setGeometryCount(nPrimitivesPerBLAS);
705 
706                 for (uint32_t nGeometry = 0; nGeometry < nPrimitivesPerBLAS; nGeometry++)
707                 {
708                     DE_ASSERT((vertexVec.size() % nVerticesPerPrimitive) == 0);
709 
710                     std::vector<tcu::Vec3> currentVertexVec;
711 
712                     for (uint32_t nVertex = 0; nVertex < nVerticesPerPrimitive; ++nVertex)
713                     {
714                         currentVertexVec.push_back(vertexVec.at(
715                             (nBottomLevelAS * nPrimitivesPerBLAS + nGeometry) * nVerticesPerPrimitive + nVertex));
716                     }
717 
718                     blPtr->addGeometry(currentVertexVec, !usesAABB, bottomLevelGeometryFlags);
719                 }
720 
721                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
722 
723                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
724                                    instanceCustomIndex, cullMask, instanceSBTOffset);
725 
726                 if (optASFeedbackPtr != nullptr)
727                 {
728                     for (uint32_t cellIndex = nPrimitivesPerBLAS * nBottomLevelAS;
729                          cellIndex < nPrimitivesPerBLAS * (nBottomLevelAS + 1); cellIndex++)
730                     {
731                         const auto cellX = (((cellIndex) % m_gridSizeXYZ.x()));
732                         const auto cellY = (((cellIndex / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
733                         const auto cellZ = (((cellIndex / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
734 
735                         optASFeedbackPtr->onCullMaskAssignedToCell(tcu::UVec3(cellX, cellY, cellZ), cullMask);
736                         optASFeedbackPtr->onInstanceCustomIndexAssignedToCell(tcu::UVec3(cellX, cellY, cellZ),
737                                                                               instanceCustomIndex);
738                     }
739                 }
740             }
741 
742             break;
743         }
744 
745         default:
746         {
747             deAssertFail("This should never happen", __FILE__, __LINE__);
748         }
749         }
750 
751         tlPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
752 
753         resultPtr = decltype(resultPtr)(tlPtr.release());
754         return resultPtr;
755     }
756 
getNPrimitives() const757     uint32_t getNPrimitives() const final
758     {
759         return m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
760     }
761 
setProperties(const tcu::Vec3 & gridStartXYZ,const tcu::Vec3 & gridCellSizeXYZ,const tcu::UVec3 & gridSizeXYZ,const tcu::Vec3 & gridInterCellDeltaXYZ,const GeometryType & geometryType)762     void setProperties(const tcu::Vec3 &gridStartXYZ, const tcu::Vec3 &gridCellSizeXYZ, const tcu::UVec3 &gridSizeXYZ,
763                        const tcu::Vec3 &gridInterCellDeltaXYZ, const GeometryType &geometryType)
764     {
765         m_gridStartXYZ          = gridStartXYZ;
766         m_gridCellSizeXYZ       = gridCellSizeXYZ;
767         m_gridSizeXYZ           = gridSizeXYZ;
768         m_gridInterCellDeltaXYZ = gridInterCellDeltaXYZ;
769         m_geometryType          = geometryType;
770 
771         fillVertexVec();
772     }
773 
774 private:
fillVertexVec()775     void fillVertexVec()
776     {
777         const auto nCellsNeeded = m_gridSizeXYZ.x() * m_gridSizeXYZ.y() * m_gridSizeXYZ.z();
778 
779         m_aabbVertexVec.clear();
780         m_triVertexVec.clear();
781 
782         for (auto nCell = 0u; nCell < nCellsNeeded; nCell++)
783         {
784             const auto cellX = (((nCell) % m_gridSizeXYZ.x()));
785             const auto cellY = (((nCell / m_gridSizeXYZ.x()) % m_gridSizeXYZ.y()));
786             const auto cellZ = (((nCell / m_gridSizeXYZ.x()) / m_gridSizeXYZ.y()) % m_gridSizeXYZ.z());
787 
788             const auto cellX1Y1Z1 =
789                 tcu::Vec3(m_gridStartXYZ.x() + static_cast<float>(cellX) * m_gridInterCellDeltaXYZ.x(),
790                           m_gridStartXYZ.y() + static_cast<float>(cellY) * m_gridInterCellDeltaXYZ.y(),
791                           m_gridStartXYZ.z() + static_cast<float>(cellZ) * m_gridInterCellDeltaXYZ.z());
792             const auto cellX2Y2Z2 = tcu::Vec3(
793                 m_gridStartXYZ.x() + static_cast<float>(cellX) * m_gridInterCellDeltaXYZ.x() + m_gridCellSizeXYZ.x(),
794                 m_gridStartXYZ.y() + static_cast<float>(cellY) * m_gridInterCellDeltaXYZ.y() + m_gridCellSizeXYZ.y(),
795                 m_gridStartXYZ.z() + static_cast<float>(cellZ) * m_gridInterCellDeltaXYZ.z() + m_gridCellSizeXYZ.z());
796 
797             if (m_geometryType == GeometryType::AABB || m_geometryType == GeometryType::AABB_AND_TRIANGLES)
798             {
799                 /* Cell = AABB of the cell */
800                 m_aabbVertexVec.push_back(cellX1Y1Z1);
801                 m_aabbVertexVec.push_back(cellX2Y2Z2);
802             }
803 
804             if (m_geometryType == GeometryType::AABB_AND_TRIANGLES || m_geometryType == GeometryType::TRIANGLES)
805             {
806                 /* Cell == Six triangles forming a cube
807                  *
808                  * Lower-case characters: vertices with Z == Z2
809                  * Upper-case characters: vertices with Z == Z1
810 
811 
812                         g                h
813 
814 
815                     C              D
816 
817 
818 
819                         e                f
820 
821                     A              B
822 
823 
824                  */
825                 const auto A = tcu::Vec3(cellX1Y1Z1.x(), cellX1Y1Z1.y(), cellX1Y1Z1.z());
826                 const auto B = tcu::Vec3(cellX2Y2Z2.x(), cellX1Y1Z1.y(), cellX1Y1Z1.z());
827                 const auto C = tcu::Vec3(cellX1Y1Z1.x(), cellX2Y2Z2.y(), cellX1Y1Z1.z());
828                 const auto D = tcu::Vec3(cellX2Y2Z2.x(), cellX2Y2Z2.y(), cellX1Y1Z1.z());
829                 const auto E = tcu::Vec3(cellX1Y1Z1.x(), cellX1Y1Z1.y(), cellX2Y2Z2.z());
830                 const auto F = tcu::Vec3(cellX2Y2Z2.x(), cellX1Y1Z1.y(), cellX2Y2Z2.z());
831                 const auto G = tcu::Vec3(cellX1Y1Z1.x(), cellX2Y2Z2.y(), cellX2Y2Z2.z());
832                 const auto H = tcu::Vec3(cellX2Y2Z2.x(), cellX2Y2Z2.y(), cellX2Y2Z2.z());
833 
834                 // Z = Z1 face
835                 m_triVertexVec.push_back(A);
836                 m_triVertexVec.push_back(C);
837                 m_triVertexVec.push_back(D);
838 
839                 m_triVertexVec.push_back(D);
840                 m_triVertexVec.push_back(B);
841                 m_triVertexVec.push_back(A);
842 
843                 // Z = Z2 face
844                 m_triVertexVec.push_back(E);
845                 m_triVertexVec.push_back(H);
846                 m_triVertexVec.push_back(G);
847 
848                 m_triVertexVec.push_back(H);
849                 m_triVertexVec.push_back(E);
850                 m_triVertexVec.push_back(F);
851 
852                 // X = X0 face
853                 m_triVertexVec.push_back(A);
854                 m_triVertexVec.push_back(G);
855                 m_triVertexVec.push_back(C);
856 
857                 m_triVertexVec.push_back(G);
858                 m_triVertexVec.push_back(A);
859                 m_triVertexVec.push_back(E);
860 
861                 // X = X1 face
862                 m_triVertexVec.push_back(B);
863                 m_triVertexVec.push_back(D);
864                 m_triVertexVec.push_back(H);
865 
866                 m_triVertexVec.push_back(H);
867                 m_triVertexVec.push_back(F);
868                 m_triVertexVec.push_back(B);
869 
870                 // Y = Y0 face
871                 m_triVertexVec.push_back(C);
872                 m_triVertexVec.push_back(H);
873                 m_triVertexVec.push_back(D);
874 
875                 m_triVertexVec.push_back(H);
876                 m_triVertexVec.push_back(C);
877                 m_triVertexVec.push_back(G);
878 
879                 // Y = y1 face
880                 m_triVertexVec.push_back(A);
881                 m_triVertexVec.push_back(B);
882                 m_triVertexVec.push_back(E);
883 
884                 m_triVertexVec.push_back(B);
885                 m_triVertexVec.push_back(F);
886                 m_triVertexVec.push_back(E);
887             }
888         }
889     }
890 
891     std::vector<tcu::Vec3> m_aabbVertexVec;
892     std::vector<tcu::Vec3> m_triVertexVec;
893 
894     GeometryType m_geometryType;
895     tcu::Vec3 m_gridCellSizeXYZ;
896     tcu::Vec3 m_gridInterCellDeltaXYZ;
897     tcu::UVec3 m_gridSizeXYZ;
898     tcu::Vec3 m_gridStartXYZ;
899 };
900 
901 /* Provides an AS holding a single {(0, 0, 0), (-1, 1, 0), {1, 1, 0} tri. */
902 class TriASProvider : public ASProviderBase
903 {
904 public:
TriASProvider()905     TriASProvider()
906     {
907         /* Stub*/
908     }
909 
createTLAS(Context & context,const AccelerationStructureLayout &,VkCommandBuffer cmdBuffer,const VkGeometryFlagsKHR & bottomLevelGeometryFlags,const ASPropertyProvider * optASPropertyProviderPtr,IGridASFeedback *) const910     std::unique_ptr<TopLevelAccelerationStructure> createTLAS(Context &context,
911                                                               const AccelerationStructureLayout & /* asLayout */,
912                                                               VkCommandBuffer cmdBuffer,
913                                                               const VkGeometryFlagsKHR &bottomLevelGeometryFlags,
914                                                               const ASPropertyProvider *optASPropertyProviderPtr,
915                                                               IGridASFeedback * /* optASFeedbackPtr */) const final
916     {
917         Allocator &allocator                   = context.getDefaultAllocator();
918         const DeviceInterface &deviceInterface = context.getDeviceInterface();
919         const VkDevice deviceVk                = context.getDevice();
920         std::unique_ptr<TopLevelAccelerationStructure> resultPtr;
921         de::MovePtr<TopLevelAccelerationStructure> tlPtr = makeTopLevelAccelerationStructure();
922 
923         {
924 
925             const auto cullMask = (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getCullMask(0, 0) :
926                                                                           static_cast<uint8_t>(0xFF);
927             const auto instanceCustomIndex =
928                 (optASPropertyProviderPtr != nullptr) ? optASPropertyProviderPtr->getInstanceCustomIndex(0, 0) : 0;
929 
930             tlPtr->setInstanceCount(1);
931 
932             {
933                 de::MovePtr<BottomLevelAccelerationStructure> blPtr = makeBottomLevelAccelerationStructure();
934                 const std::vector<tcu::Vec3> vertexVec = {tcu::Vec3(0, 0, 0), tcu::Vec3(-1, 1, 0), tcu::Vec3(1, 1, 0)};
935 
936                 blPtr->setGeometryCount(1u);
937                 blPtr->addGeometry(vertexVec, true, /* triangles */
938                                    bottomLevelGeometryFlags);
939 
940                 blPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
941 
942                 tlPtr->addInstance(de::SharedPtr<BottomLevelAccelerationStructure>(blPtr.release()), identityMatrix3x4,
943                                    instanceCustomIndex, cullMask);
944             }
945         }
946 
947         tlPtr->createAndBuild(deviceInterface, deviceVk, cmdBuffer, allocator);
948 
949         resultPtr = decltype(resultPtr)(tlPtr.release());
950         return resultPtr;
951     }
952 
getNPrimitives() const953     uint32_t getNPrimitives() const final
954     {
955         return 1;
956     }
957 };
958 
959 /* Test logic providers ==> */
960 class TestBase
961 {
962 public:
~TestBase()963     virtual ~TestBase()
964     {
965         /* Stub */
966     }
967 
968     virtual tcu::UVec3 getDispatchSize() const                                             = 0;
969     virtual uint32_t getResultBufferSize() const                                           = 0;
970     virtual std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const       = 0;
971     virtual void resetTLAS()                                                               = 0;
972     virtual void initAS(vkt::Context &context, RayTracingProperties *rtPropertiesPtr,
973                         VkCommandBuffer commandBuffer)                                     = 0;
974     virtual void initPrograms(SourceCollections &programCollection) const                  = 0;
975     virtual bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const = 0;
976 
CopyBufferContent(vkt::Context & context,VkCommandBuffer cmdBuffer,BufferWithMemory & src,BufferWithMemory & dst,VkBufferCopy bufferCopy) const977     void CopyBufferContent(vkt::Context &context, VkCommandBuffer cmdBuffer, BufferWithMemory &src,
978                            BufferWithMemory &dst, VkBufferCopy bufferCopy) const
979     {
980         const DeviceInterface &deviceInterface = context.getDeviceInterface();
981         const VkDevice deviceVk                = context.getDevice();
982         const VkQueue queueVk                  = context.getUniversalQueue();
983 
984         deviceInterface.resetCommandBuffer(cmdBuffer, 0);
985         beginCommandBuffer(deviceInterface, cmdBuffer, 0u /* flags */);
986 
987         deviceInterface.cmdCopyBuffer(cmdBuffer, *src, *dst, 1, &bufferCopy);
988 
989         const VkMemoryBarrier postCopyMemoryBarrier =
990             makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
991 
992         cmdPipelineMemoryBarrier(deviceInterface, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
993                                  &postCopyMemoryBarrier);
994 
995         endCommandBuffer(deviceInterface, cmdBuffer);
996 
997         submitCommandsAndWait(deviceInterface, deviceVk, queueVk, cmdBuffer);
998 
999         invalidateMappedMemoryRange(deviceInterface, deviceVk, dst.getAllocation().getMemory(),
1000                                     dst.getAllocation().getOffset(), VK_WHOLE_SIZE);
1001     }
1002 
copyDeviceBufferToHost(vkt::Context & context,BufferWithMemory & buffer) const1003     de::MovePtr<BufferWithMemory> copyDeviceBufferToHost(vkt::Context &context, BufferWithMemory &buffer) const
1004     {
1005         const DeviceInterface &deviceInterface = context.getDeviceInterface();
1006         const VkDevice deviceVk                = context.getDevice();
1007         Allocator &allocator                   = context.getDefaultAllocator();
1008         const uint32_t queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
1009 
1010         const Move<VkCommandPool> cmdPoolPtr = createCommandPool(
1011             deviceInterface, deviceVk, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, /* pCreateInfo */
1012             queueFamilyIndex);
1013         const Move<VkCommandBuffer> cmdBufferPtr =
1014             allocateCommandBuffer(deviceInterface, deviceVk, *cmdPoolPtr, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1015 
1016         VkDeviceSize resultBufferSize = buffer.getBufferSize();
1017         const auto resultBufferCreateInfo =
1018             makeBufferCreateInfo(resultBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1019 
1020         de::MovePtr<BufferWithMemory> resultBufferPtr;
1021 
1022         resultBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1023             deviceInterface, deviceVk, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1024 
1025         const VkBufferCopy bufferCopy{0, 0, resultBufferSize};
1026         CopyBufferContent(context, *cmdBufferPtr, buffer, *resultBufferPtr, bufferCopy);
1027 
1028         return resultBufferPtr;
1029     }
1030 
getAHitShaderCollectionShaderNames() const1031     virtual std::vector<std::string> getAHitShaderCollectionShaderNames() const
1032     {
1033         return {"ahit"};
1034     }
1035 
getASBindingArraySize() const1036     virtual uint32_t getASBindingArraySize() const
1037     {
1038         return 1u;
1039     }
1040 
getCallableShaderCollectionNames() const1041     virtual std::vector<std::string> getCallableShaderCollectionNames() const
1042     {
1043         return std::vector<std::string>{};
1044     }
1045 
getCHitShaderCollectionShaderNames() const1046     virtual std::vector<std::string> getCHitShaderCollectionShaderNames() const
1047     {
1048         return {"chit"};
1049     }
1050 
getDynamicStackSize(uint32_t maxPipelineRayRecursionDepth) const1051     virtual uint32_t getDynamicStackSize(uint32_t maxPipelineRayRecursionDepth) const
1052     {
1053         DE_ASSERT(false);
1054 
1055         DE_UNREF(maxPipelineRayRecursionDepth);
1056 
1057         return 0;
1058     }
1059 
getIntersectionShaderCollectionShaderNames() const1060     virtual std::vector<std::string> getIntersectionShaderCollectionShaderNames() const
1061     {
1062         return {"intersection"};
1063     }
1064 
getMaxRecursionDepthUsed() const1065     virtual uint32_t getMaxRecursionDepthUsed() const
1066     {
1067         return 1;
1068     }
1069 
getMissShaderCollectionShaderNames() const1070     virtual std::vector<std::string> getMissShaderCollectionShaderNames() const
1071     {
1072         return {"miss"};
1073     }
1074 
getNTraceRayInvocationsNeeded() const1075     virtual uint32_t getNTraceRayInvocationsNeeded() const
1076     {
1077         return 1;
1078     }
1079 
getPipelineLayout(const vk::DeviceInterface & deviceInterface,VkDevice deviceVk,VkDescriptorSetLayout descriptorSetLayout)1080     virtual Move<VkPipelineLayout> getPipelineLayout(const vk::DeviceInterface &deviceInterface, VkDevice deviceVk,
1081                                                      VkDescriptorSetLayout descriptorSetLayout)
1082     {
1083         return makePipelineLayout(deviceInterface, deviceVk, descriptorSetLayout);
1084     }
1085 
getResultBufferStartData() const1086     virtual std::vector<uint8_t> getResultBufferStartData() const
1087     {
1088         return std::vector<uint8_t>();
1089     }
1090 
getShaderRecordData(const ShaderGroups &) const1091     virtual const void *getShaderRecordData(const ShaderGroups & /* shaderGroup */) const
1092     {
1093         return nullptr;
1094     }
1095 
getShaderRecordSize(const ShaderGroups &) const1096     virtual uint32_t getShaderRecordSize(const ShaderGroups & /* shaderGroup */) const
1097     {
1098         return 0;
1099     }
1100 
getSpecializationInfoPtr(const VkShaderStageFlagBits &)1101     virtual VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits & /* shaderStage */)
1102     {
1103         return nullptr;
1104     }
1105 
init(vkt::Context &,RayTracingProperties *)1106     virtual bool init(vkt::Context & /* context    */, RayTracingProperties * /* rtPropsPtr */)
1107     {
1108         return true;
1109     }
1110 
onBeforeCmdTraceRays(const uint32_t &,vkt::Context &,VkCommandBuffer,VkPipelineLayout)1111     virtual void onBeforeCmdTraceRays(const uint32_t & /* nDispatch      */, vkt::Context & /* context        */,
1112                                       VkCommandBuffer /* commandBuffer  */, VkPipelineLayout /* pipelineLayout */)
1113     {
1114         /* Stub */
1115     }
1116 
onShaderStackSizeDiscovered(const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &,const VkDeviceSize &)1117     virtual void onShaderStackSizeDiscovered(const VkDeviceSize & /* raygenShaderStackSize   */,
1118                                              const VkDeviceSize & /* ahitShaderStackSize     */,
1119                                              const VkDeviceSize & /* chitShaderStackSize     */,
1120                                              const VkDeviceSize & /* missShaderStackSize     */,
1121                                              const VkDeviceSize & /* callableShaderStackSize */,
1122                                              const VkDeviceSize & /* isectShaderStackSize    */)
1123     {
1124         /* Stub */
1125     }
1126 
usesDynamicStackSize() const1127     virtual bool usesDynamicStackSize() const
1128     {
1129         return false;
1130     }
1131 };
1132 
1133 class AABBTriTLTest : public TestBase, public ASPropertyProvider
1134 {
1135 public:
AABBTriTLTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)1136     AABBTriTLTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
1137         : m_asStructureLayout(asStructureLayout)
1138         , m_geometryType(geometryType)
1139         , m_gridSize(tcu::UVec3(720, 1, 1))
1140         , m_lastCustomInstanceIndexUsed(0)
1141     {
1142     }
1143 
~AABBTriTLTest()1144     ~AABBTriTLTest()
1145     {
1146         /* Stub */
1147     }
1148 
getAHitShaderCollectionShaderNames() const1149     virtual std::vector<std::string> getAHitShaderCollectionShaderNames() const
1150     {
1151         return {"ahit", "ahit"};
1152     }
1153 
getCHitShaderCollectionShaderNames() const1154     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
1155     {
1156         return {};
1157     }
1158 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const1159     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
1160     {
1161         DE_UNREF(nBL);
1162         DE_UNREF(nInstance);
1163 
1164         return ++m_lastCustomInstanceIndexUsed;
1165     }
1166 
getDispatchSize() const1167     tcu::UVec3 getDispatchSize() const final
1168     {
1169         return tcu::UVec3(m_gridSize[0], m_gridSize[1], m_gridSize[2]);
1170     }
1171 
getResultBufferSize() const1172     uint32_t getResultBufferSize() const final
1173     {
1174         return static_cast<uint32_t>((2 /* nHits, nMisses */ + m_gridSize[0] * m_gridSize[1] * m_gridSize[2] *
1175                                                                    1 /* hit instance custom indices */) *
1176                                      sizeof(uint32_t));
1177     }
1178 
getTLASPtrVecToBind() const1179     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1180     {
1181         DE_ASSERT(m_tlPtr != nullptr);
1182 
1183         return {m_tlPtr.get()};
1184     }
1185 
resetTLAS()1186     void resetTLAS() final
1187     {
1188         m_tlPtr.reset();
1189     }
1190 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1191     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1192                 VkCommandBuffer commandBuffer) final
1193     {
1194         /* Each AS holds a single unit AABB / cube built of tris.
1195          *
1196          * Geometry in the zeroth acceleration structure starts at the origin. Subsequent ASes
1197          * hold geometry that is positioned so that geometry formed by the union of all ASes never
1198          * intersects.
1199          *
1200          * Each raygen shader invocation uses a unique origin+target pair for the traced ray, and
1201          * only one AS is expected to hold geometry that the ray can find intersection for.
1202          * The AS index is stored in the result buffer, which is later verified by the CPU.
1203          *
1204          * Due to the fact AccelerationStructureEXT array indexing must be dynamically uniform and
1205          * it is not guaranteed we can determine workgroup size on VK 1.1-conformant platforms,
1206          * we can only trace rays against the same AS in a single ray trace dispatch.
1207          */
1208         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
1209                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1210                                                                          m_gridSize,
1211                                                                          tcu::Vec3(3, 0, 0), /* gridInterCellDeltaXYZ */
1212                                                                          m_geometryType));
1213 
1214         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
1215                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
1216                                             this,     /* optASPropertyProviderPtr */
1217                                             nullptr); /* optASFeedbackPtr            */
1218     }
1219 
initPrograms(SourceCollections & programCollection) const1220     void initPrograms(SourceCollections &programCollection) const final
1221     {
1222         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1223                                                   0u,    /* flags        */
1224                                                   true); /* allowSpirv14 */
1225 
1226         const char *hitPropsDefinition = "struct HitProps\n"
1227                                          "{\n"
1228                                          "    uint instanceCustomIndex;\n"
1229                                          "};\n";
1230 
1231         {
1232             std::stringstream css;
1233 
1234             css << "#version 460 core\n"
1235                    "\n"
1236                    "#extension GL_EXT_ray_tracing : require\n"
1237                    "\n"
1238                    "hitAttributeEXT vec3 unusedAttribute;\n"
1239                    "\n" +
1240                        de::toString(hitPropsDefinition) +
1241                        "\n"
1242                        "layout(location = 0) rayPayloadInEXT      uint   unusedPayload;\n"
1243                        "layout(set      = 0, binding = 0, std430) buffer result\n"
1244                        "{\n"
1245                        "    uint     nHitsRegistered;\n"
1246                        "    uint     nMissesRegistered;\n"
1247                        "    HitProps hits[];\n"
1248                        "};\n"
1249                        "\n"
1250                        "void main()\n"
1251                        "{\n"
1252                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
1253                        "\n"
1254                        "    hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
1255                        "}\n";
1256 
1257             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1258         }
1259 
1260         {
1261             std::stringstream css;
1262 
1263             css << "#version 460 core\n"
1264                    "\n"
1265                    "#extension GL_EXT_ray_tracing : require\n"
1266                    "\n"
1267                    "hitAttributeEXT vec3 hitAttribute;\n"
1268                    "\n"
1269                    "void main()\n"
1270                    "{\n"
1271                    "    reportIntersectionEXT(0.95f, 0);\n"
1272                    "}\n";
1273 
1274             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1275         }
1276 
1277         {
1278             std::stringstream css;
1279 
1280             css << "#version 460 core\n"
1281                    "\n"
1282                    "#extension GL_EXT_ray_tracing : require\n"
1283                    "\n" +
1284                        de::toString(hitPropsDefinition) +
1285                        "\n"
1286                        "layout(set = 0, binding = 0, std430) buffer result\n"
1287                        "{\n"
1288                        "    uint     nHitsRegistered;\n"
1289                        "    uint     nMissesRegistered;\n"
1290                        "    HitProps hits[];\n"
1291                        "};\n"
1292                        "\n"
1293                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
1294                        "\n"
1295                        "void main()\n"
1296                        "{\n"
1297                        "    atomicAdd(nMissesRegistered, 1);\n"
1298                        "}\n";
1299 
1300             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
1301         }
1302 
1303         {
1304             std::stringstream css;
1305 
1306             css << "#version 460 core\n"
1307                    "\n"
1308                    "#extension GL_EXT_ray_tracing : require\n"
1309                    "\n"
1310                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
1311                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
1312                    "\n"
1313                    "void main()\n"
1314                    "{\n"
1315                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1316                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1317                    "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
1318                    "    float tmin         = 0.001;\n"
1319                    "    float tmax         = 9.0;\n"
1320                    "\n"
1321                    "    uint  cullMask     = 0xFF;\n"
1322                    "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
1323                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
1324                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
1325                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
1326                    "    vec3  direct       = normalize(target - origin);\n"
1327                    "\n"
1328                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
1329                    "0);\n"
1330                    "}\n";
1331 
1332             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
1333         }
1334     }
1335 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const1336     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
1337     {
1338         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
1339         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
1340         bool result                                   = false;
1341 
1342         typedef struct
1343         {
1344             uint32_t instanceCustomIndex;
1345         } HitProperties;
1346 
1347         std::map<uint32_t, uint32_t> customInstanceIndexToHitCountMap;
1348         const auto nHitsReported   = *resultU32Ptr;
1349         const auto nMissesReported = *(resultU32Ptr + 1);
1350 
1351         if (nHitsReported != m_gridSize[0] * m_gridSize[1] * m_gridSize[2])
1352         {
1353             goto end;
1354         }
1355 
1356         if (nMissesReported != 0)
1357         {
1358             goto end;
1359         }
1360 
1361         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
1362         {
1363             const HitProperties *hitPropsPtr =
1364                 reinterpret_cast<const HitProperties *>(resultU32Ptr + 2 /* preamble ints */) + nHit;
1365 
1366             customInstanceIndexToHitCountMap[hitPropsPtr->instanceCustomIndex]++;
1367 
1368             if (customInstanceIndexToHitCountMap[hitPropsPtr->instanceCustomIndex] > 1)
1369             {
1370                 goto end;
1371             }
1372         }
1373 
1374         for (uint32_t nInstance = 0; nInstance < nHitsReported; ++nInstance)
1375         {
1376             if (customInstanceIndexToHitCountMap.find(1 + nInstance) == customInstanceIndexToHitCountMap.end())
1377             {
1378                 goto end;
1379             }
1380         }
1381 
1382         result = true;
1383     end:
1384         return result;
1385     }
1386 
1387 private:
1388     const AccelerationStructureLayout m_asStructureLayout;
1389     const GeometryType m_geometryType;
1390 
1391     const tcu::UVec3 m_gridSize;
1392     mutable uint32_t m_lastCustomInstanceIndexUsed;
1393     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
1394 };
1395 
1396 class ASStressTest : public TestBase, public ASPropertyProvider
1397 {
1398 public:
ASStressTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)1399     ASStressTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
1400         : m_asStructureLayout(asStructureLayout)
1401         , m_geometryType(geometryType)
1402         , m_lastCustomInstanceIndexUsed(0)
1403         , m_nASesToUse(0)
1404         , m_nMaxASToUse(16u)
1405     {
1406     }
1407 
~ASStressTest()1408     ~ASStressTest()
1409     {
1410         /* Stub */
1411     }
1412 
getASBindingArraySize() const1413     uint32_t getASBindingArraySize() const final
1414     {
1415         DE_ASSERT(m_nASesToUse != 0);
1416 
1417         return m_nASesToUse;
1418     }
1419 
getCHitShaderCollectionShaderNames() const1420     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
1421     {
1422         return {};
1423     }
1424 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const1425     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
1426     {
1427         DE_UNREF(nBL);
1428         DE_UNREF(nInstance);
1429 
1430         return ++m_lastCustomInstanceIndexUsed;
1431     }
1432 
getDispatchSize() const1433     tcu::UVec3 getDispatchSize() const final
1434     {
1435         return tcu::UVec3(1, 1, 1);
1436     }
1437 
getNTraceRayInvocationsNeeded() const1438     uint32_t getNTraceRayInvocationsNeeded() const final
1439     {
1440         return m_nMaxASToUse;
1441     }
1442 
getResultBufferSize() const1443     uint32_t getResultBufferSize() const final
1444     {
1445         return static_cast<uint32_t>(
1446             (2 /* nHits, nMisses */ + 2 * m_nMaxASToUse /* hit instance custom indices + AS index */) *
1447             sizeof(uint32_t));
1448     }
1449 
getTLASPtrVecToBind() const1450     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1451     {
1452         std::vector<TopLevelAccelerationStructure *> resultVec;
1453 
1454         DE_ASSERT(m_tlPtrVec.size() != 0);
1455 
1456         for (auto &currentTLPtr : m_tlPtrVec)
1457         {
1458             resultVec.push_back(currentTLPtr.get());
1459         }
1460 
1461         return resultVec;
1462     }
1463 
resetTLAS()1464     void resetTLAS() final
1465     {
1466         for (auto &currentTLPtr : m_tlPtrVec)
1467         {
1468             currentTLPtr.reset();
1469         }
1470     }
1471 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)1472     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
1473     {
1474         /* NOTE: We clamp the number below to a sensible value, in case the implementation has no restrictions on the number of
1475          *         ASes accessible to shaders.
1476          */
1477         m_nASesToUse = std::min(rtPropertiesPtr->getMaxDescriptorSetAccelerationStructures(), m_nMaxASToUse);
1478 
1479         return true;
1480     }
1481 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1482     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1483                 VkCommandBuffer commandBuffer) final
1484     {
1485         /* Each AS holds a single unit AABB / cube built of tris.
1486          *
1487          * Geometry in the zeroth acceleration structure starts at the origin. Subsequent ASes
1488          * hold geometry that is positioned so that geometry formed by the union of all ASes never
1489          * intersects.
1490          *
1491          * Each raygen shader invocation uses a unique origin+target pair for the traced ray, and
1492          * only one AS is expected to hold geometry that the ray can find intersection for.
1493          * The AS index is stored in the result buffer, which is later verified by the CPU.
1494          *
1495          * Due to the fact AccelerationStructureEXT array indexing must be dynamically uniform and
1496          * it is not guaranteed we can determine workgroup size on VK 1.1-conformant platforms,
1497          * we can only trace rays against the same AS in a single ray trace dispatch.
1498          */
1499         std::unique_ptr<GridASProvider> asProviderPtr(
1500             new GridASProvider(tcu::Vec3(0, 0, 0),  /* gridStartXYZ          */
1501                                tcu::Vec3(1, 1, 1),  /* gridCellSizeXYZ       */
1502                                tcu::UVec3(1, 1, 1), /* gridSizeXYZ           */
1503                                tcu::Vec3(0, 0, 0),  /* gridInterCellDeltaXYZ */
1504                                m_geometryType));
1505 
1506         for (uint32_t nAS = 0; nAS < m_nASesToUse; ++nAS)
1507         {
1508             const auto origin = tcu::Vec3(3.0f * static_cast<float>(nAS), 0.0f, 0.0f);
1509 
1510             asProviderPtr->setProperties(origin, tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1511                                          tcu::UVec3(1, 1, 1),        /* gridSizeXYZ           */
1512                                          tcu::Vec3(0, 0, 0),         /* gridInterCellDeltaXYZ */
1513                                          m_geometryType);
1514 
1515             auto tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
1516                                                    VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
1517                                                    this,     /* optASPropertyProviderPtr */
1518                                                    nullptr); /* optASFeedbackPtr            */
1519 
1520             m_tlPtrVec.push_back(std::move(tlPtr));
1521         }
1522     }
1523 
initPrograms(SourceCollections & programCollection) const1524     void initPrograms(SourceCollections &programCollection) const final
1525     {
1526         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1527                                                   0u,    /* flags        */
1528                                                   true); /* allowSpirv14 */
1529 
1530         const char *hitPropsDefinition = "struct HitProps\n"
1531                                          "{\n"
1532                                          "    uint instanceCustomIndex;\n"
1533                                          "    uint nAS;\n"
1534                                          "};\n";
1535 
1536         {
1537             std::stringstream css;
1538 
1539             css << "#version 460 core\n"
1540                    "\n"
1541                    "#extension GL_EXT_ray_tracing : require\n"
1542                    "\n"
1543                    "hitAttributeEXT vec3 unusedAttribute;\n"
1544                    "\n" +
1545                        de::toString(hitPropsDefinition) +
1546                        "\n"
1547                        "layout(location = 0) rayPayloadInEXT      uint   nAS;\n"
1548                        "layout(set      = 0, binding = 0, std430) buffer result\n"
1549                        "{\n"
1550                        "    uint     nHitsRegistered;\n"
1551                        "    uint     nMissesRegistered;\n"
1552                        "    HitProps hits[];\n"
1553                        "};\n"
1554                        "\n"
1555                        "void main()\n"
1556                        "{\n"
1557                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
1558                        "\n"
1559                        "    hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
1560                        "    hits[nHit].nAS                 = nAS;\n"
1561                        "}\n";
1562 
1563             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1564         }
1565 
1566         {
1567             std::stringstream css;
1568 
1569             css << "#version 460 core\n"
1570                    "\n"
1571                    "#extension GL_EXT_ray_tracing : require\n"
1572                    "\n"
1573                    "hitAttributeEXT vec3 hitAttribute;\n"
1574                    "\n"
1575                    "void main()\n"
1576                    "{\n"
1577                    "    reportIntersectionEXT(0.95f, 0);\n"
1578                    "}\n";
1579 
1580             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1581         }
1582 
1583         {
1584             std::stringstream css;
1585 
1586             css << "#version 460 core\n"
1587                    "\n"
1588                    "#extension GL_EXT_ray_tracing : require\n"
1589                    "\n" +
1590                        de::toString(hitPropsDefinition) +
1591                        "\n"
1592                        "layout(set = 0, binding = 0, std430) buffer result\n"
1593                        "{\n"
1594                        "    uint     nHitsRegistered;\n"
1595                        "    uint     nMissesRegistered;\n"
1596                        "    HitProps hits[];\n"
1597                        "};\n"
1598                        "\n"
1599                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
1600                        "\n"
1601                        "void main()\n"
1602                        "{\n"
1603                        "    atomicAdd(nMissesRegistered, 1);\n"
1604                        "}\n";
1605 
1606             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
1607         }
1608 
1609         {
1610             std::stringstream css;
1611 
1612             css << "#version 460 core\n"
1613                    "\n"
1614                    "#extension GL_EXT_ray_tracing : require\n"
1615                    "\n"
1616                    "layout(push_constant) uniform pcUB\n"
1617                    "{\n"
1618                    "    uint nAS;\n"
1619                    "} ub;\n"
1620                    "\n"
1621                    "layout(location = 0)              rayPayloadEXT uint               payload;\n"
1622                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructures[" +
1623                        de::toString(m_nMaxASToUse) +
1624                        "];\n"
1625                        "\n"
1626                        "void main()\n"
1627                        "{\n"
1628                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1629                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1630                        "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
1631                        "    float tmin         = 0.001;\n"
1632                        "    float tmax         = 9.0;\n"
1633                        "\n"
1634                        "    uint  cullMask     = 0xFF;\n"
1635                        "    vec3  cellStartXYZ = vec3(ub.nAS * 3.0, 0.0, 0.0);\n"
1636                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
1637                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
1638                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
1639                        "    vec3  direct       = normalize(target - origin);\n"
1640                        "\n"
1641                        "    payload = ub.nAS;\n"
1642                        "\n"
1643                        "    traceRayEXT(accelerationStructures[ub.nAS], rayFlags, cullMask, 0, 0, 0, origin, tmin, "
1644                        "direct, tmax, 0);\n"
1645                        "}\n";
1646 
1647             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
1648         }
1649     }
1650 
getPipelineLayout(const vk::DeviceInterface & deviceInterface,VkDevice deviceVk,VkDescriptorSetLayout descriptorSetLayout)1651     Move<VkPipelineLayout> getPipelineLayout(const vk::DeviceInterface &deviceInterface, VkDevice deviceVk,
1652                                              VkDescriptorSetLayout descriptorSetLayout) final
1653     {
1654         VkPushConstantRange pushConstantRange;
1655 
1656         pushConstantRange.offset     = 0;
1657         pushConstantRange.size       = sizeof(uint32_t);
1658         pushConstantRange.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
1659 
1660         return makePipelineLayout(deviceInterface, deviceVk, 1, /* setLayoutCount */
1661                                   &descriptorSetLayout, 1,      /* pushRangeCount */
1662                                   &pushConstantRange);
1663     }
1664 
onBeforeCmdTraceRays(const uint32_t & nDispatch,vkt::Context & context,VkCommandBuffer commandBuffer,VkPipelineLayout pipelineLayout)1665     void onBeforeCmdTraceRays(const uint32_t &nDispatch, vkt::Context &context, VkCommandBuffer commandBuffer,
1666                               VkPipelineLayout pipelineLayout) final
1667     {
1668         /* No need for a sync point in-between trace ray commands - all writes are atomic */
1669         VkMemoryBarrier memBarrier;
1670 
1671         memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1672         memBarrier.pNext         = nullptr;
1673         memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1674         memBarrier.sType         = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
1675 
1676         context.getDeviceInterface().cmdPipelineBarrier(
1677             commandBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* srcStageMask       */
1678             VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,                /* dstStageMask       */
1679             0,                                                           /* dependencyFlags    */
1680             1,                                                           /* memoryBarrierCount */
1681             &memBarrier, 0,                                              /* bufferMemoryBarrierCount */
1682             nullptr,                                                     /* pBufferMemoryBarriers    */
1683             0,                                                           /* imageMemoryBarrierCount  */
1684             nullptr);                                                    /* pImageMemoryBarriers     */
1685 
1686         context.getDeviceInterface().cmdPushConstants(commandBuffer, pipelineLayout, VK_SHADER_STAGE_RAYGEN_BIT_KHR,
1687                                                       0, /* offset */
1688                                                       sizeof(uint32_t), &nDispatch);
1689     }
1690 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const1691     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
1692     {
1693         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
1694         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
1695         bool result                                   = false;
1696 
1697         typedef struct
1698         {
1699             uint32_t instanceCustomIndex;
1700             uint32_t nAS;
1701         } HitProperties;
1702 
1703         const auto nHitsReported   = *resultU32Ptr;
1704         const auto nMissesReported = *(resultU32Ptr + 1);
1705 
1706         if (nHitsReported != m_nMaxASToUse)
1707         {
1708             goto end;
1709         }
1710 
1711         if (nMissesReported != 0)
1712         {
1713             goto end;
1714         }
1715 
1716         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
1717         {
1718             const HitProperties *hitPropsPtr =
1719                 reinterpret_cast<const HitProperties *>(resultU32Ptr + 2 /* preamble ints */) + nHit;
1720 
1721             if (hitPropsPtr->instanceCustomIndex != (nHit + 1))
1722             {
1723                 goto end;
1724             }
1725 
1726             if (hitPropsPtr->nAS != nHit)
1727             {
1728                 goto end;
1729             }
1730         }
1731 
1732         result = true;
1733     end:
1734         return result;
1735     }
1736 
1737 private:
1738     const AccelerationStructureLayout m_asStructureLayout;
1739     const GeometryType m_geometryType;
1740 
1741     mutable uint32_t m_lastCustomInstanceIndexUsed;
1742     uint32_t m_nASesToUse;
1743     std::vector<std::unique_ptr<TopLevelAccelerationStructure>> m_tlPtrVec;
1744 
1745     const uint32_t m_nMaxASToUse;
1746 };
1747 
1748 class CallableShaderStressTest : public TestBase
1749 {
1750 public:
CallableShaderStressTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout,const bool & useDynamicStackSize)1751     CallableShaderStressTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout,
1752                              const bool &useDynamicStackSize)
1753         : m_asStructureLayout(asStructureLayout)
1754         , m_geometryType(geometryType)
1755         , m_gridSizeXYZ(tcu::UVec3(128, 1, 1))
1756         , m_nMaxCallableLevels((useDynamicStackSize) ? 8 : 2 /* as per spec */)
1757         , m_useDynamicStackSize(useDynamicStackSize)
1758         , m_ahitShaderStackSize(0)
1759         , m_callableShaderStackSize(0)
1760         , m_chitShaderStackSize(0)
1761         , m_isectShaderStackSize(0)
1762         , m_missShaderStackSize(0)
1763         , m_raygenShaderStackSize(0)
1764     {
1765     }
1766 
/* No explicit teardown is performed here. */
~CallableShaderStressTest() = default;
1771 
getCallableShaderCollectionNames() const1772     std::vector<std::string> getCallableShaderCollectionNames() const final
1773     {
1774         std::vector<std::string> resultVec(m_nMaxCallableLevels);
1775 
1776         for (uint32_t nLevel = 0; nLevel < m_nMaxCallableLevels; nLevel++)
1777         {
1778             resultVec.at(nLevel) = "call" + de::toString(nLevel);
1779         }
1780 
1781         return resultVec;
1782     }
1783 
getDispatchSize() const1784     tcu::UVec3 getDispatchSize() const final
1785     {
1786         DE_ASSERT(m_gridSizeXYZ[0] != 0);
1787         DE_ASSERT(m_gridSizeXYZ[1] != 0);
1788         DE_ASSERT(m_gridSizeXYZ[2] != 0);
1789 
1790         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
1791     }
1792 
getDynamicStackSize(const uint32_t maxPipelineRayRecursionDepth) const1793     uint32_t getDynamicStackSize(const uint32_t maxPipelineRayRecursionDepth) const final
1794     {
1795         uint32_t result                              = 0;
1796         const auto maxStackSpaceNeededForZerothTrace = static_cast<uint32_t>(de::max(
1797             de::max(m_chitShaderStackSize, m_missShaderStackSize), m_isectShaderStackSize + m_ahitShaderStackSize));
1798         const auto maxStackSpaceNeededForNonZerothTraces =
1799             static_cast<uint32_t>(de::max(m_chitShaderStackSize, m_missShaderStackSize));
1800 
1801         DE_ASSERT(m_useDynamicStackSize);
1802 
1803         result = static_cast<uint32_t>(m_raygenShaderStackSize) +
1804                  de::min(1u, maxPipelineRayRecursionDepth) * maxStackSpaceNeededForZerothTrace +
1805                  de::max(0u, maxPipelineRayRecursionDepth - 1) * maxStackSpaceNeededForNonZerothTraces +
1806                  m_nMaxCallableLevels * static_cast<uint32_t>(m_callableShaderStackSize);
1807 
1808         DE_ASSERT(result != 0);
1809         return result;
1810     }
1811 
getResultBufferSize() const1812     uint32_t getResultBufferSize() const final
1813     {
1814         const auto nRaysTraced                          = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
1815         const auto nClosestHitShaderInvocationsExpected = nRaysTraced / 2;
1816         const auto nMissShaderInvocationsExpected       = nRaysTraced / 2;
1817         const auto resultItemSize =
1818             sizeof(uint32_t) * 3 /* shaderStage, nOriginRay, nLevel */ + sizeof(float) * m_nMaxCallableLevels;
1819 
1820         DE_ASSERT((nRaysTraced % 2) == 0);
1821         DE_ASSERT(m_nMaxCallableLevels != 0);
1822         DE_ASSERT(m_gridSizeXYZ[0] != 0);
1823         DE_ASSERT(m_gridSizeXYZ[1] != 0);
1824         DE_ASSERT(m_gridSizeXYZ[2] != 0);
1825 
1826         return static_cast<uint32_t>(
1827             sizeof(uint32_t) /* nItemsStored */ +
1828             (resultItemSize * m_nMaxCallableLevels) *
1829                 (nRaysTraced + nMissShaderInvocationsExpected + nClosestHitShaderInvocationsExpected));
1830     }
1831 
getTLASPtrVecToBind() const1832     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
1833     {
1834         DE_ASSERT(m_tlPtr != nullptr);
1835 
1836         return {m_tlPtr.get()};
1837     }
1838 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)1839     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
1840     {
1841         DE_UNREF(rtPropertiesPtr);
1842         return true;
1843     }
1844 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)1845     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
1846                 VkCommandBuffer commandBuffer) final
1847     {
1848         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
1849                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
1850                                                                          m_gridSizeXYZ,
1851                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
1852                                                                          m_geometryType));
1853 
1854         m_tlPtr =
1855             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
1856                                       nullptr,                                        /* optASPropertyProviderPtr */
1857                                       nullptr);                                       /* optASFeedbackPtr            */
1858     }
1859 
initPrograms(SourceCollections & programCollection) const1860     void initPrograms(SourceCollections &programCollection) const final
1861     {
1862         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1863                                                   0u,    /* flags        */
1864                                                   true); /* allowSpirv14 */
1865 
1866         std::vector<std::string> callableDataDefinitions(m_nMaxCallableLevels);
1867         std::vector<std::string> callableDataInDefinitions(m_nMaxCallableLevels);
1868 
1869         for (uint32_t nCallableDataLevel = 0; nCallableDataLevel < m_nMaxCallableLevels; ++nCallableDataLevel)
1870         {
1871             const auto locationsPerCallableData = (3 /* uints */ + (nCallableDataLevel + 1) /* dataChunks */);
1872             const auto callableDataLocation     = locationsPerCallableData * nCallableDataLevel;
1873 
1874             callableDataDefinitions.at(nCallableDataLevel) =
1875                 "layout (location = " + de::toString(callableDataLocation) +
1876                 ") callableDataEXT struct\n"
1877                 "{\n"
1878                 "    uint  shaderStage;\n"
1879                 "    uint  nOriginRay;\n"
1880                 "    uint  nLevel;\n"
1881                 "    float dataChunk[" +
1882                 de::toString(nCallableDataLevel + 1) +
1883                 "];\n"
1884                 "} callableData" +
1885                 de::toString(nCallableDataLevel) + ";\n";
1886 
1887             callableDataInDefinitions.at(nCallableDataLevel) =
1888                 "layout(location = " + de::toString(callableDataLocation) +
1889                 ") callableDataInEXT struct\n"
1890                 "{\n"
1891                 "    uint  shaderStage;\n"
1892                 "    uint  nOriginRay;\n"
1893                 "    uint  nLevel;\n"
1894                 "    float dataChunk[" +
1895                 de::toString(nCallableDataLevel + 1) +
1896                 "];\n"
1897                 "} inData;\n";
1898 
1899             m_callableDataLevelToCallableDataLocation[nCallableDataLevel] = callableDataLocation;
1900         }
1901 
1902         const auto resultBufferDefinition = "struct ResultData\n"
1903                                             "{\n"
1904                                             "    uint  shaderStage;\n"
1905                                             "    uint  nOriginRay;\n"
1906                                             "    uint  nLevel;\n"
1907                                             "    float dataChunk[" +
1908                                             de::toString(m_nMaxCallableLevels) +
1909                                             "];\n"
1910                                             "};\n"
1911                                             "\n"
1912                                             "layout(set = 0, binding = 0, std430) buffer result\n"
1913                                             "{\n"
1914                                             "    uint       nInvocationsRegistered;\n"
1915                                             "    ResultData resultData[];\n"
1916                                             "};\n";
1917 
1918         {
1919             std::stringstream css;
1920 
1921             /* NOTE: executeCallable() is unavailable in ahit stage */
1922             css << "#version 460 core\n"
1923                    "\n"
1924                    "#extension GL_EXT_ray_tracing : require\n"
1925                    "\n"
1926                    "layout(location = 128) rayPayloadInEXT uint unusedPayload;\n"
1927                    "\n"
1928                    "void main()\n"
1929                    "{\n"
1930                    "}\n";
1931 
1932             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
1933         }
1934 
1935         {
1936             std::stringstream css;
1937 
1938             css << "#version 460 core\n"
1939                    "\n"
1940                    "#extension GL_EXT_ray_tracing : require\n"
1941                    "\n"
1942                    "layout(location = 128) rayPayloadInEXT uint rayIndex;\n"
1943                    "\n" +
1944                        de::toString(callableDataDefinitions.at(0)) + de::toString(resultBufferDefinition) +
1945                        "void main()\n"
1946                        "{\n"
1947                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1948                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1949                        "\n"
1950                        "    callableData0.shaderStage  = 3;\n"
1951                        "    callableData0.nOriginRay   = nInvocation;\n"
1952                        "    callableData0.nLevel       = 0;\n"
1953                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
1954                        "\n"
1955                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
1956                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
1957                        ");\n"
1958                        "}\n";
1959 
1960             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
1961         }
1962 
1963         {
1964             std::stringstream css;
1965 
1966             /* NOTE: executeCallable() is unavailable in isect stage */
1967             css << "#version 460 core\n"
1968                    "\n"
1969                    "#extension GL_EXT_ray_tracing : require\n"
1970                    "\n"
1971                    "void main()\n"
1972                    "{\n"
1973                    "    reportIntersectionEXT(0.95f, 0);\n"
1974                    "}\n";
1975 
1976             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
1977         }
1978 
1979         {
1980             std::stringstream css;
1981 
1982             css << "#version 460 core\n"
1983                    "\n"
1984                    "#extension GL_EXT_ray_tracing : require\n"
1985                    "\n" +
1986                        de::toString(callableDataDefinitions.at(0)) + de::toString(resultBufferDefinition) +
1987                        "\n"
1988                        "void main()\n"
1989                        "{\n"
1990                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
1991                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
1992                        "\n"
1993                        "    callableData0.shaderStage  = 2;\n"
1994                        "    callableData0.nOriginRay   = nInvocation;\n"
1995                        "    callableData0.nLevel       = 0;\n"
1996                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
1997                        "\n"
1998                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
1999                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
2000                        ");\n"
2001                        "}\n";
2002 
2003             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
2004         }
2005 
2006         {
2007             std::stringstream css;
2008 
2009             css << "#version 460 core\n"
2010                    "\n"
2011                    "#extension GL_EXT_ray_tracing : require\n"
2012                    "\n" +
2013                        de::toString(callableDataDefinitions.at(0)) +
2014                        "layout(location = 128)            rayPayloadEXT uint               unusedPayload;\n"
2015                        "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
2016                        "\n"
2017                        "void main()\n"
2018                        "{\n"
2019                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2020                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2021                        "    uint  rayFlags     = 0;\n"
2022                        "    float tmin         = 0.001;\n"
2023                        "    float tmax         = 9.0;\n"
2024                        "\n"
2025                        "    uint  cullMask     = 0xFF;\n"
2026                        "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
2027                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
2028                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
2029                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
2030                        "    vec3  direct       = normalize(target - origin);\n"
2031                        "\n"
2032                        "    callableData0.shaderStage  = 0;\n"
2033                        "    callableData0.nOriginRay   = nInvocation;\n"
2034                        "    callableData0.nLevel       = 0;\n"
2035                        "    callableData0.dataChunk[0] = float(nInvocation);\n"
2036                        "\n"
2037                        "    executeCallableEXT(0 /* sbtRecordIndex */, " +
2038                        de::toString(m_callableDataLevelToCallableDataLocation.at(0)) +
2039                        ");\n"
2040                        "\n"
2041                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
2042                        "tmax, 128);\n"
2043                        "}\n";
2044 
2045             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
2046         }
2047 
2048         for (uint32_t nCallableShader = 0; nCallableShader < m_nMaxCallableLevels; ++nCallableShader)
2049         {
2050             const bool canInvokeExecutable = (nCallableShader != (m_nMaxCallableLevels - 1));
2051             std::stringstream css;
2052 
2053             css << "#version 460 core\n"
2054                    "\n"
2055                    "#extension GL_EXT_ray_tracing : require\n"
2056                    "\n" +
2057                        de::toString(resultBufferDefinition);
2058 
2059             if ((nCallableShader + 1) != m_nMaxCallableLevels)
2060             {
2061                 css << de::toString(callableDataDefinitions.at(nCallableShader + 1));
2062             }
2063 
2064             css << callableDataInDefinitions[nCallableShader] +
2065                        "\n"
2066                        "void main()\n"
2067                        "{\n"
2068                        "    uint nInvocation = atomicAdd(nInvocationsRegistered, 1);\n"
2069                        "\n"
2070                        "    resultData[nInvocation].shaderStage = inData.shaderStage;\n"
2071                        "    resultData[nInvocation].nOriginRay  = inData.nOriginRay;\n"
2072                        "    resultData[nInvocation].nLevel      = inData.nLevel;\n";
2073 
2074             for (uint32_t nLevel = 0; nLevel < nCallableShader + 1; ++nLevel)
2075             {
2076                 css << "    resultData[nInvocation].dataChunk[" + de::toString(nLevel) + "] = inData.dataChunk[" +
2077                            de::toString(nLevel) + "];\n";
2078             }
2079 
2080             if (canInvokeExecutable)
2081             {
2082                 css << "\n"
2083                        "    callableData" +
2084                            de::toString(nCallableShader + 1) +
2085                            ".shaderStage = 1;\n"
2086                            "    callableData" +
2087                            de::toString(nCallableShader + 1) +
2088                            ".nOriginRay  = inData.nOriginRay;\n"
2089                            "    callableData" +
2090                            de::toString(nCallableShader + 1) + ".nLevel      = " + de::toString(nCallableShader) +
2091                            ";\n"
2092                            "\n";
2093 
2094                 for (uint32_t nLevel = 0; nLevel <= nCallableShader + 1; ++nLevel)
2095                 {
2096                     css << "    callableData" + de::toString(nCallableShader + 1) + ".dataChunk[" +
2097                                de::toString(nLevel) + "] = float(inData.nOriginRay + " + de::toString(nLevel) + ");\n";
2098                 }
2099 
2100                 css << "\n"
2101                        "    executeCallableEXT(" +
2102                            de::toString(nCallableShader + 1) + ", " +
2103                            de::toString(m_callableDataLevelToCallableDataLocation[nCallableShader + 1]) + ");\n";
2104             }
2105 
2106             css << "\n"
2107                    "};\n";
2108 
2109             programCollection.glslSources.add("call" + de::toString(nCallableShader))
2110                 << glu::CallableSource(css.str()) << buildOptions;
2111         }
2112     }
2113 
onShaderStackSizeDiscovered(const VkDeviceSize & raygenShaderStackSize,const VkDeviceSize & ahitShaderStackSize,const VkDeviceSize & chitShaderStackSize,const VkDeviceSize & missShaderStackSize,const VkDeviceSize & callableShaderStackSize,const VkDeviceSize & isectShaderStackSize)2114     void onShaderStackSizeDiscovered(const VkDeviceSize &raygenShaderStackSize, const VkDeviceSize &ahitShaderStackSize,
2115                                      const VkDeviceSize &chitShaderStackSize, const VkDeviceSize &missShaderStackSize,
2116                                      const VkDeviceSize &callableShaderStackSize,
2117                                      const VkDeviceSize &isectShaderStackSize) final
2118     {
2119         m_ahitShaderStackSize     = ahitShaderStackSize;
2120         m_callableShaderStackSize = callableShaderStackSize;
2121         m_chitShaderStackSize     = chitShaderStackSize;
2122         m_isectShaderStackSize    = isectShaderStackSize;
2123         m_missShaderStackSize     = missShaderStackSize;
2124         m_raygenShaderStackSize   = raygenShaderStackSize;
2125     }
2126 
resetTLAS()2127     void resetTLAS() final
2128     {
2129         m_tlPtr.reset();
2130     }
2131 
usesDynamicStackSize() const2132     bool usesDynamicStackSize() const final
2133     {
2134         return m_useDynamicStackSize;
2135     }
2136 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const2137     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
2138     {
2139         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
2140         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
2141         bool result                                   = false;
2142         const auto nItemsStored                       = *resultU32Ptr;
2143 
2144         /* Convert raw binary data into a human-readable vector representation */
2145         struct ResultItem
2146         {
2147             VkShaderStageFlagBits shaderStage;
2148             uint32_t nLevel;
2149             std::vector<float> dataChunk;
2150 
2151             ResultItem() : shaderStage(VK_SHADER_STAGE_ALL), nLevel(0)
2152             {
2153                 /* Stub */
2154             }
2155         };
2156 
2157         std::map<uint32_t, std::vector<ResultItem>> nRayToResultItemVecMap;
2158 
2159         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
2160         {
2161             const uint32_t *itemDataPtr = resultU32Ptr + 1 /* nItemsStored */ +
2162                                           nItem * (3 /* preamble ints */ + m_nMaxCallableLevels /* received data */);
2163             ResultItem item;
2164             const auto &nOriginRay = *(itemDataPtr + 1);
2165 
2166             item.dataChunk.resize(m_nMaxCallableLevels);
2167 
2168             switch (*itemDataPtr)
2169             {
2170             case 0:
2171                 item.shaderStage = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
2172                 break;
2173             case 1:
2174                 item.shaderStage = VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2175                 break;
2176             case 2:
2177                 item.shaderStage = VK_SHADER_STAGE_MISS_BIT_KHR;
2178                 break;
2179             case 3:
2180                 item.shaderStage = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR;
2181                 break;
2182 
2183             default:
2184             {
2185                 deAssertFail("This should never happen", __FILE__, __LINE__);
2186             }
2187             }
2188 
2189             item.nLevel = *(itemDataPtr + 2);
2190 
2191             memcpy(item.dataChunk.data(), itemDataPtr + 3, m_nMaxCallableLevels * sizeof(float));
2192 
2193             nRayToResultItemVecMap[nOriginRay].push_back(item);
2194         }
2195 
2196         for (uint32_t nRay = 0; nRay < m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2]; ++nRay)
2197         {
2198             /* 1. Make sure each ray generated the anticipated number of stores */
2199             const bool closestHitShaderInvoked            = (nRay % 2) == 0;
2200             const bool missShaderInvoked                  = (nRay % 2) != 0;
2201             const uint32_t nShaderStagesInvokingCallables = 1 + /* raygen */
2202                                                             ((closestHitShaderInvoked) ? 1 : 0) +
2203                                                             ((missShaderInvoked) ? 1 : 0);
2204             auto rayIterator = nRayToResultItemVecMap.find(nRay);
2205 
2206             if (rayIterator == nRayToResultItemVecMap.end())
2207             {
2208                 goto end;
2209             }
2210 
2211             if (rayIterator->second.size() != nShaderStagesInvokingCallables * m_nMaxCallableLevels)
2212             {
2213                 goto end;
2214             }
2215 
2216             /* 2. Make sure each shader stage generated the anticipated number of result items */
2217             {
2218                 uint32_t nCallableShaderStageItemsFound   = 0;
2219                 uint32_t nClosestHitShaderStageItemsFound = 0;
2220                 uint32_t nMissShaderStageItemsFound       = 0;
2221                 uint32_t nRaygenShaderStageItemsFound     = 0;
2222 
2223                 for (const auto &currentItem : rayIterator->second)
2224                 {
2225                     if (currentItem.shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
2226                     {
2227                         nRaygenShaderStageItemsFound++;
2228                     }
2229                     else if (currentItem.shaderStage == VK_SHADER_STAGE_CALLABLE_BIT_KHR)
2230                     {
2231                         nCallableShaderStageItemsFound++;
2232                     }
2233                     else if (currentItem.shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)
2234                     {
2235                         nMissShaderStageItemsFound++;
2236                     }
2237                     else if (currentItem.shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
2238                     {
2239                         nClosestHitShaderStageItemsFound++;
2240                     }
2241                     else
2242                     {
2243                         DE_ASSERT(false);
2244                     }
2245                 }
2246 
2247                 if (nRaygenShaderStageItemsFound != 1)
2248                 {
2249                     goto end;
2250                 }
2251 
2252                 /* Even rays hit geometry. Odd ones don't */
2253                 if (!missShaderInvoked)
2254                 {
2255                     if (nClosestHitShaderStageItemsFound == 0)
2256                     {
2257                         goto end;
2258                     }
2259 
2260                     if (nMissShaderStageItemsFound != 0)
2261                     {
2262                         goto end;
2263                     }
2264                 }
2265                 else
2266                 {
2267                     if (nClosestHitShaderStageItemsFound != 0)
2268                     {
2269                         goto end;
2270                     }
2271 
2272                     if (nMissShaderStageItemsFound != 1)
2273                     {
2274                         goto end;
2275                     }
2276                 }
2277 
2278                 if (nCallableShaderStageItemsFound != nShaderStagesInvokingCallables * (m_nMaxCallableLevels - 1))
2279                 {
2280                     goto end;
2281                 }
2282             }
2283 
2284             /* 3. Verify data chunk's correctness */
2285             {
2286                 for (const auto &currentItem : rayIterator->second)
2287                 {
2288                     const auto nValidItemsRequired =
2289                         (currentItem.shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)      ? 1 :
2290                         (currentItem.shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)        ? 1 :
2291                         (currentItem.shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) ? 1 :
2292                                                                                            (currentItem.nLevel + 1);
2293 
2294                     for (uint32_t nItem = 0; nItem < nValidItemsRequired; ++nItem)
2295                     {
2296                         if (fabsf(currentItem.dataChunk.at(nItem) - static_cast<float>(nRay + nItem)) > 1e-3f)
2297                         {
2298                             goto end;
2299                         }
2300                     }
2301                 }
2302             }
2303 
2304             /* 4. Verify all shader levels have been reported for relevant shader stages */
2305             {
2306                 std::map<VkShaderStageFlagBits, std::vector<uint32_t>> shaderStageToLevelVecReportedMap;
2307 
2308                 for (const auto &currentItem : rayIterator->second)
2309                 {
2310                     shaderStageToLevelVecReportedMap[currentItem.shaderStage].push_back(currentItem.nLevel);
2311                 }
2312 
2313                 if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_RAYGEN_BIT_KHR).size() != 1 ||
2314                     shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_RAYGEN_BIT_KHR).at(0) != 0)
2315                 {
2316                     goto end;
2317                 }
2318 
2319                 if (closestHitShaderInvoked)
2320                 {
2321                     if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR).size() != 1 ||
2322                         shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR).at(0) != 0)
2323                     {
2324                         goto end;
2325                     }
2326                 }
2327                 else
2328                 {
2329                     if (shaderStageToLevelVecReportedMap.find(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) !=
2330                         shaderStageToLevelVecReportedMap.end())
2331                     {
2332                         goto end;
2333                     }
2334                 }
2335 
2336                 if (missShaderInvoked)
2337                 {
2338                     if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_MISS_BIT_KHR).size() != 1 ||
2339                         shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_MISS_BIT_KHR).at(0) != 0)
2340                     {
2341                         goto end;
2342                     }
2343                 }
2344                 else
2345                 {
2346                     if (shaderStageToLevelVecReportedMap.find(VK_SHADER_STAGE_MISS_BIT_KHR) !=
2347                         shaderStageToLevelVecReportedMap.end())
2348                     {
2349                         goto end;
2350                     }
2351                 }
2352 
2353                 if (shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CALLABLE_BIT_KHR).size() !=
2354                     nShaderStagesInvokingCallables * (m_nMaxCallableLevels - 1))
2355                 {
2356                     goto end;
2357                 }
2358 
2359                 for (uint32_t nLevel = 0; nLevel < m_nMaxCallableLevels - 1; ++nLevel)
2360                 {
2361                     const auto &vec  = shaderStageToLevelVecReportedMap.at(VK_SHADER_STAGE_CALLABLE_BIT_KHR);
2362                     auto vecIterator = std::find(vec.begin(), vec.end(), nLevel);
2363 
2364                     if (vecIterator == vec.end())
2365                     {
2366                         goto end;
2367                     }
2368                 }
2369             }
2370         }
2371 
2372         result = true;
2373     end:
2374         return result;
2375     }
2376 
2377 private:
2378     const AccelerationStructureLayout m_asStructureLayout;
2379     const GeometryType m_geometryType;
2380 
2381     const tcu::UVec3 m_gridSizeXYZ;
2382     const uint32_t m_nMaxCallableLevels;
2383     const bool m_useDynamicStackSize;
2384     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
2385 
2386     VkDeviceSize m_ahitShaderStackSize;
2387     VkDeviceSize m_callableShaderStackSize;
2388     VkDeviceSize m_chitShaderStackSize;
2389     VkDeviceSize m_isectShaderStackSize;
2390     VkDeviceSize m_missShaderStackSize;
2391     VkDeviceSize m_raygenShaderStackSize;
2392 
2393     mutable std::map<uint32_t, uint32_t> m_callableDataLevelToCallableDataLocation;
2394 };
2395 
2396 class CullMaskTest : public TestBase, public ASPropertyProvider
2397 {
2398 public:
CullMaskTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType,const bool & useExtraCullMaskBits)2399     CullMaskTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType,
2400                  const bool &useExtraCullMaskBits)
2401         : m_asLayout(asLayout)
2402         , m_geometryType(geometryType)
2403         , m_nMaxHitsToRegister(256)
2404         , m_nRaysPerInvocation(4)
2405         , m_useExtraCullMaskBits(useExtraCullMaskBits)
2406         , m_lastCustomInstanceIndexUsed(0)
2407         , m_nCullMasksUsed(1)
2408     {
2409         /* Stub */
2410     }
2411 
~CullMaskTest()2412     ~CullMaskTest()
2413     {
2414         /* Stub */
2415     }
2416 
getCHitShaderCollectionShaderNames() const2417     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
2418     {
2419         return {};
2420     }
2421 
getCullMask(const uint32_t & nBL,const uint32_t & nInstance) const2422     uint8_t getCullMask(const uint32_t &nBL, const uint32_t &nInstance) const final
2423     {
2424         DE_UNREF(nBL);
2425         DE_UNREF(nInstance);
2426 
2427         uint8_t result = (m_nCullMasksUsed++) & 0xFF;
2428 
2429         DE_ASSERT(result != 0);
2430         return result;
2431     }
2432 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const2433     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
2434     {
2435         DE_UNREF(nBL);
2436         DE_UNREF(nInstance);
2437 
2438         /* NOTE: The formula below generates a sequence of unique large values. */
2439         uint32_t result = (m_lastCustomInstanceIndexUsed * 7 + 153325) & ((1 << 24) - 1);
2440 
2441         if (m_instanceCustomIndexVec.size() <= nInstance)
2442         {
2443             m_instanceCustomIndexVec.resize(nInstance + 1);
2444         }
2445 
2446         m_instanceCustomIndexVec[nInstance] = result;
2447         m_lastCustomInstanceIndexUsed       = result;
2448 
2449         return result;
2450     }
2451 
getDispatchSize() const2452     tcu::UVec3 getDispatchSize() const final
2453     {
2454         //< 3*5*17 == 255, which coincidentally is the maximum cull mask value the spec permits.
2455         //<
2456         //< This global WG size is excessively large if m_nRaysPerInvocation > 1 but the raygen shader has
2457         //< a guard condition check that drops extraneous invocations.
2458         return tcu::UVec3(3, 5, 17);
2459     }
2460 
getResultBufferSize() const2461     uint32_t getResultBufferSize() const final
2462     {
2463         return static_cast<uint32_t>((1 + m_nMaxHitsToRegister * 2) * sizeof(uint32_t));
2464     }
2465 
getTLASPtrVecToBind() const2466     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
2467     {
2468         return {m_tlPtr.get()};
2469     }
2470 
resetTLAS()2471     void resetTLAS() final
2472     {
2473         m_tlPtr.reset();
2474     }
2475 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)2476     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
2477                 VkCommandBuffer commandBuffer) final
2478     {
2479         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),          /* gridStartXYZ          */
2480                                                  tcu::Vec3(1, 1, 1),          /* gridCellSizeXYZ       */
2481                                                  tcu::UVec3(3, 5, 17),        /* gridSizeXYZ           */
2482                                                  tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
2483                                                  m_geometryType));
2484 
2485         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer,
2486                                               VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
2487                                               this,     /* optASPropertyProviderPtr */
2488                                               nullptr); /* optASFeedbackPtr         */
2489     }
2490 
initPrograms(SourceCollections & programCollection) const2491     void initPrograms(SourceCollections &programCollection) const final
2492     {
2493         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
2494                                                   0u,    /* flags        */
2495                                                   true); /* allowSpirv14 */
2496 
2497         const char *hitPropsDefinition = "struct HitProps\n"
2498                                          "{\n"
2499                                          "    uint rayIndex;\n"
2500                                          "    uint instanceCustomIndex;\n"
2501                                          "};\n";
2502 
2503         {
2504             std::stringstream css;
2505 
2506             css << "#version 460 core\n"
2507                    "\n"
2508                    "#extension GL_EXT_ray_tracing : require\n"
2509                    "\n"
2510                    "hitAttributeEXT vec3 unusedAttribute;\n"
2511                    "\n" +
2512                        de::toString(hitPropsDefinition) +
2513                        "\n"
2514                        "layout(location = 0) rayPayloadInEXT      uint   nRay;\n"
2515                        "layout(set      = 0, binding = 0, std430) buffer result\n"
2516                        "{\n"
2517                        "    uint     nHitsRegistered;\n"
2518                        "    uint     nMissesRegistered;\n"
2519                        "    HitProps hits[];\n"
2520                        "};\n"
2521                        "\n"
2522                        "void main()\n"
2523                        "{\n"
2524                        "    uint nHit = atomicAdd(nHitsRegistered, 1);\n"
2525                        "\n"
2526                        "    if (nHit < " +
2527                        de::toString(m_nMaxHitsToRegister) +
2528                        ")\n"
2529                        "    {\n"
2530                        "        hits[nHit].rayIndex            = nRay;\n"
2531                        "        hits[nHit].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
2532                        "    }\n"
2533                        "}\n";
2534 
2535             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
2536         }
2537 
2538         {
2539             std::stringstream css;
2540 
2541             css << "#version 460 core\n"
2542                    "\n"
2543                    "#extension GL_EXT_ray_tracing : require\n"
2544                    "\n"
2545                    "hitAttributeEXT vec3 hitAttribute;\n"
2546                    "\n"
2547                    "void main()\n"
2548                    "{\n"
2549                    "    reportIntersectionEXT(0.95f, 0);\n"
2550                    "}\n";
2551 
2552             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
2553         }
2554 
2555         {
2556             std::stringstream css;
2557 
2558             css << "#version 460 core\n"
2559                    "\n"
2560                    "#extension GL_EXT_ray_tracing : require\n"
2561                    "\n" +
2562                        de::toString(hitPropsDefinition) +
2563                        "\n"
2564                        "layout(set      = 0, binding = 0, std430) buffer result\n"
2565                        "{\n"
2566                        "    uint     nHitsRegistered;\n"
2567                        "    uint     nMissesRegistered;\n"
2568                        "    HitProps hits[];\n"
2569                        "};\n"
2570                        "\n"
2571                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
2572                        "\n"
2573                        "void main()\n"
2574                        "{\n"
2575                        "    atomicAdd(nMissesRegistered, 1);\n"
2576                        "}\n";
2577 
2578             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
2579         }
2580 
2581         {
2582             std::stringstream css;
2583 
2584             css << "#version 460 core\n"
2585                    "\n"
2586                    "#extension GL_EXT_ray_tracing : require\n"
2587                    "\n"
2588                    "layout(location = 0)              rayPayloadEXT uint               rayIndex;\n"
2589                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
2590                    "\n"
2591                    "void main()\n"
2592                    "{\n"
2593                    "    const uint nRaysPerInvocation = " +
2594                        de::toString(m_nRaysPerInvocation) +
2595                        ";\n"
2596                        "\n"
2597                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2598                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2599                        "    uint  rayFlags     = gl_RayFlagsCullBackFacingTrianglesEXT;\n"
2600                        "    float tmin         = 0.001;\n"
2601                        "    float tmax         = 4.0;\n"
2602                        "\n"
2603                        "    if (nInvocation >= 256 / nRaysPerInvocation)\n"
2604                        "    {\n"
2605                        "        return;\n"
2606                        "    }\n"
2607                        "\n"
2608                        "    for (uint nRay = 0; nRay < nRaysPerInvocation; ++nRay)\n"
2609                        "    {\n"
2610                        "        uint  cullMask     = 1 + nInvocation * nRaysPerInvocation + nRay;\n";
2611 
2612             if (m_useExtraCullMaskBits)
2613             {
2614                 css << "cullMask |= 0x00FFFFFF;\n";
2615             }
2616 
2617             css << "        uint  nCell        = nInvocation * nRaysPerInvocation + nRay;\n"
2618                    "        uvec3 cellXYZ      = uvec3(nCell % gl_LaunchSizeEXT.x, (nCell / gl_LaunchSizeEXT.x) % "
2619                    "gl_LaunchSizeEXT.y, (nCell / gl_LaunchSizeEXT.x / gl_LaunchSizeEXT.y) % gl_LaunchSizeEXT.z);\n"
2620                    "        vec3  cellStartXYZ = vec3(cellXYZ) * vec3(2.0);\n"
2621                    "        vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
2622                    "        vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
2623                    "        vec3  origin       = target - vec3(1, 1, 1);\n"
2624                    "        vec3  direct       = normalize(target - origin);\n"
2625                    "\n"
2626                    "        if (nCell < 255)\n"
2627                    "        {\n"
2628                    "            rayIndex = nCell;"
2629                    "\n"
2630                    "            traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
2631                    "        }\n"
2632                    "    }\n"
2633                    "}\n";
2634 
2635             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
2636         }
2637     }
2638 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const2639     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
2640     {
2641         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
2642         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
2643         const auto nHitsReported                      = *resultU32Ptr;
2644         const auto nMissesReported                    = *(resultU32Ptr + 1);
2645         bool result                                   = true;
2646 
2647         // For each traced ray:
2648         //
2649         // 1. Exactly one ahit invocation per ray should be reported.
2650         // 2. All hits reported for a ray R should point to a primitive with a valid custom instance index
2651         // 3. The reported custom instance indices must be valid.
2652         std::map<uint32_t, std::vector<uint32_t>> customInstanceIndexToRayIndexVecMap;
2653         std::map<uint32_t, std::vector<uint32_t>> rayIndexToCustomInstanceIndexVecMap;
2654 
2655         typedef struct
2656         {
2657             uint32_t rayIndex;
2658             uint32_t customInstanceHit;
2659         } HitProperties;
2660 
2661         if (nHitsReported != 0xFF)
2662         {
2663             result = false;
2664 
2665             goto end;
2666         }
2667 
2668         if (nMissesReported != 0)
2669         {
2670             result = false;
2671 
2672             goto end;
2673         }
2674 
2675         for (uint32_t nHit = 0; nHit < nHitsReported; ++nHit)
2676         {
2677             const HitProperties *hitPropsPtr = reinterpret_cast<const HitProperties *>(
2678                 resultU32Ptr + 2 /* preamble ints */ + nHit * 2 /* ints per HitProperties item */);
2679 
2680             customInstanceIndexToRayIndexVecMap[hitPropsPtr->customInstanceHit].push_back(hitPropsPtr->rayIndex);
2681             rayIndexToCustomInstanceIndexVecMap[hitPropsPtr->rayIndex].push_back(hitPropsPtr->customInstanceHit);
2682         }
2683 
2684         if (static_cast<uint32_t>(customInstanceIndexToRayIndexVecMap.size()) != nHitsReported)
2685         {
2686             /* Invalid number of unique custom instance indices reported. */
2687             result = false;
2688 
2689             goto end;
2690         }
2691 
2692         if (static_cast<uint32_t>(rayIndexToCustomInstanceIndexVecMap.size()) != nHitsReported)
2693         {
2694             /* Invalid ray indices reported by ahit invocations */
2695             result = false;
2696 
2697             goto end;
2698         }
2699 
2700         for (const auto &currentItem : customInstanceIndexToRayIndexVecMap)
2701         {
2702             if (currentItem.second.size() != 1)
2703             {
2704                 /* More than one ray associated with the same custom instance index */
2705                 result = false;
2706 
2707                 goto end;
2708             }
2709 
2710             if (currentItem.second.at(0) > 255)
2711             {
2712                 /* Invalid ray index associated with the instance index */
2713                 result = false;
2714 
2715                 goto end;
2716             }
2717 
2718             if (std::find(m_instanceCustomIndexVec.begin(), m_instanceCustomIndexVec.end(), currentItem.first) ==
2719                 m_instanceCustomIndexVec.end())
2720             {
2721                 /* Invalid custom instance index reported for the ray */
2722                 result = false;
2723 
2724                 goto end;
2725             }
2726         }
2727 
2728     end:
2729         return result;
2730     }
2731 
2732 private:
2733     const AccelerationStructureLayout m_asLayout;
2734     const GeometryType m_geometryType;
2735     const uint32_t m_nMaxHitsToRegister;
2736     const uint32_t m_nRaysPerInvocation;
2737     const bool m_useExtraCullMaskBits;
2738 
2739     mutable std::vector<uint32_t> m_instanceCustomIndexVec;
2740     mutable uint32_t m_lastCustomInstanceIndexUsed;
2741     mutable uint32_t m_nCullMasksUsed;
2742 
2743     std::unique_ptr<GridASProvider> m_asProviderPtr;
2744     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
2745 };
2746 
2747 class MAXRayHitAttributeSizeTest : public TestBase
2748 {
2749 public:
MAXRayHitAttributeSizeTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)2750     MAXRayHitAttributeSizeTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
2751         : m_asStructureLayout(asStructureLayout)
2752         , m_geometryType(geometryType)
2753         , m_gridSizeXYZ(tcu::UVec3(512, 1, 1))
2754         , m_nRayAttributeU32s(0)
2755     {
2756     }
2757 
/* Nothing to release explicitly. */
~MAXRayHitAttributeSizeTest()
{
}
2762 
getDispatchSize() const2763     tcu::UVec3 getDispatchSize() const final
2764     {
2765         DE_ASSERT(m_gridSizeXYZ[0] != 0);
2766         DE_ASSERT(m_gridSizeXYZ[1] != 0);
2767         DE_ASSERT(m_gridSizeXYZ[2] != 0);
2768 
2769         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
2770     }
2771 
getResultBufferSize() const2772     uint32_t getResultBufferSize() const final
2773     {
2774         DE_ASSERT(m_gridSizeXYZ[0] != 0);
2775         DE_ASSERT(m_gridSizeXYZ[1] != 0);
2776         DE_ASSERT(m_gridSizeXYZ[2] != 0);
2777 
2778         return static_cast<uint32_t>(
2779             (3 /* nAHits, nCHits, nMisses */ + m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] *
2780                                                    m_nRayAttributeU32s * 2 /* stages where result data is stored */) *
2781             sizeof(uint32_t));
2782     }
2783 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)2784     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
2785     {
2786         VkSpecializationInfo *resultPtr = nullptr;
2787 
2788         if (shaderStage == VK_SHADER_STAGE_INTERSECTION_BIT_KHR || shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ||
2789             shaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR)
2790         {
2791             resultPtr = &m_specializationInfo;
2792         }
2793 
2794         return resultPtr;
2795     }
2796 
getTLASPtrVecToBind() const2797     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
2798     {
2799         DE_ASSERT(m_tlPtr != nullptr);
2800 
2801         return {m_tlPtr.get()};
2802     }
2803 
resetTLAS()2804     void resetTLAS() final
2805     {
2806         m_tlPtr.reset();
2807     }
2808 
init(vkt::Context &,RayTracingProperties * rtPropertiesPtr)2809     bool init(vkt::Context & /* context    */, RayTracingProperties *rtPropertiesPtr) final
2810     {
2811         const auto maxRayHitAttributeSize = rtPropertiesPtr->getMaxRayHitAttributeSize();
2812 
2813         // TODO: If U8s are supported, we could cover the remaining space with these..
2814         m_nRayAttributeU32s = maxRayHitAttributeSize / static_cast<uint32_t>(sizeof(uint32_t));
2815         DE_ASSERT(m_nRayAttributeU32s != 0);
2816 
2817         m_specializationInfoMapEntry.constantID = 1;
2818         m_specializationInfoMapEntry.offset     = 0;
2819         m_specializationInfoMapEntry.size       = sizeof(uint32_t);
2820 
2821         m_specializationInfo.dataSize      = sizeof(uint32_t);
2822         m_specializationInfo.mapEntryCount = 1;
2823         m_specializationInfo.pData         = reinterpret_cast<const void *>(&m_nRayAttributeU32s);
2824         m_specializationInfo.pMapEntries   = &m_specializationInfoMapEntry;
2825 
2826         return true;
2827     }
2828 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)2829     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
2830                 VkCommandBuffer commandBuffer) final
2831     {
2832         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
2833                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
2834                                                                          m_gridSizeXYZ,
2835                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
2836                                                                          m_geometryType));
2837 
2838         m_tlPtr =
2839             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
2840                                       nullptr,                                        /* optASPropertyProviderPtr */
2841                                       nullptr);                                       /* optASFeedbackPtr         */
2842     }
2843 
initPrograms(SourceCollections & programCollection) const2844     void initPrograms(SourceCollections &programCollection) const final
2845     {
2846         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
2847                                                   0u,    /* flags        */
2848                                                   true); /* allowSpirv14 */
2849 
2850         const char *constantDefinitions = "layout(constant_id = 1) const uint N_UINTS_IN_HIT_ATTRIBUTE = 1;\n";
2851 
2852         const char *hitAttributeDefinition = "\n"
2853                                              "hitAttributeEXT block\n"
2854                                              "{\n"
2855                                              "    uint values[N_UINTS_IN_HIT_ATTRIBUTE];\n"
2856                                              "};\n"
2857                                              "\n";
2858 
2859         const char *resultBufferDefinition = "layout(set      = 0, binding = 0, std430) buffer result\n"
2860                                              "{\n"
2861                                              "    uint nAHitsRegistered;\n"
2862                                              "    uint nCHitsRegistered;\n"
2863                                              "    uint nMissesRegistered;\n"
2864                                              "    uint retrievedValues[N_UINTS_IN_HIT_ATTRIBUTE];\n"
2865                                              "};\n";
2866 
2867         {
2868             std::stringstream css;
2869 
2870             css << "#version 460 core\n"
2871                    "\n"
2872                    "#extension GL_EXT_ray_tracing : require\n"
2873                    "\n" +
2874                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2875                        "\n"
2876                        "layout(location = 0) rayPayloadInEXT uint unusedPayload;\n" +
2877                        de::toString(resultBufferDefinition) +
2878                        "\n"
2879                        "void main()\n"
2880                        "{\n"
2881                        "    atomicAdd(nAHitsRegistered, 1);\n"
2882                        "\n"
2883                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2884                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2885                        "\n"
2886                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2887                        "    {\n"
2888                        "        retrievedValues[(2 * nInvocation + 1) * N_UINTS_IN_HIT_ATTRIBUTE + nUint] = "
2889                        "values[nUint];\n"
2890                        "    }\n"
2891                        "}\n";
2892 
2893             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
2894         }
2895 
2896         {
2897             std::stringstream css;
2898 
2899             css << "#version 460 core\n"
2900                    "\n"
2901                    "#extension GL_EXT_ray_tracing : require\n"
2902                    "\n" +
2903                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2904                        de::toString(resultBufferDefinition) +
2905                        "\n"
2906                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
2907                        "\n"
2908                        "void main()\n"
2909                        "{\n"
2910                        "    atomicAdd(nCHitsRegistered, 1);\n"
2911                        "\n"
2912                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2913                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2914                        "\n"
2915                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2916                        "    {\n"
2917                        "        retrievedValues[(2 * nInvocation + 0) * N_UINTS_IN_HIT_ATTRIBUTE + nUint] = "
2918                        "values[nUint];\n"
2919                        "    }\n"
2920                        "}\n";
2921 
2922             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
2923         }
2924 
2925         {
2926             std::stringstream css;
2927 
2928             css << "#version 460 core\n"
2929                    "\n"
2930                    "#extension GL_EXT_ray_tracing : require\n"
2931                    "\n" +
2932                        de::toString(constantDefinitions) + de::toString(hitAttributeDefinition) +
2933                        de::toString(resultBufferDefinition) +
2934                        "\n"
2935                        "void main()\n"
2936                        "{\n"
2937                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2938                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2939                        "\n"
2940                        "    for (uint nUint = 0; nUint < N_UINTS_IN_HIT_ATTRIBUTE; ++nUint)\n"
2941                        "    {\n"
2942                        "        values[nUint] = 1 + nInvocation + nUint;\n"
2943                        "    }\n"
2944                        "\n"
2945                        "    reportIntersectionEXT(0.95f, 0);\n"
2946                        "}\n";
2947 
2948             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
2949         }
2950 
2951         {
2952             std::stringstream css;
2953 
2954             css << "#version 460 core\n"
2955                    "\n"
2956                    "#extension GL_EXT_ray_tracing : require\n"
2957                    "\n" +
2958                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
2959                        "\n"
2960                        "void main()\n"
2961                        "{\n"
2962                        "    atomicAdd(nMissesRegistered, 1);\n"
2963                        "}\n";
2964 
2965             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
2966         }
2967 
2968         {
2969             std::stringstream css;
2970 
2971             css << "#version 460 core\n"
2972                    "\n"
2973                    "#extension GL_EXT_ray_tracing : require\n"
2974                    "\n"
2975                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
2976                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
2977                    "\n"
2978                    "void main()\n"
2979                    "{\n"
2980                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
2981                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
2982                    "    uint  rayFlags     = 0;\n"
2983                    "    float tmin         = 0.001;\n"
2984                    "    float tmax         = 9.0;\n"
2985                    "\n"
2986                    "    uint  cullMask     = 0xFF;\n"
2987                    "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
2988                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
2989                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
2990                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
2991                    "    vec3  direct       = normalize(target - origin);\n"
2992                    "\n"
2993                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
2994                    "0);\n"
2995                    "}\n";
2996 
2997             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
2998         }
2999     }
3000 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const3001     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
3002     {
3003         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
3004         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
3005         bool result                                   = false;
3006 
3007         const auto nAHitsReported    = *resultU32Ptr;
3008         const auto nCHitsRegistered  = *(resultU32Ptr + 1);
3009         const auto nMissesRegistered = *(resultU32Ptr + 2);
3010 
3011         if (nAHitsReported != m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] / 2)
3012         {
3013             goto end;
3014         }
3015 
3016         if (nCHitsRegistered != nAHitsReported)
3017         {
3018             goto end;
3019         }
3020 
3021         if (nMissesRegistered != nAHitsReported)
3022         {
3023             goto end;
3024         }
3025 
3026         for (uint32_t nHit = 0; nHit < nAHitsReported; ++nHit)
3027         {
3028             const uint32_t *ahitValues = resultU32Ptr + 3 /* preamble ints */ + (2 * nHit + 0) * m_nRayAttributeU32s;
3029             const uint32_t *chitValues = resultU32Ptr + 3 /* preamble ints */ + (2 * nHit + 1) * m_nRayAttributeU32s;
3030             const bool missExpected    = (nHit % 2) != 0;
3031 
3032             for (uint32_t nValue = 0; nValue < m_nRayAttributeU32s; ++nValue)
3033             {
3034                 if (!missExpected)
3035                 {
3036                     if (ahitValues[nValue] != 1 + nHit + nValue)
3037                     {
3038                         goto end;
3039                     }
3040 
3041                     if (chitValues[nValue] != 1 + nHit + nValue)
3042                     {
3043                         goto end;
3044                     }
3045                 }
3046                 else
3047                 {
3048                     if (ahitValues[nValue] != 0)
3049                     {
3050                         goto end;
3051                     }
3052 
3053                     if (chitValues[nValue] != 0)
3054                     {
3055                         goto end;
3056                     }
3057                 }
3058             }
3059         }
3060 
3061         result = true;
3062     end:
3063         return result;
3064     }
3065 
3066 private:
3067     const AccelerationStructureLayout m_asStructureLayout;
3068     const GeometryType m_geometryType;
3069 
3070     const tcu::UVec3 m_gridSizeXYZ;
3071     uint32_t m_nRayAttributeU32s;
3072     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3073 
3074     VkSpecializationInfo m_specializationInfo;
3075     VkSpecializationMapEntry m_specializationInfoMapEntry;
3076 };
3077 
3078 class MAXRTInvocationsSupportedTest : public TestBase, public ASPropertyProvider, public IGridASFeedback
3079 {
3080 public:
MAXRTInvocationsSupportedTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)3081     MAXRTInvocationsSupportedTest(const GeometryType &geometryType,
3082                                   const AccelerationStructureLayout &asStructureLayout)
3083         : m_asStructureLayout(asStructureLayout)
3084         , m_geometryType(geometryType)
3085         , m_lastCustomInstanceIndexUsed(0)
3086         , m_nMaxCells(8 * 8 * 8)
3087     {
3088     }
3089 
~MAXRTInvocationsSupportedTest()3090     ~MAXRTInvocationsSupportedTest()
3091     {
3092         /* Stub */
3093     }
3094 
getCHitShaderCollectionShaderNames() const3095     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
3096     {
3097         return {};
3098     }
3099 
getInstanceCustomIndex(const uint32_t & nBL,const uint32_t & nInstance) const3100     uint32_t getInstanceCustomIndex(const uint32_t &nBL, const uint32_t &nInstance) const final
3101     {
3102         DE_UNREF(nBL);
3103         DE_UNREF(nInstance);
3104 
3105         return ++m_lastCustomInstanceIndexUsed;
3106     }
3107 
getDispatchSize() const3108     tcu::UVec3 getDispatchSize() const final
3109     {
3110         DE_ASSERT(m_gridSizeXYZ[0] != 0);
3111         DE_ASSERT(m_gridSizeXYZ[1] != 0);
3112         DE_ASSERT(m_gridSizeXYZ[2] != 0);
3113 
3114         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
3115     }
3116 
getResultBufferSize() const3117     uint32_t getResultBufferSize() const final
3118     {
3119         DE_ASSERT(m_gridSizeXYZ[0] != 0);
3120         DE_ASSERT(m_gridSizeXYZ[1] != 0);
3121         DE_ASSERT(m_gridSizeXYZ[2] != 0);
3122 
3123         return static_cast<uint32_t>((2 /* nHits, nMisses */ + m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] *
3124                                                                    1 /* hit instance custom index */) *
3125                                      sizeof(uint32_t));
3126     }
3127 
getTLASPtrVecToBind() const3128     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3129     {
3130         DE_ASSERT(m_tlPtr != nullptr);
3131 
3132         return {m_tlPtr.get()};
3133     }
3134 
init(vkt::Context & context,RayTracingProperties * rtPropertiesPtr)3135     bool init(vkt::Context &context, RayTracingProperties *rtPropertiesPtr) final
3136     {
3137         m_context = &context;
3138         /* NOTE: In order to avoid running into a situation where the test attempts to create a buffer of size larger than permitted by Vulkan,
3139          *       we limit the maximum number of testable invocations to 2^29 on 64bit CTS build and driver or to 2^27 on 32bit */
3140         const auto maxComputeWorkGroupCount        = context.getDeviceProperties().limits.maxComputeWorkGroupCount;
3141         const auto maxComputeWorkGroupSize         = context.getDeviceProperties().limits.maxComputeWorkGroupSize;
3142         const uint64_t maxGlobalRTWorkGroupSize[3] = {
3143             static_cast<uint64_t>(maxComputeWorkGroupCount[0]) * static_cast<uint64_t>(maxComputeWorkGroupSize[0]),
3144             static_cast<uint64_t>(maxComputeWorkGroupCount[1]) * static_cast<uint64_t>(maxComputeWorkGroupSize[1]),
3145             static_cast<uint64_t>(maxComputeWorkGroupCount[2]) * static_cast<uint64_t>(maxComputeWorkGroupSize[2])};
3146         const auto maxRayDispatchInvocationCount =
3147             de::min(static_cast<uint64_t>(rtPropertiesPtr->getMaxRayDispatchInvocationCount()),
3148 #if (DE_PTR_SIZE == 4)
3149                     static_cast<uint64_t>(1ULL << 27));
3150 #else
3151                     static_cast<uint64_t>(1ULL << 29));
3152 #endif
3153 
3154         m_gridSizeXYZ[0] =
3155             de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount) % maxGlobalRTWorkGroupSize[0]));
3156         m_gridSizeXYZ[1] = de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount / m_gridSizeXYZ[0]) %
3157                                                              maxGlobalRTWorkGroupSize[1]));
3158         m_gridSizeXYZ[2] =
3159             de::max(1u, static_cast<uint32_t>((maxRayDispatchInvocationCount / m_gridSizeXYZ[0] / m_gridSizeXYZ[1]) %
3160                                               maxGlobalRTWorkGroupSize[2]));
3161 
3162         /* TODO: The simple formulas above may need to be improved to handle your implementation correctly */
3163         DE_ASSERT(m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2] == maxRayDispatchInvocationCount);
3164 
3165         return true;
3166     }
3167 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3168     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3169                 VkCommandBuffer commandBuffer) final
3170     {
3171         std::unique_ptr<GridASProvider> asProviderPtr(
3172             new GridASProvider(tcu::Vec3(0, 0, 0),    /* gridStartXYZ          */
3173                                tcu::Vec3(1, 1, 1),    /* gridCellSizeXYZ       */
3174                                tcu::UVec3(512, 1, 1), /* gridSizeXYZ           */
3175                                tcu::Vec3(3, 0, 0),    /* gridInterCellDeltaXYZ */
3176                                m_geometryType));
3177 
3178         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
3179                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3180                                             this,  /* optASPropertyProviderPtr */
3181                                             this); /* optASFeedbackPtr            */
3182     }
3183 
initPrograms(SourceCollections & programCollection) const3184     void initPrograms(SourceCollections &programCollection) const final
3185     {
3186         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3187                                                   0u,    /* flags        */
3188                                                   true); /* allowSpirv14 */
3189 
3190         const char *hitPropsDefinition = "struct HitProps\n"
3191                                          "{\n"
3192                                          "    uint instanceCustomIndex;\n"
3193                                          "};\n";
3194 
3195         {
3196             std::stringstream css;
3197 
3198             css << "#version 460 core\n"
3199                    "\n"
3200                    "#extension GL_EXT_ray_tracing : require\n"
3201                    "\n"
3202                    "hitAttributeEXT vec3 unusedAttribute;\n"
3203                    "\n" +
3204                        de::toString(hitPropsDefinition) +
3205                        "\n"
3206                        "layout(location = 0) rayPayloadInEXT      uint   unusedPayload;\n"
3207                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3208                        "{\n"
3209                        "    uint     nHitsRegistered;\n"
3210                        "    uint     nMissesRegistered;\n"
3211                        "    HitProps hits[];\n"
3212                        "};\n"
3213                        "\n"
3214                        "void main()\n"
3215                        "{\n"
3216                        "    atomicAdd(nHitsRegistered, 1);\n"
3217                        "\n"
3218                        "    uint nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3219                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3220                        "\n"
3221                        "    hits[nInvocation].instanceCustomIndex = gl_InstanceCustomIndexEXT;\n"
3222                        "}\n";
3223 
3224             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3225         }
3226 
3227         {
3228             std::stringstream css;
3229 
3230             css << "#version 460 core\n"
3231                    "\n"
3232                    "#extension GL_EXT_ray_tracing : require\n"
3233                    "\n"
3234                    "hitAttributeEXT vec3 hitAttribute;\n"
3235                    "\n"
3236                    "void main()\n"
3237                    "{\n"
3238                    "    reportIntersectionEXT(0.95f, 0);\n"
3239                    "}\n";
3240 
3241             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
3242         }
3243 
3244         {
3245             std::stringstream css;
3246 
3247             css << "#version 460 core\n"
3248                    "\n"
3249                    "#extension GL_EXT_ray_tracing : require\n"
3250                    "\n" +
3251                        de::toString(hitPropsDefinition) +
3252                        "\n"
3253                        "layout(set = 0, binding = 0, std430) buffer result\n"
3254                        "{\n"
3255                        "    uint     nHitsRegistered;\n"
3256                        "    uint     nMissesRegistered;\n"
3257                        "    HitProps hits[];\n"
3258                        "};\n"
3259                        "\n"
3260                        "layout(location = 0) rayPayloadInEXT uint rayIndex;\n"
3261                        "\n"
3262                        "void main()\n"
3263                        "{\n"
3264                        "    atomicAdd(nMissesRegistered, 1);\n"
3265                        "}\n";
3266 
3267             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
3268         }
3269 
3270         {
3271             std::stringstream css;
3272 
3273             css << "#version 460 core\n"
3274                    "\n"
3275                    "#extension GL_EXT_ray_tracing : require\n"
3276                    "\n"
3277                    "layout(location = 0)              rayPayloadEXT uint               unusedPayload;\n"
3278                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
3279                    "\n"
3280                    "void main()\n"
3281                    "{\n"
3282                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3283                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3284                    "    uint  rayFlags     = 0;\n"
3285                    "    float tmin         = 0.001;\n"
3286                    "    float tmax         = 2.1;\n"
3287                    "\n"
3288                    "    uint  cullMask     = 0xFF;\n"
3289                    "    vec3  cellStartXYZ = vec3( (nInvocation % " +
3290                        de::toString(m_nMaxCells) +
3291                        ") * 3, 0.0, 0.0);\n"
3292                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
3293                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
3294                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
3295                        "    vec3  direct       = normalize(target - origin);\n"
3296                        "\n"
3297                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
3298                        "tmax, 0);\n"
3299                        "}\n";
3300 
3301             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
3302         }
3303     }
3304 
resetTLAS()3305     void resetTLAS() final
3306     {
3307         m_tlPtr.reset();
3308     }
3309 
verifyHitsMisses(uint32_t nHitsReported,uint32_t nMissesReported) const3310     bool verifyHitsMisses(uint32_t nHitsReported, uint32_t nMissesReported) const
3311     {
3312         if (nHitsReported != m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2])
3313         {
3314             return false;
3315         }
3316 
3317         if (nMissesReported != 0)
3318         {
3319             return false;
3320         }
3321 
3322         return true;
3323     }
3324 
verifyResultChunk(const void * inBufferPtr,uint32_t size,uint32_t offset) const3325     bool verifyResultChunk(const void *inBufferPtr, uint32_t size, uint32_t offset) const
3326     {
3327         const uint32_t *resultU32Ptr = reinterpret_cast<const uint32_t *>(inBufferPtr);
3328 
3329         typedef struct
3330         {
3331             uint32_t instanceCustomIndex;
3332         } HitProperties;
3333 
3334         for (uint32_t nRay = 0; nRay < size; ++nRay)
3335         {
3336             // Touch watch dog every 100000 loops to avoid timeout issue.
3337             if (nRay > 0 && (nRay % 100000 == 0))
3338                 m_context->getTestContext().touchWatchdog();
3339             const HitProperties *hitPropsPtr = reinterpret_cast<const HitProperties *>(resultU32Ptr) + nRay;
3340 
3341             if (m_nRayToInstanceIndexExpected.at((nRay + offset) % m_nMaxCells) != hitPropsPtr->instanceCustomIndex)
3342             {
3343                 return false;
3344             }
3345         }
3346 
3347         return true;
3348     }
3349 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const3350     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
3351     {
3352         const DeviceInterface &deviceInterface = context.getDeviceInterface();
3353         const VkDevice deviceVk                = context.getDevice();
3354         Allocator &allocator                   = context.getDefaultAllocator();
3355         const uint32_t queueFamilyIndex        = context.getUniversalQueueFamilyIndex();
3356 
3357         const Move<VkCommandPool> cmdPoolPtr = createCommandPool(deviceInterface, deviceVk, 0, /* pCreateInfo */
3358                                                                  queueFamilyIndex);
3359         const Move<VkCommandBuffer> cmdBufferPtr =
3360             allocateCommandBuffer(deviceInterface, deviceVk, *cmdPoolPtr, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3361 
3362         // first read the header and then data as chunks
3363         uint32_t headerSize               = 2 * sizeof(uint32_t);
3364         const auto headerBufferCreateInfo = makeBufferCreateInfo(headerSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3365         de::MovePtr<BufferWithMemory> headerBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
3366             deviceInterface, deviceVk, allocator, headerBufferCreateInfo, MemoryRequirement::HostVisible));
3367 
3368         const VkBufferCopy headerCopy{0, 0, headerSize};
3369         CopyBufferContent(context, *cmdBufferPtr, buffer, *headerBufferPtr, headerCopy);
3370 
3371         const uint32_t *headerPtr = (uint32_t *)headerBufferPtr->getAllocation().getHostPtr();
3372 
3373         const auto nHitsReported   = headerPtr[0];
3374         const auto nMissesReported = headerPtr[1];
3375 
3376         if (!verifyHitsMisses(nHitsReported, nMissesReported))
3377         {
3378             return false;
3379         }
3380 
3381         // verification loop that works in chunks
3382         uint32_t itemsInChunk = 1024 * 1024;
3383         uint32_t chunkSize    = static_cast<uint32_t>(itemsInChunk * sizeof(uint32_t));
3384         uint32_t amount       = (getResultBufferSize() - headerSize) / chunkSize;
3385 
3386         // allocate a buffer for verification
3387         const auto chunkBufferCreateInfo = makeBufferCreateInfo(chunkSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
3388         de::MovePtr<BufferWithMemory> chunkBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
3389             deviceInterface, deviceVk, allocator, chunkBufferCreateInfo, MemoryRequirement::HostVisible));
3390 
3391         // copy each chunk using offset and verify the contents
3392         for (uint32_t chunk = 0; chunk < amount; ++chunk)
3393         {
3394             uint32_t srcOffset = headerSize + (chunk * chunkSize);
3395             const VkBufferCopy chunkCopy{srcOffset, 0, chunkSize};
3396             CopyBufferContent(context, *cmdBufferPtr, buffer, *chunkBufferPtr, chunkCopy);
3397 
3398             const uint32_t *chunkDataPtr = (uint32_t *)chunkBufferPtr->getAllocation().getHostPtr();
3399 
3400             if (!verifyResultChunk(chunkDataPtr, itemsInChunk, static_cast<uint32_t>(srcOffset / sizeof(uint32_t))))
3401             {
3402                 return false;
3403             }
3404         }
3405 
3406         return true;
3407     }
3408 
3409 private:
onCullMaskAssignedToCell(const tcu::UVec3 & cellLocation,const uint8_t & cullMaskAssigned)3410     void onCullMaskAssignedToCell(const tcu::UVec3 &cellLocation, const uint8_t &cullMaskAssigned)
3411     {
3412         /* Dont'care */
3413         DE_UNREF(cellLocation);
3414         DE_UNREF(cullMaskAssigned);
3415     }
3416 
onInstanceCustomIndexAssignedToCell(const tcu::UVec3 & cellLocation,const uint32_t & customIndexAssigned)3417     void onInstanceCustomIndexAssignedToCell(const tcu::UVec3 &cellLocation, const uint32_t &customIndexAssigned)
3418     {
3419         DE_ASSERT(cellLocation[1] == 0);
3420         DE_ASSERT(cellLocation[2] == 0);
3421 
3422         m_nRayToInstanceIndexExpected[cellLocation[0]] = customIndexAssigned;
3423     }
3424 
3425     vkt::Context *m_context;
3426     const AccelerationStructureLayout m_asStructureLayout;
3427     const GeometryType m_geometryType;
3428 
3429     tcu::UVec3 m_gridSizeXYZ;
3430     mutable uint32_t m_lastCustomInstanceIndexUsed;
3431     const uint32_t m_nMaxCells;
3432     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3433 
3434     std::map<uint32_t, uint32_t> m_nRayToInstanceIndexExpected;
3435 };
3436 
3437 class NoDuplicateAnyHitTest : public TestBase
3438 {
3439 public:
NoDuplicateAnyHitTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType)3440     NoDuplicateAnyHitTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType)
3441         : m_asLayout(asLayout)
3442         , m_geometryType(geometryType)
3443         , m_gridSizeXYZ(tcu::UVec3(4, 4, 4))
3444         , m_nRaysToTrace(32)
3445     {
3446         /* Stub */
3447     }
3448 
~NoDuplicateAnyHitTest()3449     ~NoDuplicateAnyHitTest()
3450     {
3451         /* Stub */
3452     }
3453 
getCHitShaderCollectionShaderNames() const3454     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
3455     {
3456         return {};
3457     }
3458 
getDispatchSize() const3459     tcu::UVec3 getDispatchSize() const final
3460     {
3461         return tcu::UVec3(4, 4, m_nRaysToTrace / (4 * 4) + 1);
3462     }
3463 
getResultBufferSize() const3464     uint32_t getResultBufferSize() const final
3465     {
3466         const auto nPrimitives = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3467 
3468         return static_cast<uint32_t>((2 /* nHits, nMisses */ + 3 * nPrimitives /* instancePrimitiveIDPairsUsed */) *
3469                                      sizeof(uint32_t) * m_nRaysToTrace);
3470     }
3471 
getTLASPtrVecToBind() const3472     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3473     {
3474         return {m_tlPtr.get()};
3475     }
3476 
resetTLAS()3477     void resetTLAS() final
3478     {
3479         m_tlPtr.reset();
3480     }
3481 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3482     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3483                 VkCommandBuffer commandBuffer) final
3484     {
3485         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         /* gridStartXYZ          */
3486                                                  tcu::Vec3(1, 1, 1),                         /* gridCellSizeXYZ       */
3487                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
3488                                                  m_geometryType));
3489 
3490         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer,
3491                                               VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3492                                               nullptr,  /* optASPropertyProviderPtr */
3493                                               nullptr); /* optASFedbackPtr          */
3494     }
3495 
initPrograms(SourceCollections & programCollection) const3496     void initPrograms(SourceCollections &programCollection) const final
3497     {
3498         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3499                                                   0u,    /* flags        */
3500                                                   true); /* allowSpirv14 */
3501 
3502         const auto nTotalPrimitives        = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3503         const auto hitPropertiesDefinition = "struct HitProperties\n"
3504                                              "{\n"
3505                                              "    uint nHitsRegistered;\n"
3506                                              "     uint nMissRegistered;\n"
3507                                              "    uint instancePrimitiveIDPairsUsed[3 * " +
3508                                              de::toString(nTotalPrimitives) +
3509                                              "];\n"
3510                                              "};\n";
3511 
3512         {
3513             std::stringstream css;
3514 
3515             css << "#version 460 core\n"
3516                    "\n"
3517                    "#extension GL_EXT_ray_tracing : require\n"
3518                    "\n"
3519                    "hitAttributeEXT vec3 unusedAttribute;\n"
3520                    "\n" +
3521                        hitPropertiesDefinition +
3522                        "\n"
3523                        "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
3524                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3525                        "{\n"
3526                        "    HitProperties rayToHitProps["
3527                 << de::toString(m_nRaysToTrace)
3528                 << "];\n"
3529                    "};\n"
3530                    "\n"
3531                    "void main()\n"
3532                    "{\n"
3533                    "    uint nRay            = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3534                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3535                    "    uint nHitsRegistered = atomicAdd(rayToHitProps[nRay].nHitsRegistered, 1);\n"
3536                    "\n"
3537                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 0] = 1 + "
3538                    "gl_InstanceID;\n"
3539                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 1] = 1 + "
3540                    "gl_PrimitiveID;\n"
3541                    "    rayToHitProps[nRay].instancePrimitiveIDPairsUsed[3 * nHitsRegistered + 2] = 1 + "
3542                    "gl_GeometryIndexEXT;\n"
3543                    "}\n";
3544 
3545             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3546         }
3547 
3548         {
3549             std::stringstream css;
3550 
3551             css << "#version 460 core\n"
3552                    "\n"
3553                    "#extension GL_EXT_ray_tracing : require\n"
3554                    "\n"
3555                    "hitAttributeEXT vec3 hitAttribute;\n"
3556                    "\n"
3557                    "void main()\n"
3558                    "{\n"
3559                    "    reportIntersectionEXT(0.95f, 0);\n"
3560                    "}\n";
3561 
3562             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
3563         }
3564 
3565         {
3566             std::stringstream css;
3567 
3568             css << "#version 460 core\n"
3569                    "\n"
3570                    "#extension GL_EXT_ray_tracing : require\n"
3571                    "\n" +
3572                        hitPropertiesDefinition +
3573                        "layout(location = 0) rayPayloadInEXT      vec3   unusedPayload;\n"
3574                        "layout(set      = 0, binding = 0, std430) buffer result\n"
3575                        "{\n"
3576                        "    HitProperties rayToHitProps["
3577                 << de::toString(m_nRaysToTrace)
3578                 << "];\n"
3579                    "};\n"
3580                    "\n"
3581                    "void main()\n"
3582                    "{\n"
3583                    "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + gl_LaunchIDEXT.y * "
3584                    "gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3585                    "\n"
3586                    "    atomicAdd(rayToHitProps[nRay].nMissRegistered, 1);\n"
3587                    "}\n";
3588 
3589             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
3590         }
3591 
3592         {
3593             std::stringstream css;
3594 
3595             css << "#version 460 core\n"
3596                    "\n"
3597                    "#extension GL_EXT_ray_tracing : require\n"
3598                    "\n" +
3599                        hitPropertiesDefinition +
3600                        "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
3601                        "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
3602                        "\n"
3603                        "void main()\n"
3604                        "{\n"
3605                        "    uint  nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
3606                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
3607                        "    uint  rayFlags    = 0;\n"
3608                        "    uint  cullMask    = 0xFF;\n"
3609                        "    float tmin        = 0.001;\n"
3610                        "    float tmax        = 9.0;\n"
3611                        "    vec3  origin      = vec3(4,                                  4,                            "
3612                        "      4);\n"
3613                        "    vec3  target      = vec3(float(gl_LaunchIDEXT.x * 2) + 0.5f, float(gl_LaunchIDEXT.y * 2) + "
3614                        "0.5f, float(gl_LaunchIDEXT.z * 2) + 0.5f);\n"
3615                        "    vec3  direct      = normalize(target - origin);\n"
3616                        "\n"
3617                        "    if (nInvocation >= "
3618                 << m_nRaysToTrace
3619                 << ")\n"
3620                    "    {\n"
3621                    "        return;\n"
3622                    "    }\n"
3623                    "\n"
3624                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
3625                    "}\n";
3626 
3627             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
3628         }
3629     }
3630 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const3631     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
3632     {
3633         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
3634         const void *resultDataPtr                     = resultBufferPtr->getAllocation().getHostPtr();
3635 
3636         const auto nTotalPrimitives = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
3637         bool result                 = true;
3638 
3639         for (uint32_t nRay = 0; nRay < m_nRaysToTrace; ++nRay)
3640         {
3641             std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> tupleVec;
3642             const auto rayProps = reinterpret_cast<const uint32_t *>(resultDataPtr) + (2 + 3 * nTotalPrimitives) * nRay;
3643 
3644             // 1. At least one ahit invocation must have been made.
3645             if (rayProps[0] == 0)
3646             {
3647                 result = false;
3648 
3649                 goto end;
3650             }
3651 
3652             // 2. It's OK for each ray to intersect many AABBs, but no AABB should have had >1 ahit invocation fired.
3653             for (uint32_t nPrimitive = 0; nPrimitive < nTotalPrimitives; nPrimitive++)
3654             {
3655                 const auto instanceID    = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 0];
3656                 const auto primitiveID   = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 1];
3657                 const auto geometryIndex = rayProps[2 /* nHits, nMissesRegistered */ + 3 * nPrimitive + 2];
3658 
3659                 const auto currentTuple =
3660                     std::tuple<uint32_t, uint32_t, uint32_t>(instanceID, primitiveID, geometryIndex);
3661 
3662                 if (instanceID != 0 || primitiveID != 0 || geometryIndex != 0)
3663                 {
3664                     if (std::find(tupleVec.begin(), tupleVec.end(), currentTuple) != tupleVec.end())
3665                     {
3666                         result = false;
3667 
3668                         goto end;
3669                     }
3670 
3671                     tupleVec.push_back(currentTuple);
3672                 }
3673             }
3674 
3675             // 3. None of the traced rays should have triggered the miss shader invocation.
3676             if (rayProps[1] != 0)
3677             {
3678                 result = false;
3679 
3680                 goto end;
3681             }
3682         }
3683 
3684     end:
3685         return result;
3686     }
3687 
3688 private:
3689     const AccelerationStructureLayout m_asLayout;
3690     const GeometryType m_geometryType;
3691     const tcu::UVec3 m_gridSizeXYZ;
3692     const uint32_t m_nRaysToTrace;
3693 
3694     std::unique_ptr<GridASProvider> m_asProviderPtr;
3695     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
3696 };
3697 
3698 const std::vector<VariableType> g_ShaderRecordBlockTestVars1 = {
3699     VariableType::FLOAT, VariableType::VEC2,   VariableType::VEC3,   VariableType::VEC4,
3700 
3701     VariableType::MAT2,  VariableType::MAT2X2, VariableType::MAT2X3, VariableType::MAT2X4,
3702     VariableType::MAT3,  VariableType::MAT3X2, VariableType::MAT3X3, VariableType::MAT3X4,
3703     VariableType::MAT4,  VariableType::MAT4X2, VariableType::MAT4X3, VariableType::MAT4X4,
3704 
3705     VariableType::INT,   VariableType::IVEC2,  VariableType::IVEC3,  VariableType::IVEC4,
3706 
3707     VariableType::UINT,  VariableType::UVEC2,  VariableType::UVEC3,  VariableType::UVEC4,
3708 };
3709 
3710 const std::vector<VariableType> g_ShaderRecordBlockTestVars2 = {
3711     VariableType::DOUBLE, VariableType::DVEC2,   VariableType::DVEC3,   VariableType::DVEC4,
3712 
3713     VariableType::DMAT2,  VariableType::DMAT2X2, VariableType::DMAT2X3, VariableType::DMAT2X4, VariableType::DMAT3,
3714 };
3715 
3716 const std::vector<VariableType> g_ShaderRecordBlockTestVars3 = {
3717     VariableType::DMAT3X2, VariableType::DMAT3X3, VariableType::DMAT3X4, VariableType::DMAT4,
3718     VariableType::DMAT4X2, VariableType::DMAT4X3, VariableType::DMAT4X4,
3719 };
3720 
3721 const std::vector<VariableType> g_ShaderRecordBlockTestVars4 = {
3722     VariableType::VEC3,   VariableType::VEC4,
3723 
3724     VariableType::INT16,  VariableType::I16VEC2, VariableType::I16VEC3, VariableType::I16VEC4,
3725 
3726     VariableType::MAT3X3, VariableType::MAT3X4,  VariableType::MAT4X3,
3727 
3728     VariableType::UINT16, VariableType::U16VEC2, VariableType::U16VEC3, VariableType::U16VEC4,
3729 };
3730 
3731 const std::vector<VariableType> g_ShaderRecordBlockTestVars5 = {
3732     VariableType::VEC3,   VariableType::VEC4,
3733 
3734     VariableType::INT64,  VariableType::I64VEC2, VariableType::I64VEC3, VariableType::I64VEC4,
3735 
3736     VariableType::MAT3X3, VariableType::MAT3X4,  VariableType::MAT4X3,
3737 
3738     VariableType::UINT64, VariableType::U64VEC2, VariableType::U64VEC3, VariableType::U64VEC4,
3739 };
3740 
3741 const std::vector<VariableType> g_ShaderRecordBlockTestVars6 = {
3742     VariableType::VEC3,   VariableType::VEC4,
3743 
3744     VariableType::INT8,   VariableType::I8VEC2, VariableType::I8VEC3, VariableType::I8VEC4,
3745 
3746     VariableType::MAT3X3, VariableType::MAT3X4, VariableType::MAT4X3,
3747 
3748     VariableType::UINT8,  VariableType::U8VEC2, VariableType::U8VEC3, VariableType::U8VEC4,
3749 };
3750 
3751 class ShaderRecordBlockTest : public TestBase
3752 {
3753 public:
ShaderRecordBlockTest(const TestType & testType,const std::vector<VariableType> & varTypesToTest)3754     ShaderRecordBlockTest(const TestType &testType, const std::vector<VariableType> &varTypesToTest)
3755         : m_gridSizeXYZ(tcu::UVec3(2, 2, 2))
3756         , m_testType(testType)
3757         , m_varTypesToTest(varTypesToTest)
3758         , m_resultBufferSize(0)
3759         , m_shaderRecordSize(0)
3760     {
3761         initTestItems();
3762     }
3763 
~ShaderRecordBlockTest()3764     ~ShaderRecordBlockTest()
3765     {
3766         /* Stub */
3767     }
3768 
getDispatchSize() const3769     tcu::UVec3 getDispatchSize() const final
3770     {
3771         return tcu::UVec3(3, 1, 1);
3772     }
3773 
getResultBufferSize() const3774     uint32_t getResultBufferSize() const final
3775     {
3776         return m_resultBufferSize;
3777     }
3778 
getShaderRecordData(const ShaderGroups & shaderGroup) const3779     const void *getShaderRecordData(const ShaderGroups &shaderGroup) const final
3780     {
3781         return (shaderGroup == ShaderGroups::HIT_GROUP)  ? m_shaderGroupToRecordDataMap.at(shaderGroup).data() :
3782                (shaderGroup == ShaderGroups::MISS_GROUP) ? m_shaderGroupToRecordDataMap.at(shaderGroup).data() :
3783                                                            nullptr;
3784     }
3785 
getShaderRecordSize(const ShaderGroups & shaderGroup) const3786     uint32_t getShaderRecordSize(const ShaderGroups &shaderGroup) const final
3787     {
3788         DE_ASSERT(m_shaderRecordSize != 0);
3789 
3790         return ((shaderGroup == ShaderGroups::HIT_GROUP) || (shaderGroup == ShaderGroups::MISS_GROUP)) ?
3791                    m_shaderRecordSize :
3792                    0;
3793     }
3794 
getTLASPtrVecToBind() const3795     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
3796     {
3797         return {m_tlPtr.get()};
3798     }
3799 
getVarsToTest(const TestType & testType)3800     static std::vector<VariableType> getVarsToTest(const TestType &testType)
3801     {
3802         return ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
3803                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
3804                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
3805                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_1)) ?
3806                    g_ShaderRecordBlockTestVars1 :
3807                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
3808                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
3809                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
3810                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_2)) ?
3811                    g_ShaderRecordBlockTestVars2 :
3812                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
3813                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
3814                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
3815                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_3)) ?
3816                    g_ShaderRecordBlockTestVars3 :
3817                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
3818                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
3819                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
3820                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_4)) ?
3821                    g_ShaderRecordBlockTestVars4 :
3822                ((testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
3823                 (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
3824                 (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
3825                 (testType == TestType::SHADER_RECORD_BLOCK_STD430_5)) ?
3826                    g_ShaderRecordBlockTestVars5 :
3827                    g_ShaderRecordBlockTestVars6;
3828     }
3829 
resetTLAS()3830     void resetTLAS() final
3831     {
3832         m_tlPtr.reset();
3833     }
3834 
init(vkt::Context &,RayTracingProperties *)3835     bool init(vkt::Context & /* context */, RayTracingProperties * /* rtPropsPtr */) final
3836     {
3837         // Cache required result buffer size.
3838         {
3839             uint32_t largestBaseTypeSizeUsed = 0;
3840             const auto &lastItem             = m_testItems.items.back();
3841             const uint32_t nResultBytesPerShaderStage =
3842                 lastItem.resultBufferProps.bufferOffset + lastItem.arraySize * lastItem.resultBufferProps.arrayStride;
3843             const VkShaderStageFlagBits shaderStages[] = {
3844                 VK_SHADER_STAGE_MISS_BIT_KHR,
3845                 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
3846                 VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
3847                 VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
3848             };
3849 
3850             m_shaderRecordSize =
3851                 lastItem.inputBufferProps.bufferOffset + lastItem.arraySize * lastItem.inputBufferProps.arrayStride;
3852 
3853             for (const auto &currentTestItem : m_testItems.items)
3854             {
3855                 const auto baseType      = getBaseType(currentTestItem.type);
3856                 const auto componentSize = getComponentSizeBytes(baseType);
3857 
3858                 largestBaseTypeSizeUsed = de::max(componentSize, largestBaseTypeSizeUsed);
3859             }
3860 
3861             for (const auto &currentShaderStage : shaderStages)
3862             {
3863                 m_shaderStageToResultBufferOffset[currentShaderStage] = m_resultBufferSize;
3864 
3865                 m_resultBufferSize =
3866                     de::roundUp(m_resultBufferSize, static_cast<uint32_t>(sizeof(largestBaseTypeSizeUsed)));
3867                 m_resultBufferSize += nResultBytesPerShaderStage;
3868             }
3869         }
3870 
3871         return true;
3872     }
3873 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)3874     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
3875                 VkCommandBuffer commandBuffer) final
3876     {
3877         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         /* gridStartXYZ          */
3878                                                  tcu::Vec3(1, 1, 1),                         /* gridCellSizeXYZ       */
3879                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), /* gridInterCellDeltaXYZ */
3880                                                  GeometryType::AABB));
3881 
3882         m_tlPtr = m_asProviderPtr->createTLAS(context, AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES,
3883                                               commandBuffer, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
3884                                               nullptr,  /* optASPropertyProviderPtr */
3885                                               nullptr); /* optASFedbackPtr          */
3886     }
3887 
initPrograms(SourceCollections & programCollection) const3888     void initPrograms(SourceCollections &programCollection) const final
3889     {
3890         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
3891                                                   0u,    /* flags        */
3892                                                   true); /* allowSpirv14 */
3893 
3894         const bool isSTD430Test         = isExplicitSTD430OffsetTest(m_testType) || isSTD430LayoutTest(m_testType);
3895         const bool requires16BitStorage = usesI16(m_testType) || usesU16(m_testType);
3896         const bool requires8BitStorage  = usesI8(m_testType) || usesU8(m_testType);
3897         const bool requiresInt64        = usesI64(m_testType) || usesU64(m_testType);
3898         const bool usesExplicitOffsets =
3899             isExplicitScalarOffsetTest(m_testType) || isExplicitSTD430OffsetTest(m_testType);
3900         const auto inputBlockVariablesGLSL =
3901             getGLSLForStructItem(m_testItems, usesExplicitOffsets, true /* targetsInputBuffer            */);
3902         const auto outputStructVariablesGLSL =
3903             getGLSLForStructItem(m_testItems, false, /* includeOffsetLayoutQualifier */
3904                                  false /* targetsInputBuffer            */);
3905 
3906         const auto inputBufferGLSL = "layout (" + std::string((!isSTD430Test) ? "scalar, " : "std430, ") +
3907                                      "shaderRecordEXT) buffer ib\n"
3908                                      "{\n" +
3909                                      inputBlockVariablesGLSL + "} inputBuffer;\n";
3910         const auto outputBufferGLSL = "struct OutputData\n"
3911                                       "{\n" +
3912                                       outputStructVariablesGLSL +
3913                                       "};\n"
3914                                       "\n"
3915                                       "layout (std430, set = 0, binding = 0) buffer ob\n"
3916                                       "{\n"
3917                                       "    OutputData results[4];\n"
3918                                       "};\n";
3919 
3920         std::string preamble;
3921 
3922         {
3923             std::stringstream css;
3924 
3925             css << "#version 460 core\n"
3926                    "\n"
3927                    "#extension GL_EXT_ray_tracing : require\n";
3928 
3929             if (!isSTD430Test)
3930             {
3931                 css << "#extension GL_EXT_scalar_block_layout : require\n";
3932             }
3933 
3934             if (requires16BitStorage)
3935             {
3936                 css << "#extension GL_EXT_shader_16bit_storage : require\n";
3937             }
3938 
3939             if (requires8BitStorage)
3940             {
3941                 css << "#extension GL_EXT_shader_8bit_storage : require\n";
3942             }
3943 
3944             if (requiresInt64)
3945             {
3946                 css << "#extension GL_ARB_gpu_shader_int64 : require\n";
3947             }
3948 
3949             preamble = css.str();
3950         }
3951 
3952         {
3953             std::stringstream css;
3954 
3955             css << preamble
3956                 << "\n"
3957                    "                     hitAttributeEXT vec3 unusedAttribute;\n"
3958                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n"
3959                    "\n" +
3960                        inputBufferGLSL + outputBufferGLSL +
3961                        "\n"
3962                        "void main()\n"
3963                        "{\n" +
3964                        getGLSLForSetters(m_testItems, 3) + "}\n";
3965 
3966             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
3967         }
3968 
3969         {
3970             std::stringstream css;
3971 
3972             css << preamble
3973                 << "\n"
3974                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
3975                        inputBufferGLSL + outputBufferGLSL +
3976                        "\n"
3977                        "void main()\n"
3978                        "{\n" +
3979                        getGLSLForSetters(m_testItems, 1) + "}\n";
3980 
3981             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
3982         }
3983 
3984         {
3985             std::stringstream css;
3986 
3987             css << preamble
3988                 << "\n"
3989                    "hitAttributeEXT vec3 hitAttribute;\n"
3990                    "\n" +
3991                        inputBufferGLSL + outputBufferGLSL +
3992                        "\n"
3993                        "void main()\n"
3994                        "{\n" +
3995                        getGLSLForSetters(m_testItems, 2) +
3996                        "\n"
3997                        "    reportIntersectionEXT(0.95f, 0);\n"
3998                        "}\n";
3999 
4000             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
4001         }
4002 
4003         {
4004             std::stringstream css;
4005 
4006             css << preamble
4007                 << "\n"
4008                    "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n"
4009                    "\n" +
4010                        inputBufferGLSL + outputBufferGLSL +
4011                        "\n"
4012                        "void main()\n"
4013                        "{\n"
4014                        "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + gl_LaunchIDEXT.y "
4015                        "* gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
4016                        "\n" +
4017                        getGLSLForSetters(m_testItems, 0) + "}\n";
4018 
4019             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
4020         }
4021 
4022         {
4023             std::stringstream css;
4024 
4025             css << preamble
4026                 << "layout(location = 0)                      rayPayloadEXT vec3       unusedPayload;\n"
4027                    "layout(set      = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
4028                    "\n"
4029                    "void main()\n"
4030                    "{\n"
4031                    "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
4032                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
4033                    "    uint  rayFlags     = 0;\n"
4034                    "    float tmin         = 0.001;\n"
4035                    "    float tmax         = 9.0;\n"
4036                    "\n"
4037                    "    uint  cullMask     = 0xFF;\n"
4038                    "    vec3  cellStartXYZ = vec3(nInvocation * 2.0, 0.0, 0.0);\n"
4039                    "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
4040                    "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
4041                    "    vec3  origin       = target - vec3(0, 2, 0);\n"
4042                    "    vec3  direct       = normalize(target - origin);\n"
4043                    "\n"
4044                    "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, "
4045                    "0);\n"
4046                    "}\n";
4047 
4048             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
4049         }
4050     }
4051 
isExplicitScalarOffsetTest(const TestType & testType)4052     static bool isExplicitScalarOffsetTest(const TestType &testType)
4053     {
4054         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
4055                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
4056                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
4057                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
4058                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
4059                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6);
4060     }
4061 
isExplicitSTD430OffsetTest(const TestType & testType)4062     static bool isExplicitSTD430OffsetTest(const TestType &testType)
4063     {
4064         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
4065                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
4066                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
4067                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
4068                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
4069                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6);
4070     }
4071 
isScalarLayoutTest(const TestType & testType)4072     static bool isScalarLayoutTest(const TestType &testType)
4073     {
4074         return (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
4075                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
4076                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
4077                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
4078                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
4079                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_6);
4080     }
4081 
isSTD430LayoutTest(const TestType & testType)4082     static bool isSTD430LayoutTest(const TestType &testType)
4083     {
4084         return (testType == TestType::SHADER_RECORD_BLOCK_STD430_1) ||
4085                (testType == TestType::SHADER_RECORD_BLOCK_STD430_2) ||
4086                (testType == TestType::SHADER_RECORD_BLOCK_STD430_3) ||
4087                (testType == TestType::SHADER_RECORD_BLOCK_STD430_4) ||
4088                (testType == TestType::SHADER_RECORD_BLOCK_STD430_5) ||
4089                (testType == TestType::SHADER_RECORD_BLOCK_STD430_6);
4090     }
4091 
isTest(const TestType & testType)4092     static bool isTest(const TestType &testType)
4093     {
4094         return (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1) ||
4095                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2) ||
4096                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3) ||
4097                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4) ||
4098                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5) ||
4099                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6) ||
4100                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1) ||
4101                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2) ||
4102                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3) ||
4103                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4) ||
4104                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5) ||
4105                (testType == TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6) ||
4106                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_1) ||
4107                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_2) ||
4108                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_3) ||
4109                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_4) ||
4110                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_5) ||
4111                (testType == TestType::SHADER_RECORD_BLOCK_SCALAR_6) ||
4112                (testType == TestType::SHADER_RECORD_BLOCK_STD430_1) ||
4113                (testType == TestType::SHADER_RECORD_BLOCK_STD430_2) ||
4114                (testType == TestType::SHADER_RECORD_BLOCK_STD430_3) ||
4115                (testType == TestType::SHADER_RECORD_BLOCK_STD430_4) ||
4116                (testType == TestType::SHADER_RECORD_BLOCK_STD430_5) ||
4117                (testType == TestType::SHADER_RECORD_BLOCK_STD430_6);
4118     }
4119 
usesF64(const TestType & testType)4120     static bool usesF64(const TestType &testType)
4121     {
4122         const auto tested_var_types = getVarsToTest(testType);
4123         const bool has_f64 =
4124             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DOUBLE) != tested_var_types.end();
4125         const bool has_f64vec2 =
4126             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC2) != tested_var_types.end();
4127         const bool has_f64vec3 =
4128             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC3) != tested_var_types.end();
4129         const bool has_f64vec4 =
4130             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DVEC4) != tested_var_types.end();
4131         const bool has_f64mat2 =
4132             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT2) != tested_var_types.end();
4133         const bool has_f64mat3 =
4134             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT3) != tested_var_types.end();
4135         const bool has_f64mat4 =
4136             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::DMAT4) != tested_var_types.end();
4137 
4138         return (has_f64 || has_f64vec2 || has_f64vec3 || has_f64vec4 || has_f64mat2 || has_f64mat3 || has_f64mat4);
4139     }
4140 
usesI8(const TestType & testType)4141     static bool usesI8(const TestType &testType)
4142     {
4143         const auto tested_var_types = getVarsToTest(testType);
4144         const bool has_i8 =
4145             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT8) != tested_var_types.end();
4146         const bool has_i8vec2 =
4147             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC2) != tested_var_types.end();
4148         const bool has_i8vec3 =
4149             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC3) != tested_var_types.end();
4150         const bool has_i8vec4 =
4151             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I8VEC4) != tested_var_types.end();
4152 
4153         return (has_i8 || has_i8vec2 || has_i8vec3 || has_i8vec4);
4154     }
4155 
usesI16(const TestType & testType)4156     static bool usesI16(const TestType &testType)
4157     {
4158         const auto tested_var_types = getVarsToTest(testType);
4159         const bool has_i16 =
4160             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT16) != tested_var_types.end();
4161         const bool has_i16vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC2) !=
4162                                  tested_var_types.end();
4163         const bool has_i16vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC3) !=
4164                                  tested_var_types.end();
4165         const bool has_i16vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I16VEC4) !=
4166                                  tested_var_types.end();
4167 
4168         return (has_i16 || has_i16vec2 || has_i16vec3 || has_i16vec4);
4169     }
4170 
usesI64(const TestType & testType)4171     static bool usesI64(const TestType &testType)
4172     {
4173         const auto tested_var_types = getVarsToTest(testType);
4174         const bool has_i64 =
4175             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::INT64) != tested_var_types.end();
4176         const bool has_i64vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC2) !=
4177                                  tested_var_types.end();
4178         const bool has_i64vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC3) !=
4179                                  tested_var_types.end();
4180         const bool has_i64vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::I64VEC4) !=
4181                                  tested_var_types.end();
4182 
4183         return (has_i64 || has_i64vec2 || has_i64vec3 || has_i64vec4);
4184     }
4185 
usesU8(const TestType & testType)4186     static bool usesU8(const TestType &testType)
4187     {
4188         const auto tested_var_types = getVarsToTest(testType);
4189         const bool has_u8 =
4190             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT8) != tested_var_types.end();
4191         const bool has_u8vec2 =
4192             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC2) != tested_var_types.end();
4193         const bool has_u8vec3 =
4194             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC3) != tested_var_types.end();
4195         const bool has_u8vec4 =
4196             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U8VEC4) != tested_var_types.end();
4197 
4198         return (has_u8 || has_u8vec2 || has_u8vec3 || has_u8vec4);
4199     }
4200 
usesU16(const TestType & testType)4201     static bool usesU16(const TestType &testType)
4202     {
4203         const auto tested_var_types = getVarsToTest(testType);
4204         const bool has_u16 =
4205             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT16) != tested_var_types.end();
4206         const bool has_u16vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC2) !=
4207                                  tested_var_types.end();
4208         const bool has_u16vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC3) !=
4209                                  tested_var_types.end();
4210         const bool has_u16vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U16VEC4) !=
4211                                  tested_var_types.end();
4212 
4213         return (has_u16 || has_u16vec2 || has_u16vec3 || has_u16vec4);
4214     }
4215 
usesU64(const TestType & testType)4216     static bool usesU64(const TestType &testType)
4217     {
4218         const auto tested_var_types = getVarsToTest(testType);
4219         const bool has_u64 =
4220             std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::UINT64) != tested_var_types.end();
4221         const bool has_u64vec2 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC2) !=
4222                                  tested_var_types.end();
4223         const bool has_u64vec3 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC3) !=
4224                                  tested_var_types.end();
4225         const bool has_u64vec4 = std::find(tested_var_types.begin(), tested_var_types.end(), VariableType::U64VEC4) !=
4226                                  tested_var_types.end();
4227 
4228         return (has_u64 || has_u64vec2 || has_u64vec3 || has_u64vec4);
4229     }
4230 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const4231     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
4232     {
4233         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
4234         const void *resultBufferDataPtr               = resultBufferPtr->getAllocation().getHostPtr();
4235         bool result                                   = false;
4236 
4237         for (const auto &iterator : m_shaderStageToResultBufferOffset)
4238         {
4239             const auto currentShaderStage = iterator.first;
4240             const auto shaderGroup        = ((currentShaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR) ||
4241                                       (currentShaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) ||
4242                                       (currentShaderStage == VK_SHADER_STAGE_INTERSECTION_BIT_KHR)) ?
4243                                                 ShaderGroups::HIT_GROUP :
4244                                                 ShaderGroups::MISS_GROUP;
4245             const auto resultStartOffset  = iterator.second;
4246 
4247             if (currentShaderStage != VK_SHADER_STAGE_MISS_BIT_KHR)
4248                 continue;
4249 
4250             for (const auto &currentItem : m_testItems.items)
4251             {
4252                 const auto baseDataType      = getBaseType(currentItem.type);
4253                 const auto componentSize     = getComponentSizeBytes(baseDataType);
4254                 const auto &expectedDataVec  = currentItem.shaderGroupToRecordDataMap.at(shaderGroup);
4255                 auto expectedDataPtr         = reinterpret_cast<const uint8_t *>(expectedDataVec.data());
4256                 const auto isMatrixType      = isMatrix(currentItem.type);
4257                 const auto nComponents       = getNComponents(currentItem.type);
4258                 const uint8_t *resultDataPtr = reinterpret_cast<const uint8_t *>(resultBufferDataPtr) +
4259                                                resultStartOffset + currentItem.resultBufferProps.bufferOffset;
4260 
4261                 for (uint32_t nArrayItem = 0; nArrayItem < currentItem.arraySize; ++nArrayItem)
4262                 {
4263                     for (uint32_t nComponent = 0; nComponent < nComponents; ++nComponent)
4264                     {
4265                         const auto expectedComponentDataPtr =
4266                             expectedDataPtr +
4267                             ((!isMatrixType) ? componentSize * nComponent :
4268                                                currentItem.inputBufferProps.matrixElementStartOffsets.at(nComponent));
4269                         const auto resultComponentDataPtr =
4270                             resultDataPtr +
4271                             ((!isMatrixType) ? componentSize * nComponent :
4272                                                currentItem.resultBufferProps.matrixElementStartOffsets.at(nComponent));
4273 
4274                         switch (baseDataType)
4275                         {
4276                         case BaseType::F32:
4277                         {
4278                             if (fabs(*reinterpret_cast<const float *>(resultComponentDataPtr) -
4279                                      *reinterpret_cast<const float *>(expectedComponentDataPtr)) > 1e-3f)
4280                             {
4281                                 goto end;
4282                             }
4283 
4284                             break;
4285                         }
4286 
4287                         case BaseType::F64:
4288                         {
4289                             if (fabs(*reinterpret_cast<const double *>(resultComponentDataPtr) -
4290                                      *reinterpret_cast<const double *>(expectedComponentDataPtr)) > 1e-3)
4291                             {
4292                                 goto end;
4293                             }
4294 
4295                             break;
4296                         }
4297 
4298                         case BaseType::I8:
4299                         {
4300                             if (*reinterpret_cast<const int8_t *>(resultComponentDataPtr) !=
4301                                 *reinterpret_cast<const int8_t *>(expectedComponentDataPtr))
4302                             {
4303                                 goto end;
4304                             }
4305 
4306                             break;
4307                         }
4308 
4309                         case BaseType::I16:
4310                         {
4311                             if (*reinterpret_cast<const int16_t *>(resultComponentDataPtr) !=
4312                                 *reinterpret_cast<const int16_t *>(expectedComponentDataPtr))
4313                             {
4314                                 goto end;
4315                             }
4316 
4317                             break;
4318                         }
4319 
4320                         case BaseType::I32:
4321                         {
4322                             if (*reinterpret_cast<const int32_t *>(resultComponentDataPtr) !=
4323                                 *reinterpret_cast<const int32_t *>(expectedComponentDataPtr))
4324                             {
4325                                 goto end;
4326                             }
4327 
4328                             break;
4329                         }
4330 
4331                         case BaseType::I64:
4332                         {
4333                             if (*reinterpret_cast<const int64_t *>(resultComponentDataPtr) !=
4334                                 *reinterpret_cast<const int64_t *>(expectedComponentDataPtr))
4335                             {
4336                                 goto end;
4337                             }
4338 
4339                             break;
4340                         }
4341 
4342                         case BaseType::U8:
4343                         {
4344                             if (*reinterpret_cast<const uint8_t *>(resultComponentDataPtr) !=
4345                                 *reinterpret_cast<const uint8_t *>(expectedComponentDataPtr))
4346                             {
4347                                 goto end;
4348                             }
4349 
4350                             break;
4351                         }
4352 
4353                         case BaseType::U16:
4354                         {
4355                             if (*reinterpret_cast<const uint16_t *>(resultComponentDataPtr) !=
4356                                 *reinterpret_cast<const uint16_t *>(expectedComponentDataPtr))
4357                             {
4358                                 goto end;
4359                             }
4360 
4361                             break;
4362                         }
4363 
4364                         case BaseType::U32:
4365                         {
4366                             if (*reinterpret_cast<const uint32_t *>(resultComponentDataPtr) !=
4367                                 *reinterpret_cast<const uint32_t *>(expectedComponentDataPtr))
4368                             {
4369                                 goto end;
4370                             }
4371 
4372                             break;
4373                         }
4374 
4375                         case BaseType::U64:
4376                         {
4377                             if (*reinterpret_cast<const uint64_t *>(resultComponentDataPtr) !=
4378                                 *reinterpret_cast<const uint64_t *>(expectedComponentDataPtr))
4379                             {
4380                                 goto end;
4381                             }
4382 
4383                             break;
4384                         }
4385 
4386                         default:
4387                         {
4388                             DE_ASSERT(false);
4389                         }
4390                         }
4391                     }
4392 
4393                     expectedDataPtr += currentItem.inputBufferProps.arrayStride;
4394                     resultDataPtr += currentItem.resultBufferProps.arrayStride;
4395                 }
4396             }
4397         }
4398 
4399         result = true;
4400     end:
4401         return result;
4402     }
4403 
4404 private:
4405     typedef struct Item
4406     {
4407         struct BufferProps
4408         {
4409             uint32_t arrayStride;
4410             uint32_t bufferOffset;
4411             std::vector<uint32_t> matrixElementStartOffsets; //< Holds offsets to consecutive matrix element values.
4412 
BufferPropsvkt::RayTracing::__anon2e56165f0111::ShaderRecordBlockTest::Item::BufferProps4413             BufferProps() : arrayStride(0), bufferOffset(0xFFFFFFFF)
4414             {
4415                 /* Stub */
4416             }
4417         };
4418 
4419         BufferProps inputBufferProps;
4420         BufferProps resultBufferProps;
4421 
4422         uint32_t arraySize;
4423         MatrixMajorOrder matrixOrder;
4424         std::string name;
4425         VariableType type;
4426 
4427         std::map<ShaderGroups, std::vector<uint8_t>> shaderGroupToRecordDataMap;
4428 
Itemvkt::RayTracing::__anon2e56165f0111::ShaderRecordBlockTest::Item4429         Item() : arraySize(0), matrixOrder(MatrixMajorOrder::UNKNOWN), type(VariableType::UNKNOWN)
4430         {
4431             /* Stub */
4432         }
4433     } Item;
4434 
4435     struct StructItem
4436     {
4437         std::vector<Item> items;
4438     };
4439 
4440     // Private functions
getBaseType(const VariableType & type) const4441     BaseType getBaseType(const VariableType &type) const
4442     {
4443         auto result = BaseType::UNKNOWN;
4444 
4445         switch (type)
4446         {
4447         case VariableType::FLOAT:
4448         case VariableType::MAT2:
4449         case VariableType::MAT2X2:
4450         case VariableType::MAT2X3:
4451         case VariableType::MAT2X4:
4452         case VariableType::MAT3:
4453         case VariableType::MAT3X2:
4454         case VariableType::MAT3X3:
4455         case VariableType::MAT3X4:
4456         case VariableType::MAT4:
4457         case VariableType::MAT4X2:
4458         case VariableType::MAT4X3:
4459         case VariableType::MAT4X4:
4460         case VariableType::VEC2:
4461         case VariableType::VEC3:
4462         case VariableType::VEC4:
4463         {
4464             result = BaseType::F32;
4465 
4466             break;
4467         }
4468 
4469         case VariableType::DOUBLE:
4470         case VariableType::DMAT2:
4471         case VariableType::DMAT2X2:
4472         case VariableType::DMAT2X3:
4473         case VariableType::DMAT2X4:
4474         case VariableType::DMAT3:
4475         case VariableType::DMAT3X2:
4476         case VariableType::DMAT3X3:
4477         case VariableType::DMAT3X4:
4478         case VariableType::DMAT4:
4479         case VariableType::DMAT4X2:
4480         case VariableType::DMAT4X3:
4481         case VariableType::DMAT4X4:
4482         case VariableType::DVEC2:
4483         case VariableType::DVEC3:
4484         case VariableType::DVEC4:
4485         {
4486             result = BaseType::F64;
4487 
4488             break;
4489         }
4490 
4491         case VariableType::INT16:
4492         case VariableType::I16VEC2:
4493         case VariableType::I16VEC3:
4494         case VariableType::I16VEC4:
4495         {
4496             result = BaseType::I16;
4497 
4498             break;
4499         }
4500 
4501         case VariableType::INT:
4502         case VariableType::IVEC2:
4503         case VariableType::IVEC3:
4504         case VariableType::IVEC4:
4505         {
4506             result = BaseType::I32;
4507 
4508             break;
4509         }
4510 
4511         case VariableType::INT64:
4512         case VariableType::I64VEC2:
4513         case VariableType::I64VEC3:
4514         case VariableType::I64VEC4:
4515         {
4516             result = BaseType::I64;
4517 
4518             break;
4519         }
4520 
4521         case VariableType::INT8:
4522         case VariableType::I8VEC2:
4523         case VariableType::I8VEC3:
4524         case VariableType::I8VEC4:
4525         {
4526             result = BaseType::I8;
4527 
4528             break;
4529         }
4530 
4531         case VariableType::UINT16:
4532         case VariableType::U16VEC2:
4533         case VariableType::U16VEC3:
4534         case VariableType::U16VEC4:
4535         {
4536             result = BaseType::U16;
4537 
4538             break;
4539         }
4540 
4541         case VariableType::UINT:
4542         case VariableType::UVEC2:
4543         case VariableType::UVEC3:
4544         case VariableType::UVEC4:
4545         {
4546             result = BaseType::U32;
4547 
4548             break;
4549         }
4550 
4551         case VariableType::UINT64:
4552         case VariableType::U64VEC2:
4553         case VariableType::U64VEC3:
4554         case VariableType::U64VEC4:
4555         {
4556             result = BaseType::U64;
4557 
4558             break;
4559         }
4560 
4561         case VariableType::UINT8:
4562         case VariableType::U8VEC2:
4563         case VariableType::U8VEC3:
4564         case VariableType::U8VEC4:
4565         {
4566             result = BaseType::U8;
4567 
4568             break;
4569         }
4570 
4571         default:
4572         {
4573             DE_ASSERT(false);
4574         }
4575         }
4576 
4577         return result;
4578     }
4579 
getComponentSizeBytes(const BaseType & type) const4580     uint32_t getComponentSizeBytes(const BaseType &type) const
4581     {
4582         uint32_t result = 0;
4583 
4584         switch (type)
4585         {
4586         case BaseType::I8:
4587         case BaseType::U8:
4588         {
4589             result = 1;
4590 
4591             break;
4592         }
4593 
4594         case BaseType::I16:
4595         case BaseType::U16:
4596         {
4597             result = 2;
4598 
4599             break;
4600         }
4601 
4602         case BaseType::F32:
4603         case BaseType::I32:
4604         case BaseType::U32:
4605         {
4606             result = 4;
4607 
4608             break;
4609         }
4610 
4611         case BaseType::F64:
4612         case BaseType::I64:
4613         case BaseType::U64:
4614         {
4615             result = 8;
4616 
4617             break;
4618         }
4619 
4620         default:
4621         {
4622             DE_ASSERT(false);
4623         }
4624         }
4625 
4626         return result;
4627     }
4628 
getGLSLForSetters(const StructItem & item,const uint32_t & nResultArrayItem) const4629     std::string getGLSLForSetters(const StructItem &item, const uint32_t &nResultArrayItem) const
4630     {
4631         std::string result;
4632 
4633         for (const auto &currentItem : item.items)
4634         {
4635             if (currentItem.arraySize > 1)
4636             {
4637                 result += "for (uint nArrayItem = 0; nArrayItem < " + de::toString(currentItem.arraySize) +
4638                           "; ++nArrayItem)\n"
4639                           "{\n";
4640             }
4641 
4642             result += "results[" + de::toString(nResultArrayItem) + "]." + currentItem.name;
4643 
4644             if (currentItem.arraySize > 1)
4645             {
4646                 result += "[nArrayItem]";
4647             }
4648 
4649             result += " = inputBuffer." + currentItem.name;
4650 
4651             if (currentItem.arraySize > 1)
4652             {
4653                 result += "[nArrayItem]";
4654             }
4655 
4656             result += ";\n";
4657 
4658             if (currentItem.arraySize > 1)
4659             {
4660                 result += "}\n";
4661             }
4662         }
4663 
4664         return result;
4665     }
4666 
getGLSLForStructItem(const StructItem & item,const bool & includeOffsetLayoutQualifier,const bool & targetsInputBuffer) const4667     std::string getGLSLForStructItem(const StructItem &item, const bool &includeOffsetLayoutQualifier,
4668                                      const bool &targetsInputBuffer) const
4669     {
4670         std::string result;
4671 
4672         for (const auto &currentItem : item.items)
4673         {
4674             const bool needsMatrixOrderQualifier = (currentItem.matrixOrder == MatrixMajorOrder::ROW_MAJOR);
4675             const auto variableTypeGLSL          = getVariableTypeGLSLType(currentItem.type);
4676             uint32_t nLayoutQualifiersUsed       = 0;
4677             const uint32_t nLayoutQualifierUses =
4678                 ((includeOffsetLayoutQualifier) ? 1 : 0) + ((needsMatrixOrderQualifier) ? 1 : 0);
4679             const bool usesLayoutQualifiers = (nLayoutQualifierUses > 0);
4680 
4681             if (usesLayoutQualifiers)
4682             {
4683                 result += "layout(";
4684             }
4685 
4686             if (includeOffsetLayoutQualifier)
4687             {
4688                 result += "offset = " + de::toString((targetsInputBuffer) ? currentItem.inputBufferProps.bufferOffset :
4689                                                                             currentItem.resultBufferProps.bufferOffset);
4690 
4691                 if ((++nLayoutQualifiersUsed) != nLayoutQualifierUses)
4692                 {
4693                     result += ", ";
4694                 }
4695             }
4696 
4697             if (needsMatrixOrderQualifier)
4698             {
4699                 result += ((currentItem.matrixOrder == MatrixMajorOrder::COLUMN_MAJOR) ? "column_major" : "row_major");
4700 
4701                 if ((++nLayoutQualifiersUsed) != nLayoutQualifierUses)
4702                 {
4703                     result += ", ";
4704                 }
4705             }
4706 
4707             if (usesLayoutQualifiers)
4708             {
4709                 result += ") ";
4710             }
4711 
4712             result += variableTypeGLSL + std::string(" ") + currentItem.name;
4713 
4714             if (currentItem.arraySize != 1)
4715             {
4716                 result += "[" + de::toString(currentItem.arraySize) + "]";
4717             }
4718 
4719             result += ";\n";
4720         }
4721 
4722         return result;
4723     }
4724 
getMatrixSize(const VariableType & type) const4725     tcu::UVec2 getMatrixSize(const VariableType &type) const
4726     {
4727         auto result = tcu::UVec2();
4728 
4729         switch (type)
4730         {
4731         case VariableType::DMAT2:
4732         case VariableType::DMAT2X2:
4733         case VariableType::MAT2:
4734         case VariableType::MAT2X2:
4735         {
4736             result = tcu::UVec2(2, 2);
4737 
4738             break;
4739         }
4740 
4741         case VariableType::DMAT2X3:
4742         case VariableType::MAT2X3:
4743         {
4744             result = tcu::UVec2(2, 3);
4745 
4746             break;
4747         }
4748 
4749         case VariableType::DMAT2X4:
4750         case VariableType::MAT2X4:
4751         {
4752             result = tcu::UVec2(2, 4);
4753 
4754             break;
4755         }
4756 
4757         case VariableType::DMAT3:
4758         case VariableType::DMAT3X3:
4759         case VariableType::MAT3:
4760         case VariableType::MAT3X3:
4761         {
4762             result = tcu::UVec2(3, 3);
4763 
4764             break;
4765         }
4766 
4767         case VariableType::DMAT3X2:
4768         case VariableType::MAT3X2:
4769         {
4770             result = tcu::UVec2(3, 2);
4771 
4772             break;
4773         }
4774 
4775         case VariableType::DMAT3X4:
4776         case VariableType::MAT3X4:
4777         {
4778             result = tcu::UVec2(3, 4);
4779 
4780             break;
4781         }
4782 
4783         case VariableType::DMAT4:
4784         case VariableType::DMAT4X4:
4785         case VariableType::MAT4:
4786         case VariableType::MAT4X4:
4787         {
4788             result = tcu::UVec2(4, 4);
4789 
4790             break;
4791         }
4792 
4793         case VariableType::DMAT4X2:
4794         case VariableType::MAT4X2:
4795         {
4796             result = tcu::UVec2(4, 2);
4797 
4798             break;
4799         }
4800 
4801         case VariableType::DMAT4X3:
4802         case VariableType::MAT4X3:
4803         {
4804             result = tcu::UVec2(4, 3);
4805 
4806             break;
4807         }
4808 
4809         default:
4810         {
4811             DE_ASSERT(false);
4812 
4813             break;
4814         }
4815         }
4816 
4817         return result;
4818     }
4819 
getNComponents(const VariableType & type) const4820     uint32_t getNComponents(const VariableType &type) const
4821     {
4822         uint32_t result = 0;
4823 
4824         switch (type)
4825         {
4826         case VariableType::DOUBLE:
4827         case VariableType::FLOAT:
4828         case VariableType::INT8:
4829         case VariableType::INT16:
4830         case VariableType::INT64:
4831         case VariableType::INT:
4832         case VariableType::UINT:
4833         case VariableType::UINT8:
4834         case VariableType::UINT16:
4835         case VariableType::UINT64:
4836         {
4837             result = 1;
4838 
4839             break;
4840         }
4841 
4842         case VariableType::DVEC2:
4843         case VariableType::I8VEC2:
4844         case VariableType::I16VEC2:
4845         case VariableType::I64VEC2:
4846         case VariableType::IVEC2:
4847         case VariableType::U8VEC2:
4848         case VariableType::U16VEC2:
4849         case VariableType::U64VEC2:
4850         case VariableType::UVEC2:
4851         case VariableType::VEC2:
4852         {
4853             result = 2;
4854 
4855             break;
4856         }
4857 
4858         case VariableType::DVEC3:
4859         case VariableType::I8VEC3:
4860         case VariableType::I16VEC3:
4861         case VariableType::I64VEC3:
4862         case VariableType::IVEC3:
4863         case VariableType::U8VEC3:
4864         case VariableType::U16VEC3:
4865         case VariableType::U64VEC3:
4866         case VariableType::UVEC3:
4867         case VariableType::VEC3:
4868         {
4869             result = 3;
4870 
4871             break;
4872         }
4873 
4874         case VariableType::DMAT2:
4875         case VariableType::DMAT2X2:
4876         case VariableType::DVEC4:
4877         case VariableType::I8VEC4:
4878         case VariableType::I16VEC4:
4879         case VariableType::I64VEC4:
4880         case VariableType::IVEC4:
4881         case VariableType::MAT2:
4882         case VariableType::MAT2X2:
4883         case VariableType::U8VEC4:
4884         case VariableType::U16VEC4:
4885         case VariableType::U64VEC4:
4886         case VariableType::UVEC4:
4887         case VariableType::VEC4:
4888         {
4889             result = 4;
4890 
4891             break;
4892         }
4893 
4894         case VariableType::DMAT2X3:
4895         case VariableType::DMAT3X2:
4896         case VariableType::MAT2X3:
4897         case VariableType::MAT3X2:
4898         {
4899             result = 6;
4900 
4901             break;
4902         }
4903 
4904         case VariableType::DMAT2X4:
4905         case VariableType::DMAT4X2:
4906         case VariableType::MAT2X4:
4907         case VariableType::MAT4X2:
4908         {
4909             result = 8;
4910 
4911             break;
4912         }
4913 
4914         case VariableType::DMAT3:
4915         case VariableType::DMAT3X3:
4916         case VariableType::MAT3:
4917         case VariableType::MAT3X3:
4918         {
4919             result = 9;
4920 
4921             break;
4922         }
4923 
4924         case VariableType::DMAT3X4:
4925         case VariableType::DMAT4X3:
4926         case VariableType::MAT3X4:
4927         case VariableType::MAT4X3:
4928         {
4929             result = 12;
4930 
4931             break;
4932         }
4933 
4934         case VariableType::DMAT4:
4935         case VariableType::DMAT4X4:
4936         case VariableType::MAT4:
4937         case VariableType::MAT4X4:
4938         {
4939             result = 16;
4940 
4941             break;
4942         }
4943 
4944         default:
4945         {
4946             DE_ASSERT(false);
4947         }
4948         }
4949 
4950         return result;
4951     }
4952 
getNMatrixColumns(const VariableType & type) const4953     uint32_t getNMatrixColumns(const VariableType &type) const
4954     {
4955         uint32_t result = 0;
4956 
4957         switch (type)
4958         {
4959         case VariableType::DMAT2:
4960         case VariableType::DMAT2X2:
4961         case VariableType::DMAT2X3:
4962         case VariableType::DMAT2X4:
4963         case VariableType::MAT2:
4964         case VariableType::MAT2X2:
4965         case VariableType::MAT2X3:
4966         case VariableType::MAT2X4:
4967         {
4968             result = 2;
4969 
4970             break;
4971         }
4972 
4973         case VariableType::DMAT3:
4974         case VariableType::DMAT3X2:
4975         case VariableType::DMAT3X3:
4976         case VariableType::DMAT3X4:
4977         case VariableType::MAT3:
4978         case VariableType::MAT3X2:
4979         case VariableType::MAT3X4:
4980         case VariableType::MAT3X3:
4981         {
4982             result = 3;
4983 
4984             break;
4985         }
4986 
4987         case VariableType::DMAT4X2:
4988         case VariableType::MAT4X2:
4989         case VariableType::DMAT4X3:
4990         case VariableType::MAT4X3:
4991         case VariableType::DMAT4X4:
4992         case VariableType::DMAT4:
4993         case VariableType::MAT4X4:
4994         case VariableType::MAT4:
4995         {
4996             result = 4;
4997 
4998             break;
4999         }
5000 
5001         default:
5002         {
5003             DE_ASSERT(false);
5004         }
5005         }
5006 
5007         return result;
5008     }
5009 
getNMatrixRows(const VariableType & type) const5010     uint32_t getNMatrixRows(const VariableType &type) const
5011     {
5012         uint32_t result = 0;
5013 
5014         switch (type)
5015         {
5016         case VariableType::DMAT2:
5017         case VariableType::DMAT2X2:
5018         case VariableType::DMAT3X2:
5019         case VariableType::DMAT4X2:
5020         case VariableType::MAT2:
5021         case VariableType::MAT2X2:
5022         case VariableType::MAT3X2:
5023         case VariableType::MAT4X2:
5024         {
5025             result = 2;
5026 
5027             break;
5028         }
5029 
5030         case VariableType::DMAT2X3:
5031         case VariableType::DMAT3:
5032         case VariableType::DMAT3X3:
5033         case VariableType::DMAT4X3:
5034         case VariableType::MAT2X3:
5035         case VariableType::MAT3:
5036         case VariableType::MAT3X3:
5037         case VariableType::MAT4X3:
5038         {
5039             result = 3;
5040 
5041             break;
5042         }
5043 
5044         case VariableType::DMAT2X4:
5045         case VariableType::DMAT3X4:
5046         case VariableType::DMAT4:
5047         case VariableType::DMAT4X4:
5048         case VariableType::MAT2X4:
5049         case VariableType::MAT3X4:
5050         case VariableType::MAT4:
5051         case VariableType::MAT4X4:
5052         {
5053             result = 4;
5054 
5055             break;
5056         }
5057 
5058         default:
5059         {
5060             DE_ASSERT(false);
5061         }
5062         }
5063 
5064         return result;
5065     }
5066 
getVariableTypeGLSLType(const VariableType & type) const5067     const char *getVariableTypeGLSLType(const VariableType &type) const
5068     {
5069         const char *resultPtr = "!?";
5070 
5071         switch (type)
5072         {
5073         case VariableType::DOUBLE:
5074             resultPtr = "double";
5075             break;
5076         case VariableType::DMAT2:
5077             resultPtr = "dmat2";
5078             break;
5079         case VariableType::DMAT2X2:
5080             resultPtr = "dmat2x2";
5081             break;
5082         case VariableType::DMAT2X3:
5083             resultPtr = "dmat2x3";
5084             break;
5085         case VariableType::DMAT2X4:
5086             resultPtr = "dmat2x4";
5087             break;
5088         case VariableType::DMAT3:
5089             resultPtr = "dmat3";
5090             break;
5091         case VariableType::DMAT3X2:
5092             resultPtr = "dmat3x2";
5093             break;
5094         case VariableType::DMAT3X3:
5095             resultPtr = "dmat3x3";
5096             break;
5097         case VariableType::DMAT3X4:
5098             resultPtr = "dmat3x4";
5099             break;
5100         case VariableType::DMAT4:
5101             resultPtr = "dmat4";
5102             break;
5103         case VariableType::DMAT4X2:
5104             resultPtr = "dmat4x2";
5105             break;
5106         case VariableType::DMAT4X3:
5107             resultPtr = "dmat4x3";
5108             break;
5109         case VariableType::DMAT4X4:
5110             resultPtr = "dmat4x4";
5111             break;
5112         case VariableType::DVEC2:
5113             resultPtr = "dvec2";
5114             break;
5115         case VariableType::DVEC3:
5116             resultPtr = "dvec3";
5117             break;
5118         case VariableType::DVEC4:
5119             resultPtr = "dvec4";
5120             break;
5121         case VariableType::FLOAT:
5122             resultPtr = "float";
5123             break;
5124         case VariableType::INT16:
5125             resultPtr = "int16_t";
5126             break;
5127         case VariableType::INT64:
5128             resultPtr = "int64_t";
5129             break;
5130         case VariableType::INT8:
5131             resultPtr = "int8_t";
5132             break;
5133         case VariableType::INT:
5134             resultPtr = "int";
5135             break;
5136         case VariableType::I16VEC2:
5137             resultPtr = "i16vec2";
5138             break;
5139         case VariableType::I16VEC3:
5140             resultPtr = "i16vec3";
5141             break;
5142         case VariableType::I16VEC4:
5143             resultPtr = "i16vec4";
5144             break;
5145         case VariableType::I64VEC2:
5146             resultPtr = "i64vec2";
5147             break;
5148         case VariableType::I64VEC3:
5149             resultPtr = "i64vec3";
5150             break;
5151         case VariableType::I64VEC4:
5152             resultPtr = "i64vec4";
5153             break;
5154         case VariableType::I8VEC2:
5155             resultPtr = "i8vec2";
5156             break;
5157         case VariableType::I8VEC3:
5158             resultPtr = "i8vec3";
5159             break;
5160         case VariableType::I8VEC4:
5161             resultPtr = "i8vec4";
5162             break;
5163         case VariableType::IVEC2:
5164             resultPtr = "ivec2";
5165             break;
5166         case VariableType::IVEC3:
5167             resultPtr = "ivec3";
5168             break;
5169         case VariableType::IVEC4:
5170             resultPtr = "ivec4";
5171             break;
5172         case VariableType::MAT2:
5173             resultPtr = "mat2";
5174             break;
5175         case VariableType::MAT2X2:
5176             resultPtr = "mat2x2";
5177             break;
5178         case VariableType::MAT2X3:
5179             resultPtr = "mat2x3";
5180             break;
5181         case VariableType::MAT2X4:
5182             resultPtr = "mat2x4";
5183             break;
5184         case VariableType::MAT3:
5185             resultPtr = "mat3";
5186             break;
5187         case VariableType::MAT3X2:
5188             resultPtr = "mat3x2";
5189             break;
5190         case VariableType::MAT3X3:
5191             resultPtr = "mat3x3";
5192             break;
5193         case VariableType::MAT3X4:
5194             resultPtr = "mat3x4";
5195             break;
5196         case VariableType::MAT4:
5197             resultPtr = "mat4";
5198             break;
5199         case VariableType::MAT4X2:
5200             resultPtr = "mat4x2";
5201             break;
5202         case VariableType::MAT4X3:
5203             resultPtr = "mat4x3";
5204             break;
5205         case VariableType::MAT4X4:
5206             resultPtr = "mat4x4";
5207             break;
5208         case VariableType::UINT16:
5209             resultPtr = "uint16_t";
5210             break;
5211         case VariableType::UINT64:
5212             resultPtr = "uint64_t";
5213             break;
5214         case VariableType::UINT8:
5215             resultPtr = "uint8_t";
5216             break;
5217         case VariableType::UINT:
5218             resultPtr = "uint";
5219             break;
5220         case VariableType::U16VEC2:
5221             resultPtr = "u16vec2";
5222             break;
5223         case VariableType::U16VEC3:
5224             resultPtr = "u16vec3";
5225             break;
5226         case VariableType::U16VEC4:
5227             resultPtr = "u16vec4";
5228             break;
5229         case VariableType::U64VEC2:
5230             resultPtr = "u64vec2";
5231             break;
5232         case VariableType::U64VEC3:
5233             resultPtr = "u64vec3";
5234             break;
5235         case VariableType::U64VEC4:
5236             resultPtr = "u64vec4";
5237             break;
5238         case VariableType::U8VEC2:
5239             resultPtr = "u8vec2";
5240             break;
5241         case VariableType::U8VEC3:
5242             resultPtr = "u8vec3";
5243             break;
5244         case VariableType::U8VEC4:
5245             resultPtr = "u8vec4";
5246             break;
5247         case VariableType::UVEC2:
5248             resultPtr = "uvec2";
5249             break;
5250         case VariableType::UVEC3:
5251             resultPtr = "uvec3";
5252             break;
5253         case VariableType::UVEC4:
5254             resultPtr = "uvec4";
5255             break;
5256         case VariableType::VEC2:
5257             resultPtr = "vec2";
5258             break;
5259         case VariableType::VEC3:
5260             resultPtr = "vec3";
5261             break;
5262         case VariableType::VEC4:
5263             resultPtr = "vec4";
5264             break;
5265 
5266         default:
5267         {
5268             DE_ASSERT(false);
5269         }
5270         }
5271 
5272         return resultPtr;
5273     }
5274 
initTestItems()5275     void initTestItems()
5276     {
5277         de::Random randomNumberGenerator(13567);
5278         const uint32_t testArraySizes[] = {3, 7, 5};
5279 
5280         const ShaderGroups shaderGroups[] = {
5281             ShaderGroups::HIT_GROUP,
5282             ShaderGroups::MISS_GROUP,
5283         };
5284 
5285         const auto nTestArraySizes = sizeof(testArraySizes) / sizeof(testArraySizes[0]);
5286 
5287         for (const auto &currentVariableType : m_varTypesToTest)
5288         {
5289             const auto currentArraySize =
5290                 testArraySizes[static_cast<uint32_t>(m_testItems.items.size()) % nTestArraySizes];
5291             Item newItem;
5292 
5293             newItem.arraySize = currentArraySize;
5294             newItem.name      = "var" + de::toString(m_testItems.items.size());
5295             newItem.type      = currentVariableType;
5296 
5297             // TODO: glslang issue.
5298             // newItem.matrixOrder = static_cast<MatrixMajorOrder>(static_cast<uint32_t>(m_testItems.items.size() ) % static_cast<uint32_t>(MatrixMajorOrder::UNKNOWN) );
5299 
5300             newItem.matrixOrder = MatrixMajorOrder::COLUMN_MAJOR;
5301 
5302             m_testItems.items.push_back(newItem);
5303         }
5304 
5305         // Determine start offsets for matrix elements.
5306         //
5307         // Note: result buffer aways uses std430 layout.
5308         setSTD430MatrixElementOffsets(m_testItems, false /* updateInputBufferProps */);
5309         setSTD430ArrayStrides(m_testItems, false /* updateInputBufferProps */);
5310         setSTD430BufferOffsets(m_testItems, false /* updateInputBufferProps */);
5311 
5312         switch (m_testType)
5313         {
5314         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5315         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5316         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5317         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5318         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5319         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5320         {
5321             setExplicitScalarOffsetMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5322 
5323             break;
5324         }
5325 
5326         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5327         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5328         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5329         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5330         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5331         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5332         {
5333             setExplicitSTD430OffsetMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5334 
5335             break;
5336         }
5337 
5338         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5339         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5340         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5341         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5342         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5343         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5344         {
5345             setScalarMatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5346 
5347             break;
5348         }
5349 
5350         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5351         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5352         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5353         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5354         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5355         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5356         {
5357             setSTD430MatrixElementOffsets(m_testItems, true /* updateInputBufferProps */);
5358 
5359             break;
5360         }
5361 
5362         default:
5363         {
5364             DE_ASSERT(false);
5365         }
5366         }
5367 
5368         // Configure array strides for the variables.
5369         switch (m_testType)
5370         {
5371         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5372         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5373         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5374         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5375         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5376         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5377         {
5378             setExplicitScalarOffsetArrayStrides(m_testItems, true /* updateInputBufferProps */);
5379 
5380             break;
5381         }
5382 
5383         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5384         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5385         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5386         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5387         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5388         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5389         {
5390             setExplicitSTD430OffsetArrayStrides(m_testItems, true /* updateInputBufferProps */);
5391 
5392             break;
5393         }
5394 
5395         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5396         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5397         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5398         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5399         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5400         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5401         {
5402             setScalarArrayStrides(m_testItems, true /* updateInputBufferProps */);
5403 
5404             break;
5405         }
5406 
5407         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5408         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5409         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5410         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5411         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5412         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5413         {
5414             setSTD430ArrayStrides(m_testItems, true /* updateInputBufferProps */);
5415 
5416             break;
5417         }
5418 
5419         default:
5420         {
5421             DE_ASSERT(false);
5422         }
5423         }
5424 
5425         // Configure buffer offsets for the variables.
5426         switch (m_testType)
5427         {
5428         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
5429         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
5430         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
5431         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
5432         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
5433         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
5434         {
5435             setExplicitScalarOffsetBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5436 
5437             break;
5438         }
5439 
5440         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
5441         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
5442         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
5443         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
5444         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
5445         case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
5446         {
5447             setExplicitSTD430OffsetBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5448 
5449             break;
5450         }
5451 
5452         case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
5453         case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
5454         case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
5455         case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
5456         case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
5457         case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
5458         {
5459             setScalarBufferOffsets(m_testItems, true /* updateInputBufferProps */);
5460 
5461             break;
5462         }
5463 
5464         case TestType::SHADER_RECORD_BLOCK_STD430_1:
5465         case TestType::SHADER_RECORD_BLOCK_STD430_2:
5466         case TestType::SHADER_RECORD_BLOCK_STD430_3:
5467         case TestType::SHADER_RECORD_BLOCK_STD430_4:
5468         case TestType::SHADER_RECORD_BLOCK_STD430_5:
5469         case TestType::SHADER_RECORD_BLOCK_STD430_6:
5470         {
5471             setSTD430BufferOffsets(m_testItems, true /* updateInputBufferProps */);
5472 
5473             break;
5474         }
5475 
5476         default:
5477         {
5478             DE_ASSERT(false);
5479         }
5480         }
5481 
5482         // Bake data to be used in the tested buffer.
5483         for (auto &currentTestItem : m_testItems.items)
5484         {
5485             const auto baseType           = getBaseType(currentTestItem.type);
5486             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5487             const bool isMatrixType       = isMatrix(currentTestItem.type);
5488             const auto nComponents        = getNComponents(currentTestItem.type);
5489             const auto nBytesNeeded       = currentTestItem.arraySize * currentTestItem.inputBufferProps.arrayStride;
5490 
5491             for (const auto &currentShaderGroup : shaderGroups)
5492             {
5493                 auto &currentDataVec = currentTestItem.shaderGroupToRecordDataMap[currentShaderGroup];
5494 
5495                 currentDataVec.resize(nBytesNeeded);
5496 
5497                 for (uint32_t nArrayItem = 0; nArrayItem < currentTestItem.arraySize; ++nArrayItem)
5498                 {
5499                     uint8_t *currentItemDataPtr =
5500                         currentDataVec.data() + nArrayItem * currentTestItem.inputBufferProps.arrayStride;
5501 
5502                     for (uint32_t nComponent = 0; nComponent < nComponents; ++nComponent)
5503                     {
5504                         switch (baseType)
5505                         {
5506                         case BaseType::F32:
5507                         {
5508                             DE_ASSERT(currentItemDataPtr + sizeof(float) <=
5509                                       currentDataVec.data() + currentDataVec.size());
5510 
5511                             *reinterpret_cast<float *>(currentItemDataPtr) = randomNumberGenerator.getFloat();
5512 
5513                             break;
5514                         }
5515 
5516                         case BaseType::F64:
5517                         {
5518                             DE_ASSERT(currentItemDataPtr + sizeof(double) <=
5519                                       currentDataVec.data() + currentDataVec.size());
5520 
5521                             *reinterpret_cast<double *>(currentItemDataPtr) = randomNumberGenerator.getDouble();
5522 
5523                             break;
5524                         }
5525 
5526                         case BaseType::I8:
5527                         {
5528                             DE_ASSERT(currentItemDataPtr + sizeof(int8_t) <=
5529                                       currentDataVec.data() + currentDataVec.size());
5530 
5531                             *reinterpret_cast<int8_t *>(currentItemDataPtr) =
5532                                 static_cast<int8_t>(randomNumberGenerator.getInt(-128, 127));
5533 
5534                             break;
5535                         }
5536 
5537                         case BaseType::I16:
5538                         {
5539                             DE_ASSERT(currentItemDataPtr + sizeof(int16_t) <=
5540                                       currentDataVec.data() + currentDataVec.size());
5541 
5542                             *reinterpret_cast<int16_t *>(currentItemDataPtr) =
5543                                 static_cast<int16_t>(randomNumberGenerator.getInt(-32768, 32767));
5544 
5545                             break;
5546                         }
5547 
5548                         case BaseType::I32:
5549                         {
5550                             DE_ASSERT(currentItemDataPtr + sizeof(int32_t) <=
5551                                       currentDataVec.data() + currentDataVec.size());
5552 
5553                             *reinterpret_cast<int32_t *>(currentItemDataPtr) = randomNumberGenerator.getInt(
5554                                 static_cast<int>(-2147483648LL), static_cast<int>(2147483647LL));
5555 
5556                             break;
5557                         }
5558 
5559                         case BaseType::I64:
5560                         {
5561                             DE_ASSERT(currentItemDataPtr + sizeof(int64_t) <=
5562                                       currentDataVec.data() + currentDataVec.size());
5563 
5564                             *reinterpret_cast<int64_t *>(currentItemDataPtr) = randomNumberGenerator.getInt64();
5565 
5566                             break;
5567                         }
5568 
5569                         case BaseType::U8:
5570                         {
5571                             DE_ASSERT(currentItemDataPtr + sizeof(uint8_t) <=
5572                                       currentDataVec.data() + currentDataVec.size());
5573 
5574                             *reinterpret_cast<uint8_t *>(currentItemDataPtr) = randomNumberGenerator.getUint8();
5575 
5576                             break;
5577                         }
5578 
5579                         case BaseType::U16:
5580                         {
5581                             DE_ASSERT(currentItemDataPtr + sizeof(uint16_t) <=
5582                                       currentDataVec.data() + currentDataVec.size());
5583 
5584                             *reinterpret_cast<uint16_t *>(currentItemDataPtr) = randomNumberGenerator.getUint16();
5585 
5586                             break;
5587                         }
5588 
5589                         case BaseType::U32:
5590                         {
5591                             DE_ASSERT(currentItemDataPtr + sizeof(uint32_t) <=
5592                                       currentDataVec.data() + currentDataVec.size());
5593 
5594                             *reinterpret_cast<uint32_t *>(currentItemDataPtr) = randomNumberGenerator.getUint32();
5595 
5596                             break;
5597                         }
5598 
5599                         case BaseType::U64:
5600                         {
5601                             DE_ASSERT(currentItemDataPtr + sizeof(uint64_t) <=
5602                                       currentDataVec.data() + currentDataVec.size());
5603 
5604                             *reinterpret_cast<uint64_t *>(currentItemDataPtr) = randomNumberGenerator.getUint64();
5605 
5606                             break;
5607                         }
5608 
5609                         default:
5610                         {
5611                             DE_ASSERT(false);
5612                         }
5613                         }
5614 
5615                         if (isMatrixType)
5616                         {
5617                             if (nComponent != (nComponents - 1))
5618                             {
5619                                 const auto delta =
5620                                     currentTestItem.inputBufferProps.matrixElementStartOffsets.at(nComponent + 1) -
5621                                     currentTestItem.inputBufferProps.matrixElementStartOffsets.at(nComponent + 0);
5622 
5623                                 DE_ASSERT(delta >= componentSizeBytes);
5624 
5625                                 currentItemDataPtr += delta;
5626                             }
5627                         }
5628                         else
5629                         {
5630                             currentItemDataPtr += componentSizeBytes;
5631                         }
5632                     }
5633                 }
5634             }
5635         }
5636 
5637         // Merge individual member data into coalesced buffers.
5638         for (const auto &currentShaderGroup : shaderGroups)
5639         {
5640             auto &resultVec = m_shaderGroupToRecordDataMap[currentShaderGroup];
5641 
5642             {
5643                 const auto &lastItem = m_testItems.items.back();
5644 
5645                 resultVec.resize(lastItem.inputBufferProps.bufferOffset +
5646                                  lastItem.shaderGroupToRecordDataMap.at(currentShaderGroup).size());
5647             }
5648 
5649             for (const auto &currentVariable : m_testItems.items)
5650             {
5651                 const auto &currentVariableDataVec = currentVariable.shaderGroupToRecordDataMap.at(currentShaderGroup);
5652 
5653                 DE_ASSERT(resultVec.size() >=
5654                           currentVariable.inputBufferProps.bufferOffset + currentVariableDataVec.size());
5655 
5656                 memcpy(resultVec.data() + currentVariable.inputBufferProps.bufferOffset, currentVariableDataVec.data(),
5657                        currentVariableDataVec.size());
5658             }
5659         }
5660     }
5661 
isMatrix(const VariableType & type) const5662     bool isMatrix(const VariableType &type) const
5663     {
5664         bool result = false;
5665 
5666         switch (type)
5667         {
5668         case VariableType::DMAT2:
5669         case VariableType::DMAT2X2:
5670         case VariableType::DMAT2X3:
5671         case VariableType::DMAT2X4:
5672         case VariableType::DMAT3:
5673         case VariableType::DMAT3X2:
5674         case VariableType::DMAT3X3:
5675         case VariableType::DMAT3X4:
5676         case VariableType::DMAT4:
5677         case VariableType::DMAT4X2:
5678         case VariableType::DMAT4X3:
5679         case VariableType::DMAT4X4:
5680         case VariableType::MAT2:
5681         case VariableType::MAT2X2:
5682         case VariableType::MAT2X3:
5683         case VariableType::MAT2X4:
5684         case VariableType::MAT3:
5685         case VariableType::MAT3X2:
5686         case VariableType::MAT3X3:
5687         case VariableType::MAT3X4:
5688         case VariableType::MAT4:
5689         case VariableType::MAT4X2:
5690         case VariableType::MAT4X3:
5691         case VariableType::MAT4X4:
5692         {
5693             result = true;
5694 
5695             break;
5696         }
5697 
5698         case VariableType::DOUBLE:
5699         case VariableType::DVEC2:
5700         case VariableType::DVEC3:
5701         case VariableType::DVEC4:
5702         case VariableType::FLOAT:
5703         case VariableType::INT8:
5704         case VariableType::INT64:
5705         case VariableType::INT16:
5706         case VariableType::INT:
5707         case VariableType::I16VEC2:
5708         case VariableType::I16VEC3:
5709         case VariableType::I16VEC4:
5710         case VariableType::I64VEC2:
5711         case VariableType::I64VEC3:
5712         case VariableType::I64VEC4:
5713         case VariableType::I8VEC2:
5714         case VariableType::I8VEC3:
5715         case VariableType::I8VEC4:
5716         case VariableType::IVEC2:
5717         case VariableType::IVEC3:
5718         case VariableType::IVEC4:
5719         case VariableType::UINT8:
5720         case VariableType::UINT64:
5721         case VariableType::UINT16:
5722         case VariableType::UINT:
5723         case VariableType::U16VEC2:
5724         case VariableType::U16VEC3:
5725         case VariableType::U16VEC4:
5726         case VariableType::U64VEC2:
5727         case VariableType::U64VEC3:
5728         case VariableType::U64VEC4:
5729         case VariableType::U8VEC2:
5730         case VariableType::U8VEC3:
5731         case VariableType::U8VEC4:
5732         case VariableType::UVEC2:
5733         case VariableType::UVEC3:
5734         case VariableType::UVEC4:
5735         case VariableType::VEC2:
5736         case VariableType::VEC3:
5737         case VariableType::VEC4:
5738         {
5739             result = false;
5740 
5741             break;
5742         }
5743 
5744         default:
5745         {
5746             DE_ASSERT(false);
5747         }
5748         }
5749 
5750         return result;
5751     }
5752 
setExplicitScalarOffsetArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5753     void setExplicitScalarOffsetArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5754     {
5755         return setScalarArrayStrides(inputStruct, updateInputBufferProps);
5756     }
5757 
setExplicitScalarOffsetBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5758     void setExplicitScalarOffsetBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5759     {
5760         uint32_t nBytesConsumed = 0;
5761 
5762         for (auto &currentItem : inputStruct.items)
5763         {
5764             const auto baseType = getBaseType(currentItem.type);
5765             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5766             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5767             const auto isMatrixVariable   = isMatrix(currentItem.type);
5768             const auto nComponents        = getNComponents(currentItem.type);
5769 
5770             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, componentSizeBytes * 2);
5771 
5772             if (isMatrixVariable)
5773             {
5774                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5775             }
5776             else
5777             {
5778                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * componentSizeBytes * nComponents;
5779             }
5780         }
5781     }
5782 
setExplicitScalarOffsetElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5783     void setExplicitScalarOffsetElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5784     {
5785         return setScalarMatrixElementOffsets(inputStruct, updateInputBufferProps);
5786     }
5787 
setExplicitScalarOffsetMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5788     void setExplicitScalarOffsetMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5789     {
5790         return setScalarMatrixElementOffsets(inputStruct, updateInputBufferProps);
5791     }
5792 
setExplicitSTD430OffsetArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5793     void setExplicitSTD430OffsetArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5794     {
5795         return setSTD430ArrayStrides(inputStruct, updateInputBufferProps);
5796     }
5797 
setExplicitSTD430OffsetBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5798     void setExplicitSTD430OffsetBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5799     {
5800         uint32_t nBytesConsumed = 0;
5801 
5802         for (auto &currentItem : inputStruct.items)
5803         {
5804             const auto baseType = getBaseType(currentItem.type);
5805             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5806             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5807             const auto isMatrixVariable   = isMatrix(currentItem.type);
5808             const auto nComponents        = getNComponents(currentItem.type);
5809             uint32_t requiredAlignment    = 0;
5810 
5811             uint32_t nMatrixRows = 0;
5812 
5813             if (isMatrixVariable)
5814             {
5815                 nMatrixRows = getNMatrixRows(currentItem.type);
5816 
5817                 if (nMatrixRows == 3)
5818                 {
5819                     nMatrixRows = 4;
5820                 }
5821 
5822                 requiredAlignment = nMatrixRows * componentSizeBytes;
5823             }
5824             else if (nComponents == 1)
5825             {
5826                 DE_ASSERT((baseType == BaseType::F32) || (baseType == BaseType::F64) || (baseType == BaseType::I16) ||
5827                           (baseType == BaseType::I32) || (baseType == BaseType::I64) || (baseType == BaseType::I8) ||
5828                           (baseType == BaseType::U16) || (baseType == BaseType::U32) || (baseType == BaseType::U64) ||
5829                           (baseType == BaseType::U8));
5830 
5831                 requiredAlignment = componentSizeBytes;
5832             }
5833             else if (nComponents == 2)
5834             {
5835                 requiredAlignment = 2 * componentSizeBytes;
5836             }
5837             else
5838             {
5839                 requiredAlignment = 4 * componentSizeBytes;
5840             }
5841 
5842             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, requiredAlignment * 2);
5843 
5844             if (isMatrixVariable)
5845             {
5846                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5847             }
5848             else
5849             {
5850                 nBytesConsumed = bufferProps.bufferOffset +
5851                                  currentItem.arraySize * componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5852             }
5853         }
5854     }
5855 
setExplicitSTD430OffsetElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5856     void setExplicitSTD430OffsetElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5857     {
5858         return setSTD430MatrixElementOffsets(inputStruct, updateInputBufferProps);
5859     }
5860 
setExplicitSTD430OffsetMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5861     void setExplicitSTD430OffsetMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5862     {
5863         return setSTD430MatrixElementOffsets(inputStruct, updateInputBufferProps);
5864     }
5865 
setSTD430ArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5866     void setSTD430ArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5867     {
5868         for (auto &currentItem : inputStruct.items)
5869         {
5870             const auto baseType = getBaseType(currentItem.type);
5871             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5872             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5873             const auto isMatrixVariable   = isMatrix(currentItem.type);
5874             const auto nComponents        = getNComponents(currentItem.type);
5875             uint32_t requiredStride       = 0;
5876 
5877             if (isMatrixVariable)
5878             {
5879                 auto nMatrixColumns = getNMatrixColumns(currentItem.type);
5880                 auto nMatrixRows    = getNMatrixRows(currentItem.type);
5881 
5882                 if (nMatrixRows == 3)
5883                 {
5884                     nMatrixRows = 4;
5885                 }
5886 
5887                 requiredStride = nMatrixRows * nMatrixColumns * componentSizeBytes;
5888             }
5889             else
5890             {
5891                 requiredStride = componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5892             }
5893 
5894             bufferProps.arrayStride = requiredStride;
5895         }
5896     }
5897 
setSTD430BufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5898     void setSTD430BufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5899     {
5900         uint32_t nBytesConsumed = 0;
5901 
5902         for (auto &currentItem : inputStruct.items)
5903         {
5904             const auto baseType = getBaseType(currentItem.type);
5905             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5906             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5907             const auto isMatrixVariable   = isMatrix(currentItem.type);
5908             const auto nComponents        = getNComponents(currentItem.type);
5909             uint32_t requiredAlignment    = 0;
5910 
5911             uint32_t nMatrixRows = 0;
5912 
5913             if (isMatrixVariable)
5914             {
5915                 nMatrixRows = getNMatrixRows(currentItem.type);
5916 
5917                 if (nMatrixRows == 3)
5918                 {
5919                     nMatrixRows = 4;
5920                 }
5921 
5922                 requiredAlignment = nMatrixRows * componentSizeBytes;
5923             }
5924             else if (nComponents == 1)
5925             {
5926                 DE_ASSERT((baseType == BaseType::F32) || (baseType == BaseType::F64) || (baseType == BaseType::I16) ||
5927                           (baseType == BaseType::I32) || (baseType == BaseType::I64) || (baseType == BaseType::I8) ||
5928                           (baseType == BaseType::U16) || (baseType == BaseType::U32) || (baseType == BaseType::U64) ||
5929                           (baseType == BaseType::U8));
5930 
5931                 requiredAlignment = componentSizeBytes;
5932             }
5933             else if (nComponents == 2)
5934             {
5935                 requiredAlignment = 2 * componentSizeBytes;
5936             }
5937             else
5938             {
5939                 requiredAlignment = 4 * componentSizeBytes;
5940             }
5941 
5942             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, requiredAlignment);
5943 
5944             if (isMatrixVariable)
5945             {
5946                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5947             }
5948             else
5949             {
5950                 nBytesConsumed = bufferProps.bufferOffset +
5951                                  currentItem.arraySize * componentSizeBytes * ((nComponents == 3) ? 4 : nComponents);
5952             }
5953         }
5954     }
5955 
setScalarArrayStrides(StructItem & inputStruct,const bool & updateInputBufferProps)5956     void setScalarArrayStrides(StructItem &inputStruct, const bool &updateInputBufferProps)
5957     {
5958         for (auto &currentItem : inputStruct.items)
5959         {
5960             const auto baseType = getBaseType(currentItem.type);
5961             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5962             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5963             const auto isMatrixVariable   = isMatrix(currentItem.type);
5964             const auto nComponents        = getNComponents(currentItem.type);
5965 
5966             if (isMatrixVariable)
5967             {
5968                 auto nMatrixColumns = getNMatrixColumns(currentItem.type);
5969                 auto nMatrixRows    = getNMatrixRows(currentItem.type);
5970 
5971                 bufferProps.arrayStride = nMatrixRows * nMatrixColumns * componentSizeBytes;
5972             }
5973             else
5974             {
5975                 bufferProps.arrayStride = componentSizeBytes * nComponents;
5976             }
5977         }
5978     }
5979 
setScalarBufferOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)5980     void setScalarBufferOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
5981     {
5982         uint32_t nBytesConsumed = 0;
5983 
5984         for (auto &currentItem : inputStruct.items)
5985         {
5986             const auto baseType = getBaseType(currentItem.type);
5987             auto &bufferProps = (updateInputBufferProps) ? currentItem.inputBufferProps : currentItem.resultBufferProps;
5988             const auto componentSizeBytes = getComponentSizeBytes(baseType);
5989             const auto isMatrixVariable   = isMatrix(currentItem.type);
5990             const auto nComponents        = getNComponents(currentItem.type);
5991 
5992             bufferProps.bufferOffset = de::roundUp(nBytesConsumed, componentSizeBytes);
5993 
5994             if (isMatrixVariable)
5995             {
5996                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * bufferProps.arrayStride;
5997             }
5998             else
5999             {
6000                 nBytesConsumed = bufferProps.bufferOffset + currentItem.arraySize * componentSizeBytes * nComponents;
6001             }
6002         }
6003     }
6004 
setScalarMatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)6005     void setScalarMatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
6006     {
6007         for (auto &currentVariable : inputStruct.items)
6008         {
6009             if (isMatrix(currentVariable.type))
6010             {
6011                 auto &bufferProps =
6012                     (updateInputBufferProps) ? currentVariable.inputBufferProps : currentVariable.resultBufferProps;
6013                 const auto componentSizeBytes       = getComponentSizeBytes(getBaseType(currentVariable.type));
6014                 uint32_t currentMatrixElementOffset = 0;
6015                 const auto nMatrixColumns           = getNMatrixColumns(currentVariable.type);
6016                 const auto nMatrixRows              = getNMatrixRows(currentVariable.type);
6017 
6018                 for (uint32_t nMatrixColumn = 0; nMatrixColumn < nMatrixColumns; ++nMatrixColumn)
6019                 {
6020                     currentMatrixElementOffset =
6021                         de::roundUp(nMatrixRows * componentSizeBytes * nMatrixColumn, componentSizeBytes);
6022 
6023                     for (uint32_t nMatrixRow = 0; nMatrixRow < nMatrixRows; ++nMatrixRow)
6024                     {
6025                         bufferProps.matrixElementStartOffsets.push_back(currentMatrixElementOffset);
6026 
6027                         currentMatrixElementOffset += componentSizeBytes;
6028                     }
6029                 }
6030             }
6031         }
6032     }
6033 
setSTD430MatrixElementOffsets(StructItem & inputStruct,const bool & updateInputBufferProps)6034     void setSTD430MatrixElementOffsets(StructItem &inputStruct, const bool &updateInputBufferProps)
6035     {
6036         for (auto &currentVariable : inputStruct.items)
6037         {
6038             if (isMatrix(currentVariable.type))
6039             {
6040                 auto &bufferProps =
6041                     (updateInputBufferProps) ? currentVariable.inputBufferProps : currentVariable.resultBufferProps;
6042                 const auto componentSizeBytes       = getComponentSizeBytes(getBaseType(currentVariable.type));
6043                 uint32_t currentMatrixElementOffset = 0;
6044                 auto nMatrixColumns                 = getNMatrixColumns(currentVariable.type);
6045                 auto nMatrixRows                    = getNMatrixRows(currentVariable.type);
6046 
6047                 if (currentVariable.matrixOrder == MatrixMajorOrder::COLUMN_MAJOR)
6048                 {
6049                     for (uint32_t nMatrixColumn = 0; nMatrixColumn < nMatrixColumns; ++nMatrixColumn)
6050                     {
6051                         currentMatrixElementOffset = de::roundUp(
6052                             static_cast<uint32_t>(nMatrixRows * componentSizeBytes * nMatrixColumn),
6053                             static_cast<uint32_t>(((nMatrixRows == 3) ? 4 : nMatrixRows) * componentSizeBytes));
6054 
6055                         for (uint32_t nMatrixRow = 0; nMatrixRow < nMatrixRows; ++nMatrixRow)
6056                         {
6057                             bufferProps.matrixElementStartOffsets.push_back(currentMatrixElementOffset);
6058 
6059                             currentMatrixElementOffset += componentSizeBytes;
6060                         }
6061                     }
6062                 }
6063                 else
6064                 {
6065                     // TODO
6066                     DE_ASSERT(false);
6067                 }
6068             }
6069         }
6070     }
6071 
6072     // Private variables
6073     const tcu::UVec3 m_gridSizeXYZ;
6074     const TestType m_testType;
6075     const std::vector<VariableType> m_varTypesToTest;
6076 
6077     uint32_t m_resultBufferSize;
6078     uint32_t m_shaderRecordSize;
6079     StructItem m_testItems;
6080 
6081     std::map<ShaderGroups, std::vector<uint8_t>> m_shaderGroupToRecordDataMap;
6082     std::map<VkShaderStageFlagBits, uint32_t> m_shaderStageToResultBufferOffset;
6083     std::unique_ptr<GridASProvider> m_asProviderPtr;
6084     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
6085 };
6086 
6087 class RecursiveTracesTest : public TestBase
6088 {
6089 public:
RecursiveTracesTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout,const uint32_t & depthToUse)6090     RecursiveTracesTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout,
6091                         const uint32_t &depthToUse)
6092         : m_asStructureLayout(asStructureLayout)
6093         , m_geometryType(geometryType)
6094         , m_depthToUse(depthToUse)
6095         , m_nRaysToTest(512)
6096         , m_maxResultBufferSizePermitted(512 * 1024768)
6097     {
6098         const auto nItemsExpectedPerRay         = static_cast<uint32_t>((1 << (m_depthToUse + 0)) - 1);
6099         const auto nItemsExpectedPerRayInclRgen = static_cast<uint32_t>((1 << (m_depthToUse + 1)) - 1);
6100 
6101         m_nResultItemsExpected     = nItemsExpectedPerRayInclRgen * m_nRaysToTest;
6102         m_nCHitInvocationsExpected = nItemsExpectedPerRay * m_nRaysToTest;
6103         m_nMissInvocationsExpected = nItemsExpectedPerRay * m_nRaysToTest;
6104 
6105         {
6106             const uint32_t nPreambleBytes = sizeof(uint32_t) * 3;
6107             const uint32_t resultItemSize = sizeof(uint32_t) * 4;
6108 
6109             m_nMaxResultItemsPermitted = (m_maxResultBufferSizePermitted - nPreambleBytes) / resultItemSize;
6110         }
6111     }
6112 
~RecursiveTracesTest()6113     ~RecursiveTracesTest()
6114     {
6115         /* Stub */
6116     }
6117 
getAHitShaderCollectionShaderNames() const6118     std::vector<std::string> getAHitShaderCollectionShaderNames() const final
6119     {
6120         return m_ahitShaderNameVec;
6121     }
6122 
getCHitShaderCollectionShaderNames() const6123     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
6124     {
6125         return m_chitShaderNameVec;
6126     }
6127 
getDispatchSize() const6128     tcu::UVec3 getDispatchSize() const final
6129     {
6130         DE_ASSERT(m_nRaysToTest != 0);
6131 
6132         return tcu::UVec3(m_nRaysToTest, 1u, 1u);
6133     }
6134 
getIntersectionShaderCollectionShaderNames() const6135     std::vector<std::string> getIntersectionShaderCollectionShaderNames() const final
6136     {
6137         const auto nIntersectionShaders =
6138             ((m_geometryType == GeometryType::AABB) || (m_geometryType == GeometryType::AABB_AND_TRIANGLES)) ?
6139                 m_depthToUse :
6140                 0;
6141 
6142         return std::vector<std::string>(nIntersectionShaders, {"intersection0"});
6143     }
6144 
getMaxRecursionDepthUsed() const6145     uint32_t getMaxRecursionDepthUsed() const final
6146     {
6147         return m_depthToUse;
6148     }
6149 
getMissShaderCollectionShaderNames() const6150     std::vector<std::string> getMissShaderCollectionShaderNames() const final
6151     {
6152         return m_missShaderNameVec;
6153     }
6154 
getResultBufferSize() const6155     uint32_t getResultBufferSize() const final
6156     {
6157         DE_ASSERT(m_depthToUse < 30); //< due to how nItemsExpectedPerRay is stored.
6158         DE_ASSERT(m_nRaysToTest != 0);
6159 
6160         /* NOTE: A single item is generated by rgen shader stage which is invoked once per each initial ray.
6161          *
6162          *       Each ray at level N generates two result items.
6163          *
6164          *       Thus, for a single initial traced ray, we need sum(2^depth)=2^(depth+1)-1 items.
6165          */
6166         const auto nItemsExpectedPerRay = static_cast<uint32_t>((1 << (m_depthToUse + 1)) - 1);
6167         const auto nResultItemsExpected = de::min(nItemsExpectedPerRay * m_nRaysToTest, m_nMaxResultItemsPermitted);
6168         const auto resultItemSize =
6169             static_cast<uint32_t>(sizeof(uint32_t) * 4 /* nOriginRay, stage, depth, parentResultItem */);
6170 
6171         return static_cast<uint32_t>(sizeof(uint32_t) * 3 /* nItemsRegistered, nCHitInvocations, nMissInvocations */) +
6172                nResultItemsExpected * resultItemSize;
6173     }
6174 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)6175     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
6176     {
6177         VkSpecializationInfo *resultPtr = nullptr;
6178 
6179         if (shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR || shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR)
6180         {
6181             resultPtr = &m_specializationInfo;
6182         }
6183 
6184         return resultPtr;
6185     }
6186 
getTLASPtrVecToBind() const6187     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
6188     {
6189         DE_ASSERT(m_tlPtr != nullptr);
6190 
6191         return {m_tlPtr.get()};
6192     }
6193 
init(vkt::Context &,RayTracingProperties *)6194     bool init(vkt::Context & /* context    */, RayTracingProperties * /* rtPropsPtr */) final
6195     {
6196         m_specializationEntry.constantID = 1;
6197         m_specializationEntry.offset     = 0;
6198         m_specializationEntry.size       = sizeof(uint32_t);
6199 
6200         m_specializationInfo.dataSize      = sizeof(uint32_t);
6201         m_specializationInfo.mapEntryCount = 1;
6202         m_specializationInfo.pData         = &m_depthToUse;
6203         m_specializationInfo.pMapEntries   = &m_specializationEntry;
6204 
6205         return true;
6206     }
6207 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)6208     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
6209                 VkCommandBuffer commandBuffer) final
6210     {
6211         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
6212                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
6213                                                                          tcu::UVec3(1, 1, 1),
6214                                                                          tcu::Vec3(2, 0, 2), /* gridInterCellDeltaXYZ */
6215                                                                          m_geometryType));
6216 
6217         m_tlPtr =
6218             asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer, 0, /* bottomLevelGeometryFlags */
6219                                       nullptr,                                        /* optASPropertyProviderPtr */
6220                                       nullptr);                                       /* optASFeedbackPtr         */
6221     }
6222 
initPrograms(SourceCollections & programCollection) const6223     void initPrograms(SourceCollections &programCollection) const final
6224     {
6225         const auto nLocationsPerPayload = 3; /* 3 scalar uints */
6226 
6227         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
6228                                                   0u,    /* flags        */
6229                                                   true); /* allowSpirv14 */
6230 
6231         std::vector<std::string> rayPayloadDefinitionVec(m_depthToUse);
6232         std::vector<std::string> rayPayloadInDefinitionVec(m_depthToUse);
6233 
6234         for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6235         {
6236             rayPayloadDefinitionVec.at(nLevel) = "layout(location = " + de::toString(nLocationsPerPayload * nLevel) +
6237                                                  ") rayPayloadEXT block\n"
6238                                                  "{\n"
6239                                                  "    uint currentDepth;\n"
6240                                                  "    uint currentNOriginRay;\n"
6241                                                  "    uint currentResultItem;\n"
6242                                                  "};\n";
6243 
6244             rayPayloadInDefinitionVec.at(nLevel) = "layout(location = " + de::toString(nLocationsPerPayload * nLevel) +
6245                                                    ") rayPayloadInEXT block\n"
6246                                                    "{\n"
6247                                                    "    uint parentDepth;\n"
6248                                                    "    uint parentNOriginRay;\n"
6249                                                    "    uint parentResultItem;\n"
6250                                                    "};\n";
6251         }
6252 
6253         const std::string constantVariableDefinition =
6254             "layout(constant_id = 1) const uint MAX_RECURSIVE_DEPTH = " + de::toString(m_depthToUse) + ";\n";
6255 
6256         const char *resultBufferDefinition = "struct ResultData\n"
6257                                              "{\n"
6258                                              "    uint nOriginRay;\n"
6259                                              "    uint shaderStage;\n"
6260                                              "    uint depth;\n"
6261                                              "    uint callerResultItem;\n"
6262                                              "};\n"
6263                                              "\n"
6264                                              "layout(set = 0, binding = 0, std430) buffer result\n"
6265                                              "{\n"
6266                                              "    uint       nItemsStored;\n"
6267                                              "    uint       nCHitInvocations;\n"
6268                                              "    uint       nMissInvocations;\n"
6269                                              "    ResultData resultItems[];\n"
6270                                              "};\n";
6271 
6272         {
6273             m_ahitShaderNameVec.resize(m_depthToUse);
6274 
6275             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6276             {
6277                 std::stringstream css;
6278 
6279                 css << "#version 460 core\n"
6280                        "\n"
6281                        "#extension GL_EXT_ray_tracing : require\n"
6282                        "\n" +
6283                            de::toString(resultBufferDefinition) + rayPayloadInDefinitionVec.at(nLevel) +
6284                            "\n"
6285                            "void main()\n"
6286                            "{\n"
6287                            /* Stub - don't care */
6288                            "}\n";
6289 
6290                 m_ahitShaderNameVec.at(nLevel) = std::string("ahit") + de::toString(nLevel);
6291 
6292                 programCollection.glslSources.add(m_ahitShaderNameVec.at(nLevel))
6293                     << glu::AnyHitSource(css.str()) << buildOptions;
6294             }
6295         }
6296 
6297         {
6298             m_chitShaderNameVec.resize(m_depthToUse);
6299 
6300             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6301             {
6302                 std::stringstream css;
6303                 const bool shouldTraceRays = (nLevel != (m_depthToUse - 1));
6304 
6305                 css << "#version 460 core\n"
6306                        "\n"
6307                        "#extension GL_EXT_ray_tracing : require\n"
6308                        "\n"
6309                        "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6310                        "\n" +
6311                            constantVariableDefinition + de::toString(resultBufferDefinition) +
6312                            de::toString(rayPayloadInDefinitionVec.at(nLevel));
6313 
6314                 if (shouldTraceRays)
6315                 {
6316                     css << rayPayloadDefinitionVec.at(nLevel + 1);
6317                 }
6318 
6319                 css << "\n"
6320                        "void main()\n"
6321                        "{\n"
6322                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6323                        "\n"
6324                        "    atomicAdd(nCHitInvocations, 1);\n"
6325                        "\n"
6326                        "    if (nItem < " +
6327                            de::toString(m_nMaxResultItemsPermitted) +
6328                            ")\n"
6329                            "    {\n"
6330                            "        resultItems[nItem].callerResultItem = parentResultItem;\n"
6331                            "        resultItems[nItem].depth            = parentDepth;\n"
6332                            "        resultItems[nItem].nOriginRay       = parentNOriginRay;\n"
6333                            "        resultItems[nItem].shaderStage      = 1;\n"
6334                            "    }\n"
6335                            "\n";
6336 
6337                 if (shouldTraceRays)
6338                 {
6339                     css << "    if (parentDepth < MAX_RECURSIVE_DEPTH - 1)\n"
6340                            "    {\n"
6341                            "        currentDepth      = parentDepth + 1;\n"
6342                            "        currentNOriginRay = parentNOriginRay;\n"
6343                            "        currentResultItem = nItem;\n"
6344                            "\n"
6345                            "        vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6346                            "        vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6347                            "        vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6348                            "        vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6349                            "        vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6350                            "        vec3  directionHit  = normalize(targetHit  - origin);\n"
6351                            "        vec3  directionMiss = normalize(targetMiss - origin);\n"
6352                            "        uint  rayFlags      = 0;\n"
6353                            "        uint  cullMask      = 0xFF;\n"
6354                            "        float tmin          = 0.001;\n"
6355                            "        float tmax          = 5.0;\n"
6356                            "\n"
6357                            "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6358                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionHit,  tmax, " +
6359                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6360                                ");\n"
6361                                "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6362                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionMiss, tmax, " +
6363                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6364                                ");\n"
6365                                "    }\n"
6366                                "\n";
6367                 }
6368 
6369                 css << "}\n";
6370 
6371                 m_chitShaderNameVec.at(nLevel) = std::string("chit") + de::toString(nLevel);
6372 
6373                 programCollection.glslSources.add(m_chitShaderNameVec.at(nLevel))
6374                     << glu::ClosestHitSource(css.str()) << buildOptions;
6375             }
6376         }
6377 
6378         {
6379             std::stringstream css;
6380 
6381             css << "#version 460 core\n"
6382                    "\n"
6383                    "#extension GL_EXT_ray_tracing : require\n"
6384                    "\n"
6385                    "void main()\n"
6386                    "{\n"
6387                    "    reportIntersectionEXT(0.95f, 0);\n"
6388                    "}\n";
6389 
6390             // There is stack caching code that assumes it knows which shader groups are what, but that doesn't apply to
6391             // this test. The other hit group shaders don't hit this issue because they don't use the canonical name, so
6392             // de-canonicalize the name to work around that
6393             programCollection.glslSources.add("intersection0") << glu::IntersectionSource(css.str()) << buildOptions;
6394         }
6395 
6396         {
6397             m_missShaderNameVec.resize(m_depthToUse);
6398 
6399             for (uint32_t nLevel = 0; nLevel < m_depthToUse; ++nLevel)
6400             {
6401                 std::stringstream css;
6402                 const bool shouldTraceRays = (nLevel != (m_depthToUse - 1));
6403 
6404                 css << "#version 460 core\n"
6405                        "\n"
6406                        "#extension GL_EXT_ray_tracing : require\n"
6407                        "\n"
6408                        "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6409                        "\n" +
6410                            constantVariableDefinition + de::toString(resultBufferDefinition) +
6411                            de::toString(rayPayloadInDefinitionVec.at(nLevel));
6412 
6413                 if (shouldTraceRays)
6414                 {
6415                     css << rayPayloadDefinitionVec.at(nLevel + 1);
6416                 }
6417 
6418                 css << "\n"
6419                        "void main()\n"
6420                        "{\n"
6421                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6422                        "\n"
6423                        "    atomicAdd(nMissInvocations, 1);\n"
6424                        "\n"
6425                        "    if (nItem < " +
6426                            de::toString(m_nMaxResultItemsPermitted) +
6427                            ")\n"
6428                            "    {\n"
6429                            "        resultItems[nItem].depth            = parentDepth;\n"
6430                            "        resultItems[nItem].nOriginRay       = parentNOriginRay;\n"
6431                            "        resultItems[nItem].callerResultItem = parentResultItem;\n"
6432                            "        resultItems[nItem].shaderStage      = 2;\n"
6433                            "    }\n"
6434                            "\n";
6435 
6436                 if (shouldTraceRays)
6437                 {
6438                     css << "    if (parentDepth < MAX_RECURSIVE_DEPTH - 1)\n"
6439                            "    {\n"
6440                            "        currentDepth      = parentDepth + 1;\n"
6441                            "        currentNOriginRay = parentNOriginRay;\n"
6442                            "        currentResultItem = nItem;\n"
6443                            "\n"
6444                            "        vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6445                            "        vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6446                            "        vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6447                            "        vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6448                            "        vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6449                            "        vec3  directionHit  = normalize(targetHit  - origin);\n"
6450                            "        vec3  directionMiss = normalize(targetMiss - origin);\n"
6451                            "\n"
6452                            "        uint  rayFlags      = 0;\n"
6453                            "        uint  cullMask      = 0xFF;\n"
6454                            "        float tmin          = 0.001;\n"
6455                            "        float tmax          = 5.0;\n"
6456                            "\n"
6457                            "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6458                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionHit,  tmax, " +
6459                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6460                                ");\n"
6461                                "        traceRayEXT(accelerationStructure, rayFlags, cullMask, " +
6462                                de::toString(nLevel + 1) + ", 0, 0, origin, tmin, directionMiss, tmax, " +
6463                                de::toString(nLocationsPerPayload * (nLevel + 1)) +
6464                                ");\n"
6465                                "    }\n";
6466                 }
6467 
6468                 css << "}\n";
6469 
6470                 m_missShaderNameVec.at(nLevel) = "miss" + de::toString(nLevel);
6471 
6472                 programCollection.glslSources.add(m_missShaderNameVec.at(nLevel))
6473                     << glu::MissSource(css.str()) << buildOptions;
6474             }
6475         }
6476 
6477         {
6478             const std::string rayPayloadDefinition = ((m_depthToUse == 0u) ? "" : rayPayloadDefinitionVec.at(0));
6479 
6480             std::stringstream css;
6481 
6482             css << "#version 460 core\n"
6483                    "\n"
6484                    "#extension GL_EXT_ray_tracing : require\n"
6485                    "\n"
6486                    "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
6487                    "\n" +
6488                        de::toString(resultBufferDefinition) + rayPayloadDefinition +
6489                        "void main()\n"
6490                        "{\n"
6491                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
6492                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
6493                        "    uint  rayFlags     = 0;\n"
6494                        "    float tmin         = 0.001;\n"
6495                        "    float tmax         = 9.0;\n"
6496                        "\n"
6497                        "    uint  cullMask      = 0xFF;\n"
6498                        "    vec3  cellStartXYZ  = vec3(0.0, 0.0, 0.0);\n"
6499                        "    vec3  cellEndXYZ    = cellStartXYZ + vec3(1.0);\n"
6500                        "    vec3  targetHit     = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
6501                        "    vec3  targetMiss    = targetHit + vec3(0, 10, 0);\n"
6502                        "    vec3  origin        = targetHit - vec3(1, 0,  0);\n"
6503                        "    vec3  directionHit  = normalize(targetHit  - origin);\n"
6504                        "    vec3  directionMiss = normalize(targetMiss - origin);\n"
6505                        "\n"
6506                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
6507                        "\n"
6508                        "    if (nItem < " +
6509                        de::toString(m_nMaxResultItemsPermitted) +
6510                        ")\n"
6511                        "    {\n"
6512                        "        resultItems[nItem].callerResultItem = 0xFFFFFFFF;\n"
6513                        "        resultItems[nItem].depth            = 0;\n"
6514                        "        resultItems[nItem].nOriginRay       = nInvocation;\n"
6515                        "        resultItems[nItem].shaderStage      = 3;\n"
6516                        "    }\n"
6517                        "\n" +
6518                        ((m_depthToUse == 0u) ? "" :
6519                                                "    currentDepth      = 0;\n"
6520                                                "    currentNOriginRay = nInvocation;\n"
6521                                                "    currentResultItem = nItem;\n"
6522                                                "\n"
6523                                                "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, "
6524                                                "origin, tmin, directionHit,  tmax, 0);\n"
6525                                                "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, "
6526                                                "origin, tmin, directionMiss, tmax, 0);\n") +
6527                        "}\n";
6528 
6529             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
6530         }
6531     }
6532 
resetTLAS()6533     void resetTLAS() final
6534     {
6535         m_tlPtr.reset();
6536     }
6537 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const6538     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
6539     {
6540         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
6541         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
6542         bool result                                   = false;
6543         auto nItemsStored                             = *resultU32Ptr;
6544         const auto nCHitInvocations                   = *(resultU32Ptr + 1);
6545         const auto nMissInvocations                   = *(resultU32Ptr + 2);
6546         const bool doFullCheck                        = (m_nResultItemsExpected < m_nMaxResultItemsPermitted);
6547 
6548         struct ResultItem
6549         {
6550             uint32_t depth;
6551             uint32_t nOriginRay;
6552             uint32_t nParentNode;
6553 
6554             VkShaderStageFlagBits stage;
6555 
6556             ResultItem *childCHitNodePtr;
6557             ResultItem *childMissNodePtr;
6558 
6559             ResultItem()
6560                 : depth(0xFFFFFFFFu)
6561                 , nOriginRay(0xFFFFFFFFu)
6562                 , nParentNode(0xFFFFFFFFu)
6563                 , stage(VK_SHADER_STAGE_ALL)
6564                 , childCHitNodePtr(nullptr)
6565                 , childMissNodePtr(nullptr)
6566             {
6567                 /* Stub */
6568             }
6569         };
6570 
6571         std::map<uint32_t, ResultItem *> nItemToResultItemPtrMap;
6572         std::map<uint32_t, std::vector<ResultItem *>> nLevelToResultItemPtrVecMap;
6573         std::vector<std::unique_ptr<ResultItem>> resultItemPtrVec;
6574 
6575         uint32_t rayCount;
6576         std::map<uint32_t, std::vector<std::pair<const uint32_t *, uint32_t>>> nRayToResultItemPtrIndexVecMap;
6577 
6578         if (doFullCheck)
6579         {
6580             if (nItemsStored != m_nResultItemsExpected)
6581             {
6582                 goto end;
6583             }
6584         }
6585         else
6586         {
6587             // Test shaders always use an atomic add to obtain a unique index, at which they should write the result item.
6588             // Hence, the value we read back from the result buffer's preamble does not actually indicate how many items
6589             // are available for reading, since a partial (!= full) check implies our result buffer only contains a fraction
6590             // of all expected items (since more items would simply not fit in).
6591             //
6592             // Make sure to use a correct value in subsequent checks.
6593             if (nItemsStored < m_nResultItemsExpected)
6594             {
6595                 goto end;
6596             }
6597 
6598             nItemsStored = m_nMaxResultItemsPermitted;
6599         }
6600 
6601         if (nCHitInvocations != m_nCHitInvocationsExpected)
6602         {
6603             goto end;
6604         }
6605 
6606         if (nMissInvocations != m_nMissInvocationsExpected)
6607         {
6608             goto end;
6609         }
6610 
6611         /*
6612          * We are creating a map of rays, each of which has a list of result items for that ray,
6613          * so we can verify each ray sequentially and save memory on the temporary maps.
6614          */
6615         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
6616         {
6617             const uint32_t *currentItemU32Ptr = resultU32Ptr +
6618                                                 3 /* nItemsRegistered, nCHitInvocations, nMissInvocations*/ +
6619                                                 4 /* items per result item */ * nItem;
6620             uint32_t nOriginRay = *(currentItemU32Ptr + 0);
6621             nRayToResultItemPtrIndexVecMap[nOriginRay].push_back(std::make_pair(currentItemU32Ptr, nItem));
6622         }
6623 
6624         /*
6625          * Convert an array of result items, stored in undefined order, to a representation we can easily verify.
6626          * Loop to verify result items with the same ray id in each iteration.
6627          */
6628         rayCount = getDispatchSize()[0] * getDispatchSize()[1] * getDispatchSize()[2];
6629         for (uint32_t nRay = 0; nRay < rayCount; ++nRay)
6630         {
6631             // If the nRay is not in the map, an empty vector will be created,
6632             // and the subsequent verification will be simplified in this case.
6633             const std::vector<std::pair<const uint32_t *, uint32_t>> &currentItemU32PtrIndexVec =
6634                 nRayToResultItemPtrIndexVecMap[nRay];
6635             for (const auto &iterator1 : currentItemU32PtrIndexVec)
6636             {
6637                 const uint32_t *currentItemU32Ptr = iterator1.first;
6638                 uint32_t nItem                    = iterator1.second;
6639 
6640                 std::unique_ptr<ResultItem> resultItemPtr;
6641                 resultItemPtr.reset(new ResultItem());
6642 
6643                 resultItemPtr->depth       = *(currentItemU32Ptr + 2);
6644                 resultItemPtr->nOriginRay  = *(currentItemU32Ptr + 0);
6645                 resultItemPtr->nParentNode = *(currentItemU32Ptr + 3);
6646 
6647                 switch (*(currentItemU32Ptr + 1))
6648                 {
6649                 case 1:
6650                     resultItemPtr->stage = VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR;
6651                     break;
6652                 case 2:
6653                     resultItemPtr->stage = VK_SHADER_STAGE_MISS_BIT_KHR;
6654                     break;
6655                 case 3:
6656                     resultItemPtr->stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
6657                     break;
6658 
6659                 default:
6660                 {
6661                     /* This should never happen */
6662                     DE_ASSERT(false);
6663 
6664                     goto end;
6665                 }
6666                 }
6667 
6668                 if (resultItemPtr->depth >= m_depthToUse && m_depthToUse > 0u)
6669                 {
6670                     DE_ASSERT(resultItemPtr->depth < m_depthToUse);
6671 
6672                     goto end;
6673                 }
6674 
6675                 if (resultItemPtr->nOriginRay >= m_nRaysToTest)
6676                 {
6677                     DE_ASSERT(resultItemPtr->nOriginRay < m_nRaysToTest);
6678 
6679                     goto end;
6680                 }
6681 
6682                 nItemToResultItemPtrMap[nItem] = resultItemPtr.get();
6683 
6684                 nLevelToResultItemPtrVecMap[resultItemPtr->depth].push_back(resultItemPtr.get());
6685                 resultItemPtrVec.push_back(std::move(resultItemPtr));
6686             }
6687 
6688             if (nLevelToResultItemPtrVecMap.empty())
6689             {
6690                 continue;
6691             }
6692 
6693             if (doFullCheck)
6694             {
6695                 uint32_t nRayGenShaderResultItemsFound = 0;
6696 
6697                 for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6698                 {
6699                     const auto &currentResultItemPtrVec = iterator1.second;
6700 
6701                     for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6702                     {
6703                         if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
6704                         {
6705                             if (currentResultItemPtr->nParentNode != 0xFFFFFFFF)
6706                             {
6707                                 DE_ASSERT(currentResultItemPtr->nParentNode == 0xFFFFFFFF);
6708 
6709                                 goto end;
6710                             }
6711 
6712                             nRayGenShaderResultItemsFound++;
6713                         }
6714                         else if (currentResultItemPtr->stage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
6715                         {
6716                             DE_ASSERT(currentResultItemPtr->nParentNode < nItemsStored);
6717 
6718                             auto parentNodePtr = nItemToResultItemPtrMap.at(currentResultItemPtr->nParentNode);
6719 
6720                             if (parentNodePtr->childCHitNodePtr != nullptr)
6721                             {
6722                                 DE_ASSERT(parentNodePtr->childCHitNodePtr == nullptr);
6723 
6724                                 goto end;
6725                             }
6726 
6727                             parentNodePtr->childCHitNodePtr = currentResultItemPtr;
6728                         }
6729                         else
6730                         {
6731                             DE_ASSERT(currentResultItemPtr->stage == VK_SHADER_STAGE_MISS_BIT_KHR);
6732                             DE_ASSERT(currentResultItemPtr->nParentNode < nItemsStored);
6733 
6734                             auto parentNodePtr = nItemToResultItemPtrMap.at(currentResultItemPtr->nParentNode);
6735 
6736                             if (parentNodePtr->childMissNodePtr != nullptr)
6737                             {
6738                                 DE_ASSERT(parentNodePtr->childMissNodePtr == nullptr);
6739 
6740                                 goto end;
6741                             }
6742 
6743                             parentNodePtr->childMissNodePtr = currentResultItemPtr;
6744                         }
6745                     }
6746                 }
6747 
6748                 if (nRayGenShaderResultItemsFound != 1)
6749                 {
6750                     DE_ASSERT(nRayGenShaderResultItemsFound == 1);
6751 
6752                     goto end;
6753                 }
6754             }
6755 
6756             // 1. Verify all nodes that are not leaves have both child nodes attached, and that leaf nodes do not have any children assigned.
6757             if (doFullCheck)
6758             {
6759                 for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6760                 {
6761                     const auto &currentNLevel           = iterator1.first;
6762                     const auto &currentResultItemPtrVec = iterator1.second;
6763 
6764                     for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6765                     {
6766                         if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR ||
6767                             currentNLevel != m_depthToUse - 1)
6768                         {
6769                             if (currentResultItemPtr->childCHitNodePtr == nullptr && m_depthToUse > 0u)
6770                             {
6771                                 DE_ASSERT(currentResultItemPtr->childCHitNodePtr != nullptr);
6772 
6773                                 goto end;
6774                             }
6775 
6776                             if (currentResultItemPtr->childMissNodePtr == nullptr && m_depthToUse > 0u)
6777                             {
6778                                 DE_ASSERT(currentResultItemPtr->childMissNodePtr != nullptr);
6779 
6780                                 goto end;
6781                             }
6782                         }
6783                         else
6784                         {
6785                             if (currentResultItemPtr->childCHitNodePtr != nullptr)
6786                             {
6787                                 DE_ASSERT(currentResultItemPtr->childCHitNodePtr == nullptr);
6788 
6789                                 goto end;
6790                             }
6791 
6792                             if (currentResultItemPtr->childMissNodePtr != nullptr)
6793                             {
6794                                 DE_ASSERT(currentResultItemPtr->childMissNodePtr == nullptr);
6795 
6796                                 goto end;
6797                             }
6798                         }
6799                     }
6800                 }
6801             }
6802 
6803             // 2. Verify depth level is correct for each node.
6804             for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6805             {
6806                 const auto &currentNLevel           = iterator1.first;
6807                 const auto &currentResultItemPtrVec = iterator1.second;
6808 
6809                 for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6810                 {
6811                     if (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
6812                     {
6813                         if (currentResultItemPtr->depth != 0)
6814                         {
6815                             DE_ASSERT(currentResultItemPtr->depth == 0);
6816 
6817                             goto end;
6818                         }
6819                     }
6820                     else if (currentResultItemPtr->depth != currentNLevel)
6821                     {
6822                         DE_ASSERT(currentResultItemPtr->depth == currentNLevel);
6823 
6824                         goto end;
6825                     }
6826                 }
6827             }
6828 
6829             // 3. Verify child node ptrs point to nodes that are assigned correct shader stage.
6830             for (const auto &iterator : nItemToResultItemPtrMap)
6831             {
6832                 const auto &currentResultItemPtr = iterator.second;
6833 
6834                 if (currentResultItemPtr->childCHitNodePtr != nullptr &&
6835                     currentResultItemPtr->childCHitNodePtr->stage != VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)
6836                 {
6837                     DE_ASSERT(currentResultItemPtr->childCHitNodePtr->stage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
6838 
6839                     goto end;
6840                 }
6841 
6842                 if (currentResultItemPtr->childMissNodePtr != nullptr &&
6843                     currentResultItemPtr->childMissNodePtr->stage != VK_SHADER_STAGE_MISS_BIT_KHR)
6844                 {
6845                     DE_ASSERT(currentResultItemPtr->childMissNodePtr->stage = VK_SHADER_STAGE_MISS_BIT_KHR);
6846 
6847                     goto end;
6848                 }
6849             }
6850 
6851             // 4. Verify child nodes are assigned correct depth levels.
6852             for (const auto &iterator1 : nLevelToResultItemPtrVecMap)
6853             {
6854                 const auto &currentNLevel           = iterator1.first;
6855                 const auto &currentResultItemPtrVec = iterator1.second;
6856 
6857                 for (const auto &currentResultItemPtr : currentResultItemPtrVec)
6858                 {
6859                     const auto expectedChildNodeDepth =
6860                         (currentResultItemPtr->stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR) ?
6861                             0 :
6862                             currentResultItemPtr->depth + 1;
6863 
6864                     if (currentResultItemPtr->depth != currentNLevel)
6865                     {
6866                         DE_ASSERT(currentResultItemPtr->depth == currentNLevel);
6867 
6868                         goto end;
6869                     }
6870 
6871                     if (currentResultItemPtr->childCHitNodePtr != nullptr &&
6872                         currentResultItemPtr->childCHitNodePtr->depth != expectedChildNodeDepth)
6873                     {
6874                         DE_ASSERT(currentResultItemPtr->childCHitNodePtr->depth == expectedChildNodeDepth);
6875 
6876                         goto end;
6877                     }
6878 
6879                     if (currentResultItemPtr->childMissNodePtr != nullptr &&
6880                         currentResultItemPtr->childMissNodePtr->depth != expectedChildNodeDepth)
6881                     {
6882                         DE_ASSERT(currentResultItemPtr->childMissNodePtr->depth == expectedChildNodeDepth);
6883 
6884                         goto end;
6885                     }
6886                 }
6887             }
6888 
6889             // 5. Verify that RT shader stages were invoked for all anticipated recursion levels.
6890             if (doFullCheck)
6891             {
6892                 for (uint32_t nLevel = 0; nLevel < m_depthToUse; nLevel++)
6893                 {
6894                     if (nLevelToResultItemPtrVecMap.find(nLevel) == nLevelToResultItemPtrVecMap.end())
6895                     {
6896                         DE_ASSERT(false);
6897 
6898                         goto end;
6899                     }
6900                 }
6901             }
6902 
6903             /* clear containers before next iteration */
6904             {
6905                 nItemToResultItemPtrMap.clear();
6906 
6907                 /* clear nLevelToResultItemPtrVecMap */
6908                 for (auto &iterator1 : nLevelToResultItemPtrVecMap)
6909                 {
6910                     iterator1.second.clear();
6911                 }
6912                 nLevelToResultItemPtrVecMap.clear();
6913 
6914                 resultItemPtrVec.clear();
6915             }
6916 
6917         } // end for (uint32_t nRay = 0; nRay < rayCount; ++nRay)
6918 
6919         result = true;
6920     end:
6921         return result;
6922     }
6923 
6924 private:
6925     const AccelerationStructureLayout m_asStructureLayout;
6926     const GeometryType m_geometryType;
6927 
6928     uint32_t m_depthToUse;
6929     uint32_t m_nMaxResultItemsPermitted;
6930     const uint32_t m_nRaysToTest;
6931     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
6932 
6933     VkSpecializationInfo m_specializationInfo;
6934     VkSpecializationMapEntry m_specializationEntry;
6935 
6936     mutable std::vector<std::string> m_ahitShaderNameVec;
6937     mutable std::vector<std::string> m_chitShaderNameVec;
6938     mutable std::vector<std::string> m_missShaderNameVec;
6939 
6940     uint32_t m_nCHitInvocationsExpected;
6941     uint32_t m_nMissInvocationsExpected;
6942     uint32_t m_nResultItemsExpected;
6943 
6944     const uint32_t m_maxResultBufferSizePermitted;
6945 };
6946 
6947 // Test the return value of reportIntersectionEXT
6948 class ReportIntersectionResultTest : public TestBase
6949 {
6950 public:
ReportIntersectionResultTest(const AccelerationStructureLayout & asLayout,const GeometryType & geometryType)6951     ReportIntersectionResultTest(const AccelerationStructureLayout &asLayout, const GeometryType &geometryType)
6952         : m_asLayout(asLayout)
6953         , m_geometryType(geometryType)
6954         , m_gridSizeXYZ(tcu::UVec3(4, 4, 1))
6955         , m_nRaysToTrace(16)
6956     {
6957     }
6958 
getCHitShaderCollectionShaderNames() const6959     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
6960     {
6961         return {};
6962     }
6963 
getDispatchSize() const6964     tcu::UVec3 getDispatchSize() const final
6965     {
6966         return m_gridSizeXYZ;
6967     }
6968 
getResultBufferSize() const6969     uint32_t getResultBufferSize() const final
6970     {
6971         return static_cast<uint32_t>(2u * sizeof(uint32_t) * m_nRaysToTrace);
6972     }
6973 
getTLASPtrVecToBind() const6974     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
6975     {
6976         return {m_tlPtr.get()};
6977     }
6978 
resetTLAS()6979     void resetTLAS() final
6980     {
6981         m_tlPtr.reset();
6982     }
6983 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)6984     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
6985                 VkCommandBuffer commandBuffer) final
6986     {
6987         m_asProviderPtr.reset(new GridASProvider(tcu::Vec3(0, 0, 0),                         // gridStartXYZ
6988                                                  tcu::Vec3(1, 1, 1),                         // gridCellSizeXYZ
6989                                                  m_gridSizeXYZ, tcu::Vec3(2.0f, 2.0f, 2.0f), // gridInterCellDeltaXYZ
6990                                                  m_geometryType));
6991 
6992         m_tlPtr = m_asProviderPtr->createTLAS(context, m_asLayout, commandBuffer, 0u,
6993                                               nullptr,  // optASPropertyProviderPtr
6994                                               nullptr); // optASFedbackPtr
6995     }
6996 
initPrograms(SourceCollections & programCollection) const6997     void initPrograms(SourceCollections &programCollection) const final
6998     {
6999         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
7000         const std::string hitPropertiesDefinition  = "struct HitProperties\n"
7001                                                      "{\n"
7002                                                      "    uint nHitsRejected;\n"
7003                                                      "    uint nHitsAccepteded;\n"
7004                                                      "};\n";
7005         const std::string hitPropertiesDeclaration = "layout(set = 0, binding = 0, std430) buffer result\n"
7006                                                      "{\n"
7007                                                      "    HitProperties rayToHitProps[" +
7008                                                      de::toString(m_nRaysToTrace) +
7009                                                      "];\n"
7010                                                      "};\n";
7011 
7012         programCollection.glslSources.add("ahit")
7013             << glu::AnyHitSource(std::string() +
7014                                  "#version 460 core\n"
7015                                  "\n"
7016                                  "#extension GL_EXT_ray_tracing : require\n"
7017                                  "\n"
7018                                  "hitAttributeEXT vec3 unusedAttribute;\n"
7019                                  "\n" +
7020                                  hitPropertiesDefinition +
7021                                  "\n"
7022                                  "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
7023                                  hitPropertiesDeclaration +
7024                                  "\n"
7025                                  "void main()\n"
7026                                  "{\n"
7027                                  "    uint nRay = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
7028                                  "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
7029                                  "    if ((gl_RayTmaxEXT > 0.6) && (gl_RayTmaxEXT < 0.8))\n"
7030                                  "    {\n"
7031                                  "        atomicAdd(rayToHitProps[nRay].nHitsRejected, 1);\n"
7032                                  "        ignoreIntersectionEXT;\n" // reportIntersectionEXT should return false
7033                                  "    }\n"
7034                                  "    else if ((gl_RayTmaxEXT > 0.1) && (gl_RayTmaxEXT < 0.3))\n"
7035                                  "    {\n"
7036                                  "        atomicAdd(rayToHitProps[nRay].nHitsAccepteded, 1);\n"
7037                                  "    }\n"
7038                                  "}\n")
7039             << buildOptions;
7040 
7041         programCollection.glslSources.add("intersection")
7042             << glu::IntersectionSource("#version 460 core\n"
7043                                        "#extension GL_EXT_ray_tracing : require\n"
7044                                        "\n"
7045                                        "hitAttributeEXT vec3 hitAttribute;\n"
7046                                        "\n"
7047                                        "void main()\n"
7048                                        "{\n"
7049                                        "    bool resultThatShouldBeRejected = reportIntersectionEXT(0.7f, 0);\n"
7050                                        "    if (resultThatShouldBeRejected)\n"
7051                                        "        reportIntersectionEXT(0.7f, 0);\n"
7052                                        "    else\n"
7053                                        "    {\n"
7054                                        "         bool resultThatShouldBeAccepted = reportIntersectionEXT(0.2f, 0);\n"
7055                                        "         if (!resultThatShouldBeAccepted)\n"
7056                                        "             reportIntersectionEXT(0.2f, 0);\n"
7057                                        "    }\n"
7058                                        "}\n")
7059             << buildOptions;
7060 
7061         programCollection.glslSources.add("miss")
7062             << glu::MissSource(std::string() +
7063                                "#version 460 core\n"
7064                                "\n"
7065                                "#extension GL_EXT_ray_tracing : require\n"
7066                                "\n" +
7067                                hitPropertiesDefinition + "layout(location = 0) rayPayloadInEXT vec3 unusedPayload;\n" +
7068                                hitPropertiesDeclaration +
7069                                "\n"
7070                                "void main()\n"
7071                                "{\n"
7072                                "}\n")
7073             << buildOptions;
7074 
7075         programCollection.glslSources.add("rgen")
7076             << glu::RaygenSource(
7077                    "#version 460 core\n"
7078                    "\n"
7079                    "#extension GL_EXT_ray_tracing : require\n"
7080                    "\n" +
7081                    hitPropertiesDefinition +
7082                    "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
7083                    "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
7084                    "\n"
7085                    "void main()\n"
7086                    "{\n"
7087                    "    uint  rayFlags    = 0;\n"
7088                    "    uint  cullMask    = 0xFF;\n"
7089                    "    float tmin        = 0.001;\n"
7090                    "    float tmax        = 9.0;\n"
7091                    "    vec3  origin      = vec3(4, 4, 4);\n"
7092                    "    vec3  target      = vec3(float(gl_LaunchIDEXT.x * 2) + 0.5f, float(gl_LaunchIDEXT.y * 2) + "
7093                    "0.5f, float(gl_LaunchIDEXT.z * 2) + 0.5f);\n"
7094                    "    vec3  direct      = normalize(target - origin);\n"
7095                    "\n"
7096                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
7097                    "}\n")
7098             << buildOptions;
7099     }
7100 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const7101     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
7102     {
7103         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
7104         const void *resultDataPtr                     = resultBufferPtr->getAllocation().getHostPtr();
7105 
7106         for (uint32_t nRay = 0; nRay < m_nRaysToTrace; ++nRay)
7107         {
7108             const uint32_t *rayProps = reinterpret_cast<const uint32_t *>(resultDataPtr) + 2 * nRay;
7109             if ((rayProps[0] != 1) || (rayProps[1] != 1))
7110                 return false;
7111         }
7112         return true;
7113     }
7114 
7115 private:
7116     const AccelerationStructureLayout m_asLayout;
7117     const GeometryType m_geometryType;
7118     const tcu::UVec3 m_gridSizeXYZ;
7119     const uint32_t m_nRaysToTrace;
7120 
7121     std::unique_ptr<GridASProvider> m_asProviderPtr;
7122     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
7123 };
7124 
7125 class RayPayloadInTest : public TestBase
7126 {
7127 public:
RayPayloadInTest(const GeometryType & geometryType,const AccelerationStructureLayout & asStructureLayout)7128     RayPayloadInTest(const GeometryType &geometryType, const AccelerationStructureLayout &asStructureLayout)
7129         : m_asStructureLayout(asStructureLayout)
7130         , m_geometryType(geometryType)
7131         , m_gridSizeXYZ(tcu::UVec3(512, 1, 1))
7132         , m_nRayPayloadU32s(512)
7133     {
7134     }
7135 
~RayPayloadInTest()7136     ~RayPayloadInTest()
7137     {
7138         /* Stub */
7139     }
7140 
getDispatchSize() const7141     tcu::UVec3 getDispatchSize() const final
7142     {
7143         DE_ASSERT(m_gridSizeXYZ[0] != 0);
7144         DE_ASSERT(m_gridSizeXYZ[1] != 0);
7145         DE_ASSERT(m_gridSizeXYZ[2] != 0);
7146 
7147         return tcu::UVec3(m_gridSizeXYZ[0], m_gridSizeXYZ[1], m_gridSizeXYZ[2]);
7148     }
7149 
getResultBufferSize() const7150     uint32_t getResultBufferSize() const final
7151     {
7152         DE_ASSERT(m_gridSizeXYZ[0] != 0);
7153         DE_ASSERT(m_gridSizeXYZ[1] != 0);
7154         DE_ASSERT(m_gridSizeXYZ[2] != 0);
7155         DE_ASSERT(m_nRayPayloadU32s != 0);
7156 
7157         const auto nRays = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
7158 
7159         DE_ASSERT(nRays != 0);
7160         DE_ASSERT((nRays % 2) == 0);
7161 
7162         const auto nMissShaderInvocationsExpected = nRays / 2;
7163         const auto nAHitShaderInvocationsExpected = nRays / 2;
7164         const auto nCHitShaderInvocationsExpected = nAHitShaderInvocationsExpected;
7165         const auto nResultStoresExpected =
7166             nMissShaderInvocationsExpected + nAHitShaderInvocationsExpected + nCHitShaderInvocationsExpected;
7167 
7168         return static_cast<uint32_t>((1 /* nItems */ + m_nRayPayloadU32s * nResultStoresExpected) * sizeof(uint32_t));
7169     }
7170 
getSpecializationInfoPtr(const VkShaderStageFlagBits & shaderStage)7171     VkSpecializationInfo *getSpecializationInfoPtr(const VkShaderStageFlagBits &shaderStage) final
7172     {
7173         VkSpecializationInfo *resultPtr = nullptr;
7174 
7175         if (shaderStage == VK_SHADER_STAGE_MISS_BIT_KHR || shaderStage == VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR ||
7176             shaderStage == VK_SHADER_STAGE_ANY_HIT_BIT_KHR || shaderStage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
7177         {
7178             resultPtr = &m_specializationInfo;
7179         }
7180 
7181         return resultPtr;
7182     }
7183 
getTLASPtrVecToBind() const7184     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
7185     {
7186         DE_ASSERT(m_tlPtr != nullptr);
7187 
7188         return {m_tlPtr.get()};
7189     }
7190 
init(vkt::Context &,RayTracingProperties *)7191     bool init(vkt::Context & /* context           */, RayTracingProperties * /* rtPropertiesPtr */) final
7192     {
7193         m_specializationInfoMapEntry.constantID = 1;
7194         m_specializationInfoMapEntry.offset     = 0;
7195         m_specializationInfoMapEntry.size       = sizeof(uint32_t);
7196 
7197         m_specializationInfo.dataSize      = sizeof(uint32_t);
7198         m_specializationInfo.mapEntryCount = 1;
7199         m_specializationInfo.pData         = reinterpret_cast<const void *>(&m_nRayPayloadU32s);
7200         m_specializationInfo.pMapEntries   = &m_specializationInfoMapEntry;
7201 
7202         return true;
7203     }
7204 
resetTLAS()7205     void resetTLAS() final
7206     {
7207         m_tlPtr.reset();
7208     }
7209 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)7210     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
7211                 VkCommandBuffer commandBuffer) final
7212     {
7213         std::unique_ptr<GridASProvider> asProviderPtr(new GridASProvider(tcu::Vec3(0, 0, 0), /* gridStartXYZ          */
7214                                                                          tcu::Vec3(1, 1, 1), /* gridCellSizeXYZ       */
7215                                                                          m_gridSizeXYZ,
7216                                                                          tcu::Vec3(6, 0, 0), /* gridInterCellDeltaXYZ */
7217                                                                          m_geometryType));
7218 
7219         m_tlPtr = asProviderPtr->createTLAS(context, m_asStructureLayout, commandBuffer,
7220                                             VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR, nullptr, nullptr);
7221     }
7222 
initPrograms(SourceCollections & programCollection) const7223     void initPrograms(SourceCollections &programCollection) const final
7224     {
7225         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
7226                                                   0u,    /* flags        */
7227                                                   true); /* allowSpirv14 */
7228 
7229         const char *constantDefinitions = "layout(constant_id = 1) const uint N_UINTS_IN_RAY_PAYLOAD = 1;\n";
7230 
7231         const char *rayPayloadDefinition = "\n"
7232                                            "layout(location = 0) rayPayloadEXT block\n"
7233                                            "{\n"
7234                                            "    uint values[N_UINTS_IN_RAY_PAYLOAD];\n"
7235                                            "};\n"
7236                                            "\n";
7237 
7238         const char *rayPayloadInDefinition = "\n"
7239                                              "layout(location = 0) rayPayloadInEXT block\n"
7240                                              "{\n"
7241                                              "    uint values[N_UINTS_IN_RAY_PAYLOAD];\n"
7242                                              "};\n"
7243                                              "\n";
7244 
7245         const char *resultBufferDefinition = "layout(set      = 0, binding = 0, std430) buffer result\n"
7246                                              "{\n"
7247                                              "    uint nItemsStored;\n"
7248                                              "    uint resultValues[];\n"
7249                                              "};\n";
7250 
7251         {
7252             std::stringstream css;
7253 
7254             css << "#version 460 core\n"
7255                    "\n"
7256                    "#extension GL_EXT_ray_tracing : require\n"
7257                    "\n" +
7258                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7259                        de::toString(rayPayloadInDefinition) +
7260                        "\n"
7261                        "void main()\n"
7262                        "{\n"
7263                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7264                        "\n"
7265                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7266                        "    {\n"
7267                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7268                        "    }\n"
7269                        "}\n";
7270 
7271             programCollection.glslSources.add("ahit") << glu::AnyHitSource(css.str()) << buildOptions;
7272         }
7273 
7274         {
7275             std::stringstream css;
7276 
7277             css << "#version 460 core\n"
7278                    "\n"
7279                    "#extension GL_EXT_ray_tracing : require\n"
7280                    "\n" +
7281                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7282                        de::toString(rayPayloadInDefinition) +
7283                        "\n"
7284                        "void main()\n"
7285                        "{\n"
7286                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7287                        "\n"
7288                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7289                        "    {\n"
7290                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7291                        "    }\n"
7292                        "}\n";
7293 
7294             programCollection.glslSources.add("chit") << glu::ClosestHitSource(css.str()) << buildOptions;
7295         }
7296 
7297         {
7298             std::stringstream css;
7299 
7300             css << "#version 460 core\n"
7301                    "\n"
7302                    "#extension GL_EXT_ray_tracing : require\n"
7303                    "\n"
7304                    "void main()\n"
7305                    "{\n"
7306                    "    reportIntersectionEXT(0.95f, 0);\n"
7307                    "}\n";
7308 
7309             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
7310         }
7311 
7312         {
7313             std::stringstream css;
7314 
7315             css << "#version 460 core\n"
7316                    "\n"
7317                    "#extension GL_EXT_ray_tracing : require\n"
7318                    "\n" +
7319                        de::toString(constantDefinitions) + de::toString(resultBufferDefinition) +
7320                        de::toString(rayPayloadInDefinition) +
7321                        "\n"
7322                        "void main()\n"
7323                        "{\n"
7324                        "    uint nItem = atomicAdd(nItemsStored, 1);\n"
7325                        "\n"
7326                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7327                        "    {\n"
7328                        "        resultValues[nItem * N_UINTS_IN_RAY_PAYLOAD + nUint] = values[nUint];\n"
7329                        "    }\n"
7330                        "}\n";
7331 
7332             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
7333         }
7334 
7335         {
7336             std::stringstream css;
7337 
7338             css << "#version 460 core\n"
7339                    "\n"
7340                    "#extension GL_EXT_ray_tracing : require\n"
7341                    "\n"
7342                    "layout(set = 0, binding = 1) uniform accelerationStructureEXT accelerationStructure;\n"
7343                    "\n" +
7344                        de::toString(constantDefinitions) + de::toString(rayPayloadDefinition) +
7345                        "void main()\n"
7346                        "{\n"
7347                        "    uint  nInvocation  = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
7348                        "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
7349                        "    uint  rayFlags     = 0;\n"
7350                        "    float tmin         = 0.001;\n"
7351                        "    float tmax         = 2.1;\n"
7352                        "\n"
7353                        "    uint  cullMask     = 0xFF;\n"
7354                        "    vec3  cellStartXYZ = vec3(nInvocation * 3.0, 0.0, 0.0);\n"
7355                        "    vec3  cellEndXYZ   = cellStartXYZ + vec3(1.0);\n"
7356                        "    vec3  target       = mix(cellStartXYZ, cellEndXYZ, vec3(0.5) );\n"
7357                        "    vec3  origin       = target - vec3(0, 2, 0);\n"
7358                        "    vec3  direct       = normalize(target - origin);\n"
7359                        "\n"
7360                        "    for (uint nUint = 0; nUint < N_UINTS_IN_RAY_PAYLOAD; ++nUint)\n"
7361                        "    {\n"
7362                        "        values[nUint] = (1 + nUint);\n"
7363                        "    }\n"
7364                        "\n"
7365                        "    traceRayEXT(accelerationStructure, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, "
7366                        "tmax, 0);\n"
7367                        "}\n";
7368 
7369             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
7370         }
7371     }
7372 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const7373     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
7374     {
7375         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
7376         const uint32_t *resultU32Ptr                  = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
7377 
7378         bool result = false;
7379 
7380         const auto nItemsStored                   = *resultU32Ptr;
7381         const auto nRays                          = m_gridSizeXYZ[0] * m_gridSizeXYZ[1] * m_gridSizeXYZ[2];
7382         const auto nMissShaderInvocationsExpected = nRays / 2;
7383         const auto nAHitShaderInvocationsExpected = nRays / 2;
7384         const auto nCHitShaderInvocationsExpected = nAHitShaderInvocationsExpected;
7385         const auto nResultStoresExpected =
7386             nMissShaderInvocationsExpected + nAHitShaderInvocationsExpected + nCHitShaderInvocationsExpected;
7387 
7388         if (nItemsStored != nResultStoresExpected)
7389         {
7390             goto end;
7391         }
7392 
7393         for (uint32_t nItem = 0; nItem < nItemsStored; ++nItem)
7394         {
7395             const auto resultItemDataPtr = resultU32Ptr + 1 /* nItemsStored */ + nItem * m_nRayPayloadU32s;
7396 
7397             for (uint32_t nValue = 0; nValue < m_nRayPayloadU32s; ++nValue)
7398             {
7399                 if (resultItemDataPtr[nValue] != (1 + nValue))
7400                 {
7401                     goto end;
7402                 }
7403             }
7404         }
7405 
7406         result = true;
7407     end:
7408         return result;
7409     }
7410 
7411 private:
7412     const AccelerationStructureLayout m_asStructureLayout;
7413     const GeometryType m_geometryType;
7414 
7415     const tcu::UVec3 m_gridSizeXYZ;
7416     uint32_t m_nRayPayloadU32s;
7417     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
7418 
7419     VkSpecializationInfo m_specializationInfo;
7420     VkSpecializationMapEntry m_specializationInfoMapEntry;
7421 };
7422 
7423 class TerminationTest : public TestBase
7424 {
7425 public:
/* Selects which termination mechanism the test exercises.
 *
 * IGNORE_* modes use ignoreIntersectionEXT and TERMINATE_ANY_HIT_* modes use terminateRayEXT
 * in the any-hit shader; TERMINATE_INTERSECTION_* modes run against AABB geometry.
 * "STATICALLY" variants reach the statement unconditionally, while the "DYNAMICALLY"
 * any-hit variants guard it with a flag read from the result buffer.
 */
enum class Mode
{
    IGNORE_ANY_HIT_STATICALLY,
    IGNORE_ANY_HIT_DYNAMICALLY,
    TERMINATE_ANY_HIT_STATICALLY,
    TERMINATE_ANY_HIT_DYNAMICALLY,
    TERMINATE_INTERSECTION_STATICALLY,
    TERMINATE_INTERSECTION_DYNAMICALLY,

    UNKNOWN /* Returned when a test type has no matching mode. */
};
7437 
getModeFromTestType(const TestType & testType)7438     static Mode getModeFromTestType(const TestType &testType)
7439     {
7440         Mode result = Mode::UNKNOWN;
7441 
7442         switch (testType)
7443         {
7444         case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
7445             result = Mode::IGNORE_ANY_HIT_DYNAMICALLY;
7446             break;
7447         case TestType::IGNORE_ANY_HIT_STATICALLY:
7448             result = Mode::IGNORE_ANY_HIT_STATICALLY;
7449             break;
7450         case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
7451             result = Mode::TERMINATE_ANY_HIT_DYNAMICALLY;
7452             break;
7453         case TestType::TERMINATE_ANY_HIT_STATICALLY:
7454             result = Mode::TERMINATE_ANY_HIT_STATICALLY;
7455             break;
7456         case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
7457             result = Mode::TERMINATE_INTERSECTION_DYNAMICALLY;
7458             break;
7459         case TestType::TERMINATE_INTERSECTION_STATICALLY:
7460             result = Mode::TERMINATE_INTERSECTION_STATICALLY;
7461             break;
7462 
7463         default:
7464         {
7465             DE_ASSERT(false && "This should never happen");
7466         }
7467         }
7468 
7469         return result;
7470     }
7471 
TerminationTest(const Mode & mode)7472     TerminationTest(const Mode &mode) : m_mode(mode)
7473     {
7474         /* Stub */
7475     }
7476 
/* Intentionally empty. */
~TerminationTest()
{
}
7481 
getCHitShaderCollectionShaderNames() const7482     std::vector<std::string> getCHitShaderCollectionShaderNames() const final
7483     {
7484         return {};
7485     }
7486 
getDispatchSize() const7487     tcu::UVec3 getDispatchSize() const final
7488     {
7489         return tcu::UVec3(1, 1, 1);
7490     }
7491 
getResultBufferStartData() const7492     std::vector<uint8_t> getResultBufferStartData() const final
7493     {
7494         auto resultU8Vec      = std::vector<uint8_t>(getResultBufferSize());
7495         auto resultU32DataPtr = reinterpret_cast<uint32_t *>(resultU8Vec.data());
7496 
7497         memset(resultU8Vec.data(), 0, resultU8Vec.size());
7498 
7499         if (m_mode == Mode::IGNORE_ANY_HIT_DYNAMICALLY || m_mode == Mode::TERMINATE_ANY_HIT_DYNAMICALLY)
7500         {
7501             resultU32DataPtr[2] = 1;
7502         }
7503         else if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7504         {
7505             resultU32DataPtr[3] = 1;
7506         }
7507 
7508         return resultU8Vec;
7509     }
7510 
getResultBufferSize() const7511     uint32_t getResultBufferSize() const final
7512     {
7513         const uint32_t nExtraUints =
7514             (m_mode == Mode::IGNORE_ANY_HIT_DYNAMICALLY || m_mode == Mode::TERMINATE_ANY_HIT_DYNAMICALLY ||
7515              m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY) ?
7516                 1 :
7517                 0;
7518         const uint32_t nResultUints =
7519             (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY) ?
7520                 3 :
7521                 2;
7522 
7523         return static_cast<uint32_t>(sizeof(uint32_t)) * (nExtraUints + nResultUints);
7524     }
7525 
getTLASPtrVecToBind() const7526     std::vector<TopLevelAccelerationStructure *> getTLASPtrVecToBind() const final
7527     {
7528         return {m_tlPtr.get()};
7529     }
7530 
resetTLAS()7531     void resetTLAS() final
7532     {
7533         m_tlPtr.reset();
7534     }
7535 
initAS(vkt::Context & context,RayTracingProperties *,VkCommandBuffer commandBuffer)7536     void initAS(vkt::Context &context, RayTracingProperties * /* rtPropertiesPtr */,
7537                 VkCommandBuffer commandBuffer) final
7538     {
7539         if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY)
7540         {
7541             const tcu::Vec3 gridCellSizeXYZ       = tcu::Vec3(2, 1, 1);
7542             const tcu::Vec3 gridInterCellDeltaXYZ = tcu::Vec3(3, 3, 3);
7543             const tcu::UVec3 gridSizeXYZ          = tcu::UVec3(1, 1, 1);
7544             const tcu::Vec3 gridStartXYZ          = tcu::Vec3(-1, -1, -1);
7545 
7546             m_asProviderPtr.reset(new GridASProvider(gridStartXYZ, gridCellSizeXYZ, gridSizeXYZ, gridInterCellDeltaXYZ,
7547                                                      GeometryType::AABB));
7548         }
7549         else
7550         {
7551             m_asProviderPtr.reset(new TriASProvider());
7552         }
7553 
7554         m_tlPtr = m_asProviderPtr->createTLAS(context, AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY,
7555                                               commandBuffer, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR,
7556                                               nullptr,  /* optASPropertyProviderPtr */
7557                                               nullptr); /* optASFedbackPtr          */
7558     }
7559 
initPrograms(SourceCollections & programCollection) const7560     void initPrograms(SourceCollections &programCollection) const final
7561     {
7562         const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
7563                                                   0u,    /* flags        */
7564                                                   true); /* allowSpirv14 */
7565 
7566         const std::string resultBufferSizeString = de::toString(getResultBufferSize() / sizeof(uint32_t));
7567 
7568         {
7569             std::string aHitShader;
7570 
7571             switch (m_mode)
7572             {
7573             case Mode::IGNORE_ANY_HIT_DYNAMICALLY:
7574             {
7575                 aHitShader = "#version 460 core\n"
7576                              "\n"
7577                              "#extension GL_EXT_ray_tracing : require\n"
7578                              "\n"
7579                              "hitAttributeEXT vec3 unusedAttribute;\n"
7580                              "\n"
7581                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7582                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7583                              "{\n"
7584                              "    uint resultData[" +
7585                              resultBufferSizeString +
7586                              "];\n"
7587                              "};\n"
7588                              "\n"
7589                              "void ignoreIntersectionWrapper()\n"
7590                              "{\n"
7591                              "    ignoreIntersectionEXT;\n"
7592                              "}\n"
7593                              "\n"
7594                              "void main()\n"
7595                              "{\n"
7596                              "\n"
7597                              "    if (resultData[2] == 1)\n"
7598                              "    {\n"
7599                              "        ignoreIntersectionWrapper();\n"
7600                              "    }\n"
7601                              "\n"
7602                              "    resultData[0] = 1;\n"
7603                              "}\n";
7604 
7605                 break;
7606             }
7607 
7608             case Mode::IGNORE_ANY_HIT_STATICALLY:
7609             {
7610                 aHitShader = "#version 460 core\n"
7611                              "\n"
7612                              "#extension GL_EXT_ray_tracing : require\n"
7613                              "\n"
7614                              "hitAttributeEXT vec3 unusedAttribute;\n"
7615                              "\n"
7616                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7617                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7618                              "{\n"
7619                              "    uint resultData[" +
7620                              resultBufferSizeString +
7621                              "];\n"
7622                              "};\n"
7623                              "\n"
7624                              "void ignoreIntersectionWrapper()\n"
7625                              "{\n"
7626                              "    ignoreIntersectionEXT;\n"
7627                              "}\n"
7628                              "\n"
7629                              "void main()\n"
7630                              "{\n"
7631                              "    ignoreIntersectionWrapper();\n"
7632                              "\n"
7633                              "    resultData[0] = 1;\n"
7634                              "}\n";
7635 
7636                 break;
7637             }
7638 
7639             case Mode::TERMINATE_ANY_HIT_DYNAMICALLY:
7640             {
7641                 aHitShader = "#version 460 core\n"
7642                              "\n"
7643                              "#extension GL_EXT_ray_tracing : require\n"
7644                              "\n"
7645                              "hitAttributeEXT vec3 unusedAttribute;\n"
7646                              "\n"
7647                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7648                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7649                              "{\n"
7650                              "    uint resultData[" +
7651                              resultBufferSizeString +
7652                              "];\n"
7653                              "};\n"
7654                              "\n"
7655                              "void terminateRayWrapper()\n"
7656                              "{\n"
7657                              "    terminateRayEXT;\n"
7658                              "}\n"
7659                              "\n"
7660                              "void main()\n"
7661                              "{\n"
7662                              "    if (resultData[2] == 1)\n"
7663                              "    {\n"
7664                              "        terminateRayWrapper();\n"
7665                              "    }\n"
7666                              "\n"
7667                              "    resultData[0] = 1;\n"
7668                              "}\n";
7669 
7670                 break;
7671             }
7672 
7673             case Mode::TERMINATE_ANY_HIT_STATICALLY:
7674             case Mode::TERMINATE_INTERSECTION_STATICALLY:
7675             {
7676                 aHitShader = "#version 460 core\n"
7677                              "\n"
7678                              "#extension GL_EXT_ray_tracing : require\n"
7679                              "\n"
7680                              "hitAttributeEXT vec3 unusedAttribute;\n"
7681                              "\n"
7682                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7683                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7684                              "{\n"
7685                              "    uint resultData[" +
7686                              resultBufferSizeString +
7687                              "];\n"
7688                              "};\n"
7689                              "\n"
7690                              "void terminateRayWrapper()\n"
7691                              "{\n"
7692                              "    terminateRayEXT;\n"
7693                              "}\n"
7694                              "\n"
7695                              "void main()\n"
7696                              "{\n"
7697                              "    terminateRayWrapper();\n"
7698                              "\n"
7699                              "    resultData[0] = 1;\n"
7700                              "}\n";
7701 
7702                 break;
7703             }
7704 
7705             case Mode::TERMINATE_INTERSECTION_DYNAMICALLY:
7706             {
7707                 aHitShader = "#version 460 core\n"
7708                              "\n"
7709                              "#extension GL_EXT_ray_tracing : require\n"
7710                              "\n"
7711                              "hitAttributeEXT vec3 unusedAttribute;\n"
7712                              "\n"
7713                              "layout(location = 0) rayPayloadInEXT      vec3 unusedPayload;\n"
7714                              "layout(set      = 0, binding = 0, std430) buffer result\n"
7715                              "{\n"
7716                              "    uint resultData[" +
7717                              resultBufferSizeString +
7718                              "];\n"
7719                              "};\n"
7720                              "\n"
7721                              "void terminateRayWrapper()\n"
7722                              "{\n"
7723                              "    terminateRayEXT;\n"
7724                              "}\n"
7725                              "\n"
7726                              "void main()\n"
7727                              "{\n"
7728                              "    if (resultData[3] == 1)\n"
7729                              "    {\n"
7730                              "        terminateRayWrapper();\n"
7731                              "    }\n"
7732                              "\n"
7733                              "    resultData[0] = 1;\n"
7734                              "}\n";
7735 
7736                 break;
7737             }
7738 
7739             default:
7740             {
7741                 DE_ASSERT(false);
7742             }
7743             }
7744 
7745             programCollection.glslSources.add("ahit") << glu::AnyHitSource(aHitShader) << buildOptions;
7746         }
7747 
7748         if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY || m_mode == Mode::TERMINATE_INTERSECTION_STATICALLY)
7749         {
7750             std::stringstream css;
7751 
7752             css << "#version 460 core\n"
7753                    "\n"
7754                    "#extension GL_EXT_ray_tracing : require\n"
7755                    "\n"
7756                    "hitAttributeEXT vec3 hitAttribute;\n"
7757                    "\n"
7758                    "layout(set = 0, binding = 0, std430) buffer result\n"
7759                    "{\n"
7760                    "    uint resultData[4];\n"
7761                    "};\n"
7762                    "\n"
7763                    "void generateIntersection()\n"
7764                    "{\n"
7765                    "    reportIntersectionEXT(0.95f, 0);\n"
7766                    "}\n"
7767                    "\n"
7768                    "void main()\n"
7769                    "{\n";
7770 
7771             if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7772             {
7773                 css << "    if (resultData[3] == 1)\n"
7774                        "    {\n";
7775             }
7776 
7777             css << "    generateIntersection();\n";
7778 
7779             if (m_mode == Mode::TERMINATE_INTERSECTION_DYNAMICALLY)
7780             {
7781                 css << "    }\n";
7782             }
7783 
7784             css << "\n"
7785                    "    resultData[2] = 1;\n"
7786                    "}\n";
7787 
7788             programCollection.glslSources.add("intersection") << glu::IntersectionSource(css.str()) << buildOptions;
7789         }
7790 
7791         {
7792             std::stringstream css;
7793 
7794             css << "#version 460 core\n"
7795                    "\n"
7796                    "#extension GL_EXT_ray_tracing : require\n"
7797                    "\n"
7798                    "layout(location = 0) rayPayloadInEXT      vec3   unusedPayload;\n"
7799                    "layout(set      = 0, binding = 0, std430) buffer result\n"
7800                    "{\n"
7801                    "    uint resultData[2];\n"
7802                    "};\n"
7803                    "\n"
7804                    "void main()\n"
7805                    "{\n"
7806                    "    resultData[1] = 1;\n"
7807                    "}\n";
7808 
7809             programCollection.glslSources.add("miss") << glu::MissSource(css.str()) << buildOptions;
7810         }
7811 
7812         {
7813             std::stringstream css;
7814 
7815             css << "#version 460 core\n"
7816                    "\n"
7817                    "#extension GL_EXT_ray_tracing : require\n"
7818                    "\n"
7819                    "layout(location = 0)              rayPayloadEXT vec3                     unusedPayload;\n"
7820                    "layout(set      = 0, binding = 1) uniform       accelerationStructureEXT topLevelAS;\n"
7821                    "\n"
7822                    "void main()\n"
7823                    "{\n"
7824                    "    uint  nInvocation = gl_LaunchIDEXT.z * gl_LaunchSizeEXT.x * gl_LaunchSizeEXT.y + "
7825                    "gl_LaunchIDEXT.y * gl_LaunchSizeEXT.x + gl_LaunchIDEXT.x;\n"
7826                    "    uint  rayFlags    = 0;\n"
7827                    "    uint  cullMask    = 0xFF;\n"
7828                    "    float tmin        = 0.001;\n"
7829                    "    float tmax        = 9.0;\n"
7830                    "    vec3  origin      = vec3(-1,  -1,  -1);\n"
7831                    "    vec3  target      = vec3(0.0, 0.5,  0);\n"
7832                    "    vec3  direct      = normalize(target - origin);\n"
7833                    "\n"
7834                    "    traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
7835                    "}\n";
7836 
7837             programCollection.glslSources.add("rgen") << glu::RaygenSource(css.str()) << buildOptions;
7838         }
7839     }
7840 
verifyResultBuffer(vkt::Context & context,BufferWithMemory & buffer) const7841     bool verifyResultBuffer(vkt::Context &context, BufferWithMemory &buffer) const final
7842     {
7843         de::MovePtr<BufferWithMemory> resultBufferPtr = copyDeviceBufferToHost(context, buffer);
7844         const uint32_t *resultU32DataPtr              = (uint32_t *)resultBufferPtr->getAllocation().getHostPtr();
7845         bool result                                   = false;
7846 
7847         switch (m_mode)
7848         {
7849         case Mode::IGNORE_ANY_HIT_DYNAMICALLY:
7850         case Mode::IGNORE_ANY_HIT_STATICALLY:
7851         {
7852             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 1)
7853             {
7854                 goto end;
7855             }
7856 
7857             result = true;
7858 
7859             break;
7860         }
7861 
7862         case Mode::TERMINATE_ANY_HIT_DYNAMICALLY:
7863         case Mode::TERMINATE_ANY_HIT_STATICALLY:
7864         {
7865             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 0)
7866             {
7867                 goto end;
7868             }
7869 
7870             result = true;
7871 
7872             break;
7873         }
7874 
7875         case Mode::TERMINATE_INTERSECTION_DYNAMICALLY:
7876         case Mode::TERMINATE_INTERSECTION_STATICALLY:
7877         {
7878             if (resultU32DataPtr[0] != 0 || resultU32DataPtr[1] != 0 || resultU32DataPtr[2] != 0)
7879             {
7880                 goto end;
7881             }
7882 
7883             result = true;
7884 
7885             break;
7886         }
7887 
7888         default:
7889         {
7890             TCU_FAIL("This should never be reached");
7891         }
7892         }
7893 
7894     end:
7895         return result;
7896     }
7897 
7898 private:
7899     std::unique_ptr<ASProviderBase> m_asProviderPtr;
7900     const Mode m_mode;
7901     std::unique_ptr<TopLevelAccelerationStructure> m_tlPtr;
7902 };
7903 
7904 /* Generic misc test instance */
7905 class RayTracingMiscTestInstance : public TestInstance
7906 {
7907 public:
7908     RayTracingMiscTestInstance(Context &context, const CaseDef &data, TestBase *testPtr);
7909     ~RayTracingMiscTestInstance(void);
7910 
7911     tcu::TestStatus iterate(void);
7912 
7913 protected:
7914     void checkSupport(void) const;
7915     de::MovePtr<BufferWithMemory> runTest(void);
7916 
7917 private:
7918     CaseDef m_data;
7919 
7920     de::MovePtr<RayTracingProperties> m_rayTracingPropsPtr;
7921     TestBase *m_testPtr;
7922 };
7923 
RayTracingMiscTestInstance(Context & context,const CaseDef & data,TestBase * testPtr)7924 RayTracingMiscTestInstance::RayTracingMiscTestInstance(Context &context, const CaseDef &data, TestBase *testPtr)
7925     : vkt::TestInstance(context)
7926     , m_data(data)
7927     , m_rayTracingPropsPtr(makeRayTracingProperties(context.getInstanceInterface(), context.getPhysicalDevice()))
7928     , m_testPtr(testPtr)
7929 {
7930     m_testPtr->init(m_context, m_rayTracingPropsPtr.get());
7931 }
7932 
~RayTracingMiscTestInstance(void)7933 RayTracingMiscTestInstance::~RayTracingMiscTestInstance(void)
7934 {
7935     /* Stub */
7936 }
7937 
checkSupport(void) const7938 void RayTracingMiscTestInstance::checkSupport(void) const
7939 {
7940     if (m_testPtr->getResultBufferSize() > m_context.getDeviceVulkan11Properties().maxMemoryAllocationSize)
7941         TCU_THROW(NotSupportedError,
7942                   "VkPhysicalDeviceVulkan11Properties::maxMemoryAllocationSize too small, allocation might fail");
7943 }
7944 
runTest(void)7945 de::MovePtr<BufferWithMemory> RayTracingMiscTestInstance::runTest(void)
7946 {
7947     const DeviceInterface &deviceInterface = m_context.getDeviceInterface();
7948     const VkDevice deviceVk                = m_context.getDevice();
7949 
7950     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
7951     const VkQueue queueVk           = m_context.getUniversalQueue();
7952     Allocator &allocator            = m_context.getDefaultAllocator();
7953 
7954     de::MovePtr<BufferWithMemory> resultBufferPtr;
7955     de::MovePtr<BufferWithMemory> startBufferPtr;
7956 
7957     // Determine group indices
7958     const auto ahitCollectionShaderNameVec         = m_testPtr->getAHitShaderCollectionShaderNames();
7959     const auto chitCollectionShaderNameVec         = m_testPtr->getCHitShaderCollectionShaderNames();
7960     const auto intersectionCollectionShaderNameVec = m_testPtr->getIntersectionShaderCollectionShaderNames();
7961     const auto missCollectionShaderNameVec         = m_testPtr->getMissShaderCollectionShaderNames();
7962 
7963     const uint32_t nRaygenGroups = 1;
7964     const uint32_t nMissGroups   = static_cast<uint32_t>(missCollectionShaderNameVec.size());
7965     const uint32_t nHitGroups    = de::max(de::max(static_cast<uint32_t>(ahitCollectionShaderNameVec.size()),
7966                                                    static_cast<uint32_t>(chitCollectionShaderNameVec.size())),
7967                                            static_cast<uint32_t>(intersectionCollectionShaderNameVec.size()));
7968 
7969     const uint32_t raygenGroupIndex = 0;
7970     const uint32_t missGroupIndex   = nRaygenGroups;
7971     const uint32_t hitGroupIndex    = missGroupIndex + nMissGroups;
7972 
7973     const auto callableShaderCollectionNames = m_testPtr->getCallableShaderCollectionNames();
7974     auto &collection                         = m_context.getBinaryCollection();
7975     const auto resultBufferSize              = m_testPtr->getResultBufferSize();
7976 
7977     const Move<VkDescriptorSetLayout> descriptorSetLayoutPtr =
7978         DescriptorSetLayoutBuilder()
7979             .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ALL_RAY_TRACING_STAGES)
7980             .addArrayBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, m_testPtr->getASBindingArraySize(),
7981                              ALL_RAY_TRACING_STAGES)
7982             .build(deviceInterface, deviceVk);
7983 
7984     const Move<VkDescriptorPool> descriptorPoolPtr =
7985         DescriptorPoolBuilder()
7986             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
7987             .addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, m_testPtr->getASBindingArraySize())
7988             .build(deviceInterface, deviceVk, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); /* maxSets */
7989 
7990     const Move<VkDescriptorSet> descriptorSetPtr =
7991         makeDescriptorSet(deviceInterface, deviceVk, *descriptorPoolPtr, *descriptorSetLayoutPtr);
7992 
7993     const Move<VkPipelineLayout> pipelineLayoutPtr =
7994         m_testPtr->getPipelineLayout(deviceInterface, deviceVk, descriptorSetLayoutPtr.get());
7995 
7996     const Move<VkCommandPool> cmdPoolPtr = createCommandPool(deviceInterface, deviceVk, 0, /* pCreateInfo */
7997                                                              queueFamilyIndex);
7998 
7999     const Move<VkCommandBuffer> cmdBufferPtr =
8000         allocateCommandBuffer(deviceInterface, deviceVk, *cmdPoolPtr, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8001 
8002     Move<VkPipeline> pipelineVkPtr;
8003     de::MovePtr<RayTracingPipeline> rayTracingPipelinePtr = de::newMovePtr<RayTracingPipeline>();
8004 
8005     {
8006         Move<VkShaderModule> raygenShader =
8007             createShaderModule(deviceInterface, deviceVk, collection.get("rgen"), 0); /* flags */
8008 
8009         rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, makeVkSharedPtr(raygenShader),
8010                                          raygenGroupIndex,
8011                                          m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_RAYGEN_BIT_KHR));
8012     }
8013 
8014     {
8015         for (uint32_t nMissShaderName = 0; nMissShaderName < static_cast<uint32_t>(missCollectionShaderNameVec.size());
8016              nMissShaderName++)
8017         {
8018             const auto &currentMissShaderName = missCollectionShaderNameVec.at(nMissShaderName);
8019             Move<VkShaderModule> missShader =
8020                 createShaderModule(deviceInterface, deviceVk, collection.get(currentMissShaderName), 0); /* flags */
8021 
8022             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, makeVkSharedPtr(missShader),
8023                                              missGroupIndex + nMissShaderName,
8024                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_MISS_BIT_KHR));
8025         }
8026     }
8027 
8028     {
8029         for (uint32_t nAHitShaderName = 0; nAHitShaderName < static_cast<uint32_t>(ahitCollectionShaderNameVec.size());
8030              nAHitShaderName++)
8031         {
8032             const auto &currentAHitShaderName = ahitCollectionShaderNameVec.at(nAHitShaderName);
8033             Move<VkShaderModule> anyHitShader =
8034                 createShaderModule(deviceInterface, deviceVk, collection.get(currentAHitShaderName), 0); /* flags */
8035 
8036             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR, makeVkSharedPtr(anyHitShader),
8037                                              hitGroupIndex + nAHitShaderName,
8038                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_ANY_HIT_BIT_KHR));
8039         }
8040 
8041         for (uint32_t nCHitShaderName = 0; nCHitShaderName < static_cast<uint32_t>(chitCollectionShaderNameVec.size());
8042              nCHitShaderName++)
8043         {
8044             const auto &currentCHitShaderName = chitCollectionShaderNameVec.at(nCHitShaderName);
8045             Move<VkShaderModule> closestHitShader =
8046                 createShaderModule(deviceInterface, deviceVk, collection.get(currentCHitShaderName), 0); /* flags */
8047 
8048             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, makeVkSharedPtr(closestHitShader),
8049                                              hitGroupIndex + nCHitShaderName,
8050                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR));
8051         }
8052 
8053         if (m_data.geometryType == GeometryType::AABB || m_data.geometryType == GeometryType::AABB_AND_TRIANGLES)
8054         {
8055             for (uint32_t nIntersectionShaderName = 0;
8056                  nIntersectionShaderName < static_cast<uint32_t>(intersectionCollectionShaderNameVec.size());
8057                  nIntersectionShaderName++)
8058             {
8059                 const auto &currentIntersectionShaderName =
8060                     intersectionCollectionShaderNameVec.at(nIntersectionShaderName);
8061                 Move<VkShaderModule> intersectionShader = createShaderModule(
8062                     deviceInterface, deviceVk, collection.get(currentIntersectionShaderName), 0); /* flags */
8063 
8064                 rayTracingPipelinePtr->addShader(
8065                     VK_SHADER_STAGE_INTERSECTION_BIT_KHR, makeVkSharedPtr(intersectionShader),
8066                     hitGroupIndex + nIntersectionShaderName,
8067                     m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_INTERSECTION_BIT_KHR));
8068             }
8069         }
8070 
8071         for (uint32_t nCallableShader = 0;
8072              nCallableShader < static_cast<uint32_t>(callableShaderCollectionNames.size()); ++nCallableShader)
8073         {
8074             const auto &currentCallableShaderName = callableShaderCollectionNames.at(nCallableShader);
8075             Move<VkShaderModule> callableShader =
8076                 createShaderModule(deviceInterface, deviceVk, collection.get(currentCallableShaderName), 0); /* flags */
8077 
8078             rayTracingPipelinePtr->addShader(VK_SHADER_STAGE_CALLABLE_BIT_KHR, makeVkSharedPtr(callableShader),
8079                                              static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP) +
8080                                                  nCallableShader,
8081                                              m_testPtr->getSpecializationInfoPtr(VK_SHADER_STAGE_CALLABLE_BIT_KHR));
8082         }
8083 
8084         if (m_testPtr->usesDynamicStackSize())
8085         {
8086             rayTracingPipelinePtr->addDynamicState(VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR);
8087         }
8088 
8089         rayTracingPipelinePtr->setMaxRecursionDepth(m_testPtr->getMaxRecursionDepthUsed());
8090 
8091         pipelineVkPtr = rayTracingPipelinePtr->createPipeline(deviceInterface, deviceVk, *pipelineLayoutPtr);
8092     }
8093 
8094     /* Cache shader stack size info */
8095     {
8096         VkDeviceSize ahitShaderStackSize     = 0;
8097         VkDeviceSize callableShaderStackSize = 0;
8098         VkDeviceSize chitShaderStackSize     = 0;
8099         VkDeviceSize isectShaderStackSize    = 0;
8100         VkDeviceSize missShaderStackSize     = 0;
8101         VkDeviceSize raygenShaderStackSize   = 0;
8102 
8103         raygenShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8104             deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::RAYGEN_GROUP),
8105             VK_SHADER_GROUP_SHADER_GENERAL_KHR);
8106 
8107         if (collection.contains("ahit"))
8108         {
8109             ahitShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8110                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
8111                 VK_SHADER_GROUP_SHADER_ANY_HIT_KHR);
8112         }
8113 
8114         if (collection.contains("chit"))
8115         {
8116             chitShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8117                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
8118                 VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR);
8119         }
8120 
8121         if (m_data.geometryType == GeometryType::AABB || m_data.geometryType == GeometryType::AABB_AND_TRIANGLES)
8122         {
8123             if (collection.contains("intersection"))
8124             {
8125                 isectShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8126                     deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::HIT_GROUP),
8127                     VK_SHADER_GROUP_SHADER_INTERSECTION_KHR);
8128             }
8129         }
8130 
8131         if (nMissGroups > 0u)
8132         {
8133             missShaderStackSize = deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8134                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::MISS_GROUP),
8135                 VK_SHADER_GROUP_SHADER_GENERAL_KHR);
8136         }
8137 
8138         for (uint32_t nCallableShader = 0;
8139              nCallableShader < static_cast<uint32_t>(callableShaderCollectionNames.size()); ++nCallableShader)
8140         {
8141             callableShaderStackSize += deviceInterface.getRayTracingShaderGroupStackSizeKHR(
8142                 deviceVk, *pipelineVkPtr, static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP) + nCallableShader,
8143                 VK_SHADER_GROUP_SHADER_GENERAL_KHR);
8144         }
8145 
8146         m_testPtr->onShaderStackSizeDiscovered(raygenShaderStackSize, ahitShaderStackSize, chitShaderStackSize,
8147                                                missShaderStackSize, callableShaderStackSize, isectShaderStackSize);
8148     }
8149 
8150     auto callableShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8151 
8152     if (callableShaderCollectionNames.size() != 0)
8153     {
8154         callableShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8155             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8156             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(),
8157             static_cast<uint32_t>(ShaderGroups::FIRST_CALLABLE_GROUP),
8158             static_cast<uint32_t>(callableShaderCollectionNames.size()), /* groupCount                  */
8159             0u,                                                          /* additionalBufferCreateFlags */
8160             0u,                                                          /* additionalBufferUsageFlags  */
8161             MemoryRequirement::Any, 0u,                                  /* opaqueCaptureAddress       */
8162             0u,                                                          /* shaderBindingTableOffset   */
8163             m_testPtr->getShaderRecordSize(ShaderGroups::FIRST_CALLABLE_GROUP));
8164     }
8165 
8166     const auto raygenShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8167         deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8168         m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), raygenGroupIndex,
8169         nRaygenGroups,              /* groupCount                  */
8170         0u,                         /* additionalBufferCreateFlags */
8171         0u,                         /* additionalBufferUsageFlags  */
8172         MemoryRequirement::Any, 0u, /* opaqueCaptureAddress        */
8173         0u);                        /* shaderBindingTableOffset    */
8174 
8175     auto missShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8176     if (nMissGroups > 0u)
8177     {
8178         const void *missShaderBindingGroupShaderRecordDataPtr =
8179             m_testPtr->getShaderRecordData(ShaderGroups::MISS_GROUP);
8180         missShaderBindingTablePtr = rayTracingPipelinePtr->createShaderBindingTable(
8181             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8182             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), missGroupIndex,
8183             nMissGroups,                /* groupCount                  */
8184             0u,                         /* additionalBufferCreateFlags */
8185             0u,                         /* additionalBufferUsageFlags  */
8186             MemoryRequirement::Any, 0u, /* opaqueCaptureAddress       */
8187             0u,                         /* shaderBindingTableOffset   */
8188             m_testPtr->getShaderRecordSize(ShaderGroups::MISS_GROUP), &missShaderBindingGroupShaderRecordDataPtr);
8189     }
8190 
8191     auto hitShaderBindingTablePtr = de::MovePtr<BufferWithMemory>();
8192     if (nHitGroups > 0u)
8193     {
8194         const void *hitShaderBindingGroupShaderRecordDataPtr = m_testPtr->getShaderRecordData(ShaderGroups::HIT_GROUP);
8195         hitShaderBindingTablePtr                             = rayTracingPipelinePtr->createShaderBindingTable(
8196             deviceInterface, deviceVk, *pipelineVkPtr, allocator, m_rayTracingPropsPtr->getShaderGroupHandleSize(),
8197             m_rayTracingPropsPtr->getShaderGroupBaseAlignment(), hitGroupIndex,
8198             nHitGroups,                 /* groupCount                  */
8199             0u,                         /* additionalBufferCreateFlags */
8200             0u,                         /* additionalBufferUsageFlags  */
8201             MemoryRequirement::Any, 0u, /* opaqueCaptureAddress       */
8202             0u,                         /* shaderBindingTableOffset   */
8203             m_testPtr->getShaderRecordSize(ShaderGroups::HIT_GROUP), &hitShaderBindingGroupShaderRecordDataPtr);
8204     }
8205 
8206     {
8207         const auto resultBufferCreateInfo = makeBufferCreateInfo(
8208             resultBufferSize,
8209             VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
8210         const auto resultBufferDataVec = m_testPtr->getResultBufferStartData();
8211 
8212         resultBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
8213             deviceInterface, deviceVk, allocator, resultBufferCreateInfo, MemoryRequirement::DeviceAddress));
8214 
8215         if (resultBufferDataVec.size() > 0)
8216         {
8217             DE_ASSERT(static_cast<uint32_t>(resultBufferDataVec.size()) == resultBufferSize);
8218 
8219             startBufferPtr = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
8220                 deviceInterface, deviceVk, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
8221 
8222             memcpy(startBufferPtr->getAllocation().getHostPtr(), resultBufferDataVec.data(),
8223                    resultBufferDataVec.size());
8224 
8225             flushAlloc(deviceInterface, deviceVk, startBufferPtr->getAllocation());
8226         }
8227     }
8228 
8229     beginCommandBuffer(deviceInterface, *cmdBufferPtr, 0u /* flags */);
8230     {
8231         m_testPtr->initAS(m_context, m_rayTracingPropsPtr.get(), *cmdBufferPtr);
8232 
8233         std::vector<TopLevelAccelerationStructure *> tlasPtrVec = m_testPtr->getTLASPtrVecToBind();
8234         std::vector<VkAccelerationStructureKHR> tlasVkVec;
8235 
8236         for (auto &currentTLASPtr : tlasPtrVec)
8237         {
8238             tlasVkVec.push_back(*currentTLASPtr->getPtr());
8239         }
8240 
8241         // Clear result buffer if startdata was zero ...
8242         if (m_testPtr->getResultBufferStartData().size() == 0)
8243         {
8244             deviceInterface.cmdFillBuffer(*cmdBufferPtr, **resultBufferPtr, 0, /* dstOffset */
8245                                           VK_WHOLE_SIZE, 0);                   /* data */
8246 
8247             {
8248                 const auto postFillBarrier = makeBufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, /* srcAccessMask */
8249                                                                      VK_ACCESS_SHADER_WRITE_BIT,   /* dstAccessMask */
8250                                                                      **resultBufferPtr, 0,         /* offset */
8251                                                                      VK_WHOLE_SIZE);
8252 
8253                 cmdPipelineBufferMemoryBarrier(deviceInterface, *cmdBufferPtr,
8254                                                VK_PIPELINE_STAGE_TRANSFER_BIT,               /* srcStageMask */
8255                                                VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* dstStageMask */
8256                                                &postFillBarrier);
8257             }
8258         }
8259         else
8260         {
8261             // ... otherwise copy given startdata to the gpubuffer
8262             const VkBufferCopy bufferCopy{0, 0, resultBufferSize};
8263             deviceInterface.cmdCopyBuffer(*cmdBufferPtr, **startBufferPtr, **resultBufferPtr, 1, &bufferCopy);
8264         }
8265 
8266         {
8267             VkWriteDescriptorSetAccelerationStructureKHR accelerationStructureWriteDescriptorSet = {
8268                 VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, //  VkStructureType sType;
8269                 nullptr,                                                           //  const void* pNext;
8270                 static_cast<uint32_t>(tlasVkVec.size()), //  uint32_t accelerationStructureCount;
8271                 tlasVkVec.data(),                        //  const VkAccelerationStructureKHR* pAccelerationStructures;
8272             };
8273 
8274             const auto descriptorResultBufferInfo = makeDescriptorBufferInfo(**resultBufferPtr, 0, /* offset */
8275                                                                              resultBufferSize);
8276 
8277             DescriptorSetUpdateBuilder()
8278                 .writeSingle(*descriptorSetPtr, DescriptorSetUpdateBuilder::Location::binding(0u),
8279                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorResultBufferInfo)
8280                 .writeArray(*descriptorSetPtr, DescriptorSetUpdateBuilder::Location::binding(1u),
8281                             VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, static_cast<uint32_t>(tlasVkVec.size()),
8282                             &accelerationStructureWriteDescriptorSet)
8283                 .update(deviceInterface, deviceVk);
8284         }
8285 
8286         deviceInterface.cmdBindDescriptorSets(*cmdBufferPtr, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayoutPtr,
8287                                               0,                          /* firstSet           */
8288                                               1,                          /* descriptorSetCount */
8289                                               &descriptorSetPtr.get(), 0, /* dynamicOffsetCount */
8290                                               nullptr);                   /* pDynamicOffsets    */
8291 
8292         deviceInterface.cmdBindPipeline(*cmdBufferPtr, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineVkPtr);
8293 
8294         {
8295             const auto preTraceMemoryBarrier =
8296                 (m_data.type == TestType::USE_MEMORY_ACCESS) ?
8297                     makeMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, /* srcAccessMask */
8298                                       VK_ACCESS_MEMORY_READ_BIT)  /* dstAccessMask */
8299                     :
8300                     makeMemoryBarrier(VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, /* srcAccessMask */
8301                                       VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR); /* dstAccessMask */
8302 
8303             cmdPipelineMemoryBarrier(deviceInterface, *cmdBufferPtr,
8304                                      VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, /* srcStageMask */
8305                                      VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,           /* dstStageMask */
8306                                      &preTraceMemoryBarrier);
8307         }
8308 
8309         {
8310             const auto nTraceRaysInvocationsNeeded = m_testPtr->getNTraceRayInvocationsNeeded();
8311             const auto handleSize                  = m_rayTracingPropsPtr->getShaderGroupHandleSize();
8312             const auto missStride =
8313                 de::roundUp(handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::MISS_GROUP), handleSize);
8314             const auto hitStride =
8315                 de::roundUp(handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::HIT_GROUP), handleSize);
8316             const auto callStride = de::roundUp(
8317                 handleSize + m_testPtr->getShaderRecordSize(ShaderGroups::FIRST_CALLABLE_GROUP), handleSize);
8318             const auto raygenShaderBindingTableRegion = makeStridedDeviceAddressRegionKHR(
8319                 getBufferDeviceAddress(deviceInterface, deviceVk, raygenShaderBindingTablePtr->get(), 0 /* offset */),
8320                 handleSize, handleSize);
8321             const auto missShaderBindingTableRegion =
8322                 ((nMissGroups > 0u) ? makeStridedDeviceAddressRegionKHR(
8323                                           getBufferDeviceAddress(deviceInterface, deviceVk,
8324                                                                  missShaderBindingTablePtr->get(), 0 /* offset */),
8325                                           missStride, missStride * nMissGroups) :
8326                                       makeStridedDeviceAddressRegionKHR(0, 0, /* stride */
8327                                                                         0 /* size   */));
8328             const auto hitShaderBindingTableRegion =
8329                 ((nHitGroups > 0u) ? makeStridedDeviceAddressRegionKHR(
8330                                          getBufferDeviceAddress(deviceInterface, deviceVk,
8331                                                                 hitShaderBindingTablePtr->get(), 0 /* offset */),
8332                                          hitStride, hitStride * nHitGroups) :
8333                                      makeStridedDeviceAddressRegionKHR(0, 0, /* stride */
8334                                                                        0 /* size   */));
8335 
8336             const auto callableShaderBindingTableRegion =
8337                 (callableShaderCollectionNames.size() > 0) ?
8338                     makeStridedDeviceAddressRegionKHR(
8339                         getBufferDeviceAddress(deviceInterface, deviceVk, callableShaderBindingTablePtr->get(),
8340                                                0 /* offset */),
8341                         callStride, /* stride */
8342                         callStride * static_cast<uint32_t>(callableShaderCollectionNames.size())) :
8343                     makeStridedDeviceAddressRegionKHR(0, 0, /* stride */
8344                                                       0 /* size   */);
8345 
8346             if (m_testPtr->usesDynamicStackSize())
8347             {
8348                 deviceInterface.cmdSetRayTracingPipelineStackSizeKHR(
8349                     *cmdBufferPtr, m_testPtr->getDynamicStackSize(m_testPtr->getMaxRecursionDepthUsed()));
8350             }
8351 
8352             for (uint32_t nInvocation = 0; nInvocation < nTraceRaysInvocationsNeeded; ++nInvocation)
8353             {
8354                 m_testPtr->onBeforeCmdTraceRays(nInvocation, m_context, *cmdBufferPtr, *pipelineLayoutPtr);
8355 
8356                 cmdTraceRays(deviceInterface, *cmdBufferPtr, &raygenShaderBindingTableRegion,
8357                              &missShaderBindingTableRegion, &hitShaderBindingTableRegion,
8358                              &callableShaderBindingTableRegion, m_testPtr->getDispatchSize()[0],
8359                              m_testPtr->getDispatchSize()[1], m_testPtr->getDispatchSize()[2]);
8360             }
8361         }
8362 
8363         {
8364             const auto postTraceMemoryBarrier = (m_data.type == TestType::USE_MEMORY_ACCESS) ?
8365                                                     makeMemoryBarrier(VK_ACCESS_MEMORY_WRITE_BIT, /* srcAccessMask */
8366                                                                       VK_ACCESS_MEMORY_READ_BIT)  /* dstAccessMask */
8367                                                     :
8368                                                     makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, /* srcAccessMask */
8369                                                                       VK_ACCESS_HOST_READ_BIT);   /* dstAccessMask */
8370 
8371             cmdPipelineMemoryBarrier(deviceInterface, *cmdBufferPtr,
8372                                      VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, /* srcStageMask */
8373                                      VK_PIPELINE_STAGE_HOST_BIT,                   /* dstStageMask */
8374                                      &postTraceMemoryBarrier);
8375         }
8376     }
8377 
8378     const VkMemoryBarrier postTraceMemoryBarrier =
8379         makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
8380 
8381     cmdPipelineMemoryBarrier(deviceInterface, *cmdBufferPtr, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR,
8382                              VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
8383 
8384     endCommandBuffer(deviceInterface, *cmdBufferPtr);
8385 
8386     submitCommandsAndWait(deviceInterface, deviceVk, queueVk, cmdBufferPtr.get());
8387 
8388     m_testPtr->resetTLAS();
8389 
8390     return resultBufferPtr;
8391 }
8392 
iterate(void)8393 tcu::TestStatus RayTracingMiscTestInstance::iterate(void)
8394 {
8395     checkSupport();
8396 
8397     const de::MovePtr<BufferWithMemory> bufferGPUPtr = runTest();
8398     const bool result                                = m_testPtr->verifyResultBuffer(m_context, *bufferGPUPtr);
8399 
8400     if (result)
8401         return tcu::TestStatus::pass("Pass");
8402     else
8403         return tcu::TestStatus::fail("Fail");
8404 }
8405 
checkRTPipelineSupport(Context & context)8406 void checkRTPipelineSupport(Context &context)
8407 {
8408     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
8409     context.requireDeviceFunctionality("VK_KHR_buffer_device_address");
8410     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
8411 }
8412 
checkReuseCreationBufferSupport(Context & context,bool)8413 void checkReuseCreationBufferSupport(Context &context, bool)
8414 {
8415     checkRTPipelineSupport(context);
8416 }
8417 
checkReuseScratchBufferSupport(Context & context)8418 void checkReuseScratchBufferSupport(Context &context)
8419 {
8420     checkRTPipelineSupport(context);
8421 }
8422 
initBasicHitBufferPrograms(vk::SourceCollections & programCollection)8423 void initBasicHitBufferPrograms(vk::SourceCollections &programCollection)
8424 {
8425     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
8426 
8427     std::ostringstream rgen;
8428     std::ostringstream chit;
8429 
8430     rgen << "#version 460\n"
8431          << "#extension GL_EXT_ray_tracing : require\n"
8432          << "layout(location=0) rayPayloadEXT vec3 unused;\n"
8433          << "layout(set=0, binding=0) uniform accelerationStructureEXT topLevelAS;\n"
8434          << "layout(set=0, binding=1) buffer OutputBuffer { float val; } outBuffer;\n"
8435          << "\n"
8436          << "void main()\n"
8437          << "{\n"
8438          << "  uint  rayFlags = 0u;\n"
8439          << "  uint  cullMask = 0xFFu;\n"
8440          << "  float tmin     = 0.0;\n"
8441          << "  float tmax     = 9.0;\n"
8442          << "  vec3  origin   = vec3(0.0, 0.0, 0.0);\n"
8443          << "  vec3  direct   = vec3(0.0, 0.0, 1.0);\n"
8444          << "  traceRayEXT(topLevelAS, rayFlags, cullMask, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
8445          << "}\n";
8446 
8447     chit << "#version 460\n"
8448          << "#extension GL_EXT_ray_tracing : require\n"
8449          << "layout(location=0) rayPayloadInEXT vec3 unused;\n"
8450          << "layout(set=0, binding=0) uniform accelerationStructureEXT topLevelAS;\n"
8451          << "layout(set=0, binding=1) buffer OutputBuffer { float val; } outBuffer;\n"
8452          << "\n"
8453          << "void main()\n"
8454          << "{\n"
8455          << "  outBuffer.val = 1.0;\n"
8456          << "}\n";
8457 
8458     programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(rgen.str())) << buildOptions;
8459     programCollection.glslSources.add("chit")
8460         << glu::ClosestHitSource(updateRayTracingGLSL(chit.str())) << buildOptions;
8461 }
8462 
initReuseCreationBufferPrograms(vk::SourceCollections & programCollection,bool)8463 void initReuseCreationBufferPrograms(vk::SourceCollections &programCollection, bool)
8464 {
8465     initBasicHitBufferPrograms(programCollection);
8466 }
8467 
initReuseScratchBufferPrograms(vk::SourceCollections & programCollection)8468 void initReuseScratchBufferPrograms(vk::SourceCollections &programCollection)
8469 {
8470     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
8471 
8472     std::ostringstream rgen;
8473     rgen << "#version 460 core\n"
8474          << "#extension GL_EXT_ray_tracing : require\n"
8475          << "layout (location=0) rayPayloadEXT vec4 payload;\n"
8476          << "layout (set=0, binding=0) uniform accelerationStructureEXT topLevelAS;\n"
8477          << "layout (set=0, binding=1, rgba8) uniform image2D outColor;\n"
8478          << "void main()\n"
8479          << "{\n"
8480          << "    const uint  rayFlags  = gl_RayFlagsNoneEXT;\n"
8481          << "    const vec3  origin    = vec3(float(gl_LaunchIDEXT.x) + 0.5, float(gl_LaunchIDEXT.y) + 0.5, 0.0);\n"
8482          << "    const vec3  direction = vec3(0.0, 0.0, 1.0);\n"
8483          << "    const float tMin      = 1.0;\n"
8484          << "    const float tMax      = 10.0;\n"
8485          << "    const uint  missIndex = 0u;\n"
8486          << "    const uint  cullMask  = 0xFFu;\n"
8487          << "    const uint  sbtOffset = 0u;\n"
8488          << "    const uint  sbtStride = 0u;\n"
8489          << "\n"
8490          << "    traceRayEXT(topLevelAS, rayFlags, cullMask, sbtOffset, sbtStride, missIndex, origin, tMin, direction, "
8491             "tMax, 0);\n"
8492          << "    imageStore(outColor, ivec2(origin.xy), payload);\n"
8493          << "}\n";
8494     programCollection.glslSources.add("rgen") << glu::RaygenSource(rgen.str()) << buildOptions;
8495 
8496     std::ostringstream chit;
8497     chit << "#version 460 core\n"
8498          //<< "#extension GL_EXT_debug_printf : enable\n"
8499          << "#extension GL_EXT_ray_tracing : require\n"
8500          << "layout (location=0) rayPayloadInEXT vec4 payload;\n"
8501          << "void main(void) {\n"
8502          //<< "    debugPrintfEXT(\"Hit for %u %u\\n\", gl_LaunchIDEXT.x, gl_LaunchIDEXT.y);\n"
8503          << "    payload = vec4(0.0, 0.0, 1.0, 1.0);\n"
8504          << "}\n";
8505     programCollection.glslSources.add("chit") << glu::ClosestHitSource(chit.str()) << buildOptions;
8506 
8507     std::ostringstream miss;
8508     miss << "#version 460 core\n"
8509          << "#extension GL_EXT_ray_tracing : require\n"
8510          //<< "#extension GL_EXT_debug_printf : enable\n"
8511          << "layout (location=0) rayPayloadInEXT vec4 payload;\n"
8512          << "void main(void) {\n"
8513          //<< "    debugPrintfEXT(\"Miss for %u %u\\n\", gl_LaunchIDEXT.x, gl_LaunchIDEXT.y);\n"
8514          << "    payload = vec4(0.0, 0.0, 0.0, 1.0);\n"
8515          << "}\n";
8516     programCollection.glslSources.add("miss") << glu::MissSource(miss.str()) << buildOptions;
8517 }
8518 
8519 // Creates an empty shader binding table with a zeroed-out shader group handle.
createEmptySBT(const DeviceInterface & vkd,VkDevice device,Allocator & alloc,uint32_t shaderGroupHandleSize)8520 de::MovePtr<BufferWithMemory> createEmptySBT(const DeviceInterface &vkd, VkDevice device, Allocator &alloc,
8521                                              uint32_t shaderGroupHandleSize)
8522 {
8523     const auto sbtSize  = static_cast<VkDeviceSize>(shaderGroupHandleSize);
8524     const auto sbtFlags = (VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR |
8525                            VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
8526     const auto sbtInfo  = makeBufferCreateInfo(sbtSize, sbtFlags);
8527     const auto sbtReqs  = (MemoryRequirement::HostVisible | MemoryRequirement::DeviceAddress);
8528 
8529     auto sbtBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, alloc, sbtInfo, sbtReqs));
8530     auto &sbtAlloc = sbtBuffer->getAllocation();
8531     void *sbtData  = sbtAlloc.getHostPtr();
8532 
8533     deMemset(sbtData, 0, static_cast<size_t>(sbtSize));
8534     flushAlloc(vkd, device, sbtAlloc);
8535 
8536     return sbtBuffer;
8537 }
8538 
nullMissInstance(Context & context)8539 tcu::TestStatus nullMissInstance(Context &context)
8540 {
8541     const auto &vki    = context.getInstanceInterface();
8542     const auto physDev = context.getPhysicalDevice();
8543     const auto &vkd    = context.getDeviceInterface();
8544     const auto device  = context.getDevice();
8545     auto &alloc        = context.getDefaultAllocator();
8546     const auto qIndex  = context.getUniversalQueueFamilyIndex();
8547     const auto queue   = context.getUniversalQueue();
8548     const auto stages  = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
8549 
8550     // Command pool and buffer.
8551     const auto cmdPool      = makeCommandPool(vkd, device, qIndex);
8552     const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8553     const auto cmdBuffer    = cmdBufferPtr.get();
8554 
8555     beginCommandBuffer(vkd, cmdBuffer);
8556 
8557     // Build acceleration structures.
8558     auto topLevelAS    = makeTopLevelAccelerationStructure();
8559     auto bottomLevelAS = makeBottomLevelAccelerationStructure();
8560 
8561     std::vector<tcu::Vec3> triangle;
8562     triangle.reserve(3u);
8563     triangle.emplace_back(0.0f, 1.0f, 10.0f);
8564     triangle.emplace_back(-1.0f, -1.0f, 10.0f);
8565     triangle.emplace_back(1.0f, -1.0f, 10.0f);
8566     bottomLevelAS->addGeometry(triangle, true /*triangles*/);
8567     bottomLevelAS->createAndBuild(vkd, device, cmdBuffer, alloc);
8568 
8569     de::SharedPtr<BottomLevelAccelerationStructure> blasSharedPtr(bottomLevelAS.release());
8570     topLevelAS->setInstanceCount(1);
8571     topLevelAS->addInstance(blasSharedPtr);
8572     topLevelAS->createAndBuild(vkd, device, cmdBuffer, alloc);
8573 
8574     // Create output buffer.
8575     const auto bufferSize       = static_cast<VkDeviceSize>(sizeof(float));
8576     const auto bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
8577     BufferWithMemory buffer(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible);
8578     auto &bufferAlloc = buffer.getAllocation();
8579 
8580     // Fill output buffer with an initial value.
8581     deMemset(bufferAlloc.getHostPtr(), 0, sizeof(float));
8582     flushAlloc(vkd, device, bufferAlloc);
8583 
8584     // Descriptor set layout and pipeline layout.
8585     DescriptorSetLayoutBuilder setLayoutBuilder;
8586     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, stages);
8587     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stages);
8588 
8589     const auto setLayout      = setLayoutBuilder.build(vkd, device);
8590     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
8591 
8592     // Descriptor pool and set.
8593     DescriptorPoolBuilder poolBuilder;
8594     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
8595     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
8596     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
8597     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
8598 
8599     // Update descriptor set.
8600     {
8601         const VkWriteDescriptorSetAccelerationStructureKHR accelDescInfo = {
8602             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
8603             nullptr,
8604             1u,
8605             topLevelAS.get()->getPtr(),
8606         };
8607 
8608         const auto bufferDescInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, VK_WHOLE_SIZE);
8609 
8610         DescriptorSetUpdateBuilder updateBuilder;
8611         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
8612                                   VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelDescInfo);
8613         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
8614                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescInfo);
8615         updateBuilder.update(vkd, device);
8616     }
8617 
8618     // Shader modules.
8619     auto rgenModule = createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"), 0);
8620     auto chitModule = createShaderModule(vkd, device, context.getBinaryCollection().get("chit"), 0);
8621 
8622     // Get some ray tracing properties.
8623     uint32_t shaderGroupHandleSize    = 0u;
8624     uint32_t shaderGroupBaseAlignment = 1u;
8625     {
8626         const auto rayTracingPropertiesKHR = makeRayTracingProperties(vki, physDev);
8627         shaderGroupHandleSize              = rayTracingPropertiesKHR->getShaderGroupHandleSize();
8628         shaderGroupBaseAlignment           = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
8629     }
8630 
8631     // Create raytracing pipeline and shader binding tables.
8632     Move<VkPipeline> pipeline;
8633 
8634     de::MovePtr<BufferWithMemory> raygenSBT;
8635     de::MovePtr<BufferWithMemory> missSBT;
8636     de::MovePtr<BufferWithMemory> hitSBT;
8637     de::MovePtr<BufferWithMemory> callableSBT;
8638 
8639     VkStridedDeviceAddressRegionKHR raygenSBTRegion   = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8640     VkStridedDeviceAddressRegionKHR missSBTRegion     = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8641     VkStridedDeviceAddressRegionKHR hitSBTRegion      = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8642     VkStridedDeviceAddressRegionKHR callableSBTRegion = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8643 
8644     {
8645         const auto rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
8646 
8647         rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0u);
8648         rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 1u);
8649 
8650         pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout.get());
8651 
8652         raygenSBT = rayTracingPipeline->createShaderBindingTable(
8653             vkd, device, pipeline.get(), alloc, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
8654         raygenSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenSBT->get(), 0ull),
8655                                                             shaderGroupHandleSize, shaderGroupHandleSize);
8656 
8657         hitSBT = rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline.get(), alloc, shaderGroupHandleSize,
8658                                                               shaderGroupBaseAlignment, 1u, 1u);
8659         hitSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitSBT->get(), 0ull),
8660                                                          shaderGroupHandleSize, shaderGroupHandleSize);
8661 
8662         // Critical for the test: the miss shader binding table buffer is empty and contains a zero'ed out shader group handle.
8663         missSBT       = createEmptySBT(vkd, device, alloc, shaderGroupHandleSize);
8664         missSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missSBT->get(), 0ull),
8665                                                           shaderGroupHandleSize, shaderGroupHandleSize);
8666     }
8667 
8668     // Trace rays.
8669     vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline.get());
8670     vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipelineLayout.get(), 0u, 1u,
8671                               &descriptorSet.get(), 0u, nullptr);
8672     vkd.cmdTraceRaysKHR(cmdBuffer, &raygenSBTRegion, &missSBTRegion, &hitSBTRegion, &callableSBTRegion, 1u, 1u, 1u);
8673 
8674     // Barrier for the output buffer just in case (no writes should take place).
8675     const auto bufferBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
8676     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
8677                            &bufferBarrier, 0u, nullptr, 0u, nullptr);
8678 
8679     endCommandBuffer(vkd, cmdBuffer);
8680     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
8681 
8682     // Read value back from the buffer. No write should have taken place.
8683     float bufferValue = 0.0f;
8684     invalidateAlloc(vkd, device, bufferAlloc);
8685     deMemcpy(&bufferValue, bufferAlloc.getHostPtr(), sizeof(bufferValue));
8686 
8687     if (bufferValue != 0.0f)
8688         TCU_FAIL("Unexpected value found in buffer: " + de::toString(bufferValue));
8689 
8690     return tcu::TestStatus::pass("Pass");
8691 }
8692 
initEmptyPrograms(vk::SourceCollections & programCollection)8693 void initEmptyPrograms(vk::SourceCollections &programCollection)
8694 {
8695     const vk::ShaderBuildOptions buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
8696 
8697     std::string source("#version 460\n"
8698                        "#extension GL_EXT_ray_tracing : require\n"
8699                        "void main()\n"
8700                        "{\n"
8701                        "}\n");
8702 
8703     programCollection.glslSources.add("rgen") << glu::RaygenSource(source) << buildOptions;
8704     programCollection.glslSources.add("miss") << glu::MissSource(source) << buildOptions;
8705 }
8706 
emptyPipelineLayoutInstance(Context & context)8707 tcu::TestStatus emptyPipelineLayoutInstance(Context &context)
8708 {
8709     const auto &vk            = context.getDeviceInterface();
8710     const auto device         = context.getDevice();
8711     const auto pipelineLayout = makePipelineLayout(vk, device);
8712     auto rgenModule           = createShaderModule(vk, device, context.getBinaryCollection().get("rgen"));
8713     auto missModule           = createShaderModule(vk, device, context.getBinaryCollection().get("miss"));
8714 
8715     VkPipelineShaderStageCreateInfo defaultShaderCreateInfo = initVulkanStructure();
8716     defaultShaderCreateInfo.pName                           = "main";
8717     std::vector<VkPipelineShaderStageCreateInfo> shaderCreateInfoVect(2, defaultShaderCreateInfo);
8718     shaderCreateInfoVect[0].stage  = VK_SHADER_STAGE_RAYGEN_BIT_KHR;
8719     shaderCreateInfoVect[0].module = *rgenModule;
8720     shaderCreateInfoVect[1].stage  = VK_SHADER_STAGE_MISS_BIT_KHR;
8721     shaderCreateInfoVect[1].module = *missModule;
8722 
8723     const VkRayTracingShaderGroupCreateInfoKHR defaultShaderGroupCreateInfo{
8724         VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, // VkStructureType sType;
8725         nullptr,                                                    // const void* pNext;
8726         VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,               // VkRayTracingShaderGroupTypeKHR type;
8727         VK_SHADER_UNUSED_KHR,                                       // uint32_t generalShader;
8728         VK_SHADER_UNUSED_KHR,                                       // uint32_t closestHitShader;
8729         VK_SHADER_UNUSED_KHR,                                       // uint32_t anyHitShader;
8730         VK_SHADER_UNUSED_KHR,                                       // uint32_t intersectionShader;
8731         nullptr,                                                    // const void* pShaderGroupCaptureReplayHandle;
8732     };
8733     std::vector<VkRayTracingShaderGroupCreateInfoKHR> shaderGroupCreateInfoVect(2, defaultShaderGroupCreateInfo);
8734     shaderGroupCreateInfoVect[0].generalShader = 0u;
8735     shaderGroupCreateInfoVect[1].generalShader = 1u;
8736 
8737     VkRayTracingPipelineCreateInfoKHR pipelineCreateInfo = initVulkanStructure();
8738     pipelineCreateInfo.stageCount                        = 2;
8739     pipelineCreateInfo.pStages                           = shaderCreateInfoVect.data();
8740     pipelineCreateInfo.groupCount                        = 2u;
8741     pipelineCreateInfo.pGroups                           = shaderGroupCreateInfoVect.data();
8742     pipelineCreateInfo.maxPipelineRayRecursionDepth      = 1u;
8743     pipelineCreateInfo.layout                            = *pipelineLayout;
8744 
8745     // make sure there is no crash when pipeline layout is empty
8746     auto pipeline = createRayTracingPipelineKHR(vk, device, VK_NULL_HANDLE, VK_NULL_HANDLE, &pipelineCreateInfo);
8747     pipeline      = Move<VkPipeline>();
8748 
8749     return tcu::TestStatus::pass("Pass");
8750 }
8751 
getInRangeTrianglePoints(float offset)8752 std::vector<tcu::Vec3> getInRangeTrianglePoints(float offset)
8753 {
8754     std::vector<tcu::Vec3> triangle;
8755     triangle.reserve(3u);
8756     triangle.emplace_back(0.0f + offset, 1.0f + offset, 5.0f + offset);
8757     triangle.emplace_back(-1.0f + offset, -1.0f + offset, 5.0f + offset);
8758     triangle.emplace_back(1.0f + offset, -1.0f + offset, 5.0f + offset);
8759 
8760     return triangle;
8761 }
8762 
reuseCreationBufferInstance(Context & context,const bool disturbTop)8763 tcu::TestStatus reuseCreationBufferInstance(Context &context, const bool disturbTop /* if false, bottom AS */)
8764 {
8765     const auto &vki          = context.getInstanceInterface();
8766     const auto physDev       = context.getPhysicalDevice();
8767     const auto &vkd          = context.getDeviceInterface();
8768     const auto device        = context.getDevice();
8769     auto &alloc              = context.getDefaultAllocator();
8770     const auto qIndex        = context.getUniversalQueueFamilyIndex();
8771     const auto queue         = context.getUniversalQueue();
8772     const auto stages        = (VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
8773     const bool disturbBottom = (!disturbTop);
8774 
8775     // We don't know exactly how much space each implementation is going to require to build the top and bottom accel structures,
8776     // but in practice the number appears to be in the low-KBs range, so creating a 4MB buffer will give us enough room to almost
8777     // guarantee the buffer is going to be used.
8778     const VkDeviceSize creationBufferSize = 4u * 1024u * 1024u;
8779     const auto creationBufferUsage =
8780         (VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
8781     const auto creationBufferInfo = makeBufferCreateInfo(creationBufferSize, creationBufferUsage);
8782     const auto creationBufferMemReqs =
8783         (MemoryRequirement::HostVisible | MemoryRequirement::Coherent | MemoryRequirement::DeviceAddress);
8784     BufferWithMemory creationBuffer(vkd, device, alloc, creationBufferInfo, creationBufferMemReqs);
8785 
8786     // Command pool and buffer.
8787     const auto cmdPool          = makeCommandPool(vkd, device, qIndex);
8788     const auto mainCmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8789     const auto bottomBuildCmd   = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8790     const auto topBuildCmd      = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8791 
8792     // Build acceleration structures.
8793     auto topLevelAS         = makeTopLevelAccelerationStructure();
8794     auto topLevelOtherAS    = makeTopLevelAccelerationStructure();
8795     auto bottomLevelAS      = makeBottomLevelAccelerationStructure();
8796     auto bottomLevelOtherAS = makeBottomLevelAccelerationStructure();
8797 
8798     const auto goodTriangle = getInRangeTrianglePoints(0.0f);
8799     const auto badTriangle  = getInRangeTrianglePoints(100.0f);
8800 
8801     bottomLevelAS->addGeometry(goodTriangle, true /*triangles*/);
8802     bottomLevelOtherAS->addGeometry(badTriangle, true /*triangles*/);
8803 
8804     // Critical for the test: we create an additional acceleration structure without building it, and reusing the same creation
8805     // buffer. The creation operation is supposed to avoid touching the buffer, so this should not alter its contents and using the
8806     // original acceleration structure after this step should still work.
8807 
8808     beginCommandBuffer(vkd, bottomBuildCmd.get());
8809 
8810     if (disturbBottom)
8811     {
8812         bottomLevelAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8813                               creationBufferSize);
8814         bottomLevelAS->build(vkd, device, bottomBuildCmd.get());
8815     }
8816     else
8817         bottomLevelAS->createAndBuild(vkd, device, bottomBuildCmd.get(), alloc);
8818 
8819     // Submit command buffer so the bottom acceleration structure is actually built and stored in the creation buffer.
8820     endCommandBuffer(vkd, bottomBuildCmd.get());
8821     submitCommandsAndWait(vkd, device, queue, bottomBuildCmd.get());
8822 
8823     if (disturbBottom)
8824     {
8825         bottomLevelOtherAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8826                                    creationBufferSize);
8827         // Note how we have created the second bottom level accel structure reusing the buffer but we haven't built it.
8828     }
8829 
8830     using SharedBottomPtr = de::SharedPtr<BottomLevelAccelerationStructure>;
8831 
8832     SharedBottomPtr blasSharedPtr(bottomLevelAS.release());
8833     SharedBottomPtr blasOtherSharedPtr(nullptr);
8834 
8835     topLevelAS->setInstanceCount(1);
8836     topLevelAS->addInstance(blasSharedPtr);
8837 
8838     beginCommandBuffer(vkd, topBuildCmd.get());
8839 
8840     if (disturbTop)
8841     {
8842         topLevelAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8843                            creationBufferSize);
8844         topLevelAS->build(vkd, device, topBuildCmd.get());
8845 
8846         bottomLevelOtherAS->createAndBuild(vkd, device, topBuildCmd.get(), alloc);
8847     }
8848     else
8849         topLevelAS->createAndBuild(vkd, device, topBuildCmd.get(), alloc);
8850 
8851     // Submit command buffer so the top acceleration structure is actually built and stored in the creation buffer.
8852     endCommandBuffer(vkd, topBuildCmd.get());
8853     submitCommandsAndWait(vkd, device, queue, topBuildCmd.get());
8854 
8855     if (disturbTop)
8856     {
8857         SharedBottomPtr auxiliar(bottomLevelOtherAS.release());
8858         blasOtherSharedPtr.swap(auxiliar);
8859 
8860         topLevelOtherAS->setInstanceCount(1);
8861         topLevelOtherAS->addInstance(blasOtherSharedPtr);
8862         topLevelOtherAS->create(vkd, device, alloc, 0u, 0u, nullptr, MemoryRequirement::Any, creationBuffer.get(),
8863                                 creationBufferSize);
8864         // Note how we have created the second top level accel structure reusing the buffer but we haven't built it.
8865     }
8866 
8867     // Create output buffer.
8868     const auto bufferSize       = static_cast<VkDeviceSize>(sizeof(float));
8869     const auto bufferCreateInfo = makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
8870     BufferWithMemory buffer(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible);
8871     auto &bufferAlloc = buffer.getAllocation();
8872 
8873     // Fill output buffer with an initial value.
8874     deMemset(bufferAlloc.getHostPtr(), 0, sizeof(float));
8875     flushAlloc(vkd, device, bufferAlloc);
8876 
8877     // Descriptor set layout and pipeline layout.
8878     DescriptorSetLayoutBuilder setLayoutBuilder;
8879     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, stages);
8880     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, stages);
8881 
8882     const auto setLayout      = setLayoutBuilder.build(vkd, device);
8883     const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
8884 
8885     // Descriptor pool and set.
8886     DescriptorPoolBuilder poolBuilder;
8887     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
8888     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
8889     const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
8890     const auto descriptorSet  = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
8891 
8892     // Update descriptor set.
8893     {
8894         const VkWriteDescriptorSetAccelerationStructureKHR accelDescInfo = {
8895             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
8896             nullptr,
8897             1u,
8898             topLevelAS.get()->getPtr(),
8899         };
8900 
8901         const auto bufferDescInfo = makeDescriptorBufferInfo(buffer.get(), 0ull, VK_WHOLE_SIZE);
8902 
8903         DescriptorSetUpdateBuilder updateBuilder;
8904         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u),
8905                                   VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelDescInfo);
8906         updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u),
8907                                   VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescInfo);
8908         updateBuilder.update(vkd, device);
8909     }
8910 
8911     // Shader modules.
8912     auto rgenModule = createShaderModule(vkd, device, context.getBinaryCollection().get("rgen"), 0);
8913     auto chitModule = createShaderModule(vkd, device, context.getBinaryCollection().get("chit"), 0);
8914 
8915     // Get some ray tracing properties.
8916     uint32_t shaderGroupHandleSize    = 0u;
8917     uint32_t shaderGroupBaseAlignment = 1u;
8918     {
8919         const auto rayTracingPropertiesKHR = makeRayTracingProperties(vki, physDev);
8920         shaderGroupHandleSize              = rayTracingPropertiesKHR->getShaderGroupHandleSize();
8921         shaderGroupBaseAlignment           = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
8922     }
8923 
8924     // Create raytracing pipeline and shader binding tables.
8925     Move<VkPipeline> pipeline;
8926 
8927     de::MovePtr<BufferWithMemory> raygenSBT;
8928     de::MovePtr<BufferWithMemory> missSBT;
8929     de::MovePtr<BufferWithMemory> hitSBT;
8930     de::MovePtr<BufferWithMemory> callableSBT;
8931 
8932     VkStridedDeviceAddressRegionKHR raygenSBTRegion   = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8933     VkStridedDeviceAddressRegionKHR missSBTRegion     = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8934     VkStridedDeviceAddressRegionKHR hitSBTRegion      = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8935     VkStridedDeviceAddressRegionKHR callableSBTRegion = makeStridedDeviceAddressRegionKHR(0, 0, 0);
8936 
8937     {
8938         const auto rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
8939 
8940         rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0u);
8941         rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 1u);
8942 
8943         pipeline = rayTracingPipeline->createPipeline(vkd, device, pipelineLayout.get());
8944 
8945         raygenSBT = rayTracingPipeline->createShaderBindingTable(
8946             vkd, device, pipeline.get(), alloc, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
8947         raygenSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenSBT->get(), 0ull),
8948                                                             shaderGroupHandleSize, shaderGroupHandleSize);
8949 
8950         hitSBT = rayTracingPipeline->createShaderBindingTable(vkd, device, pipeline.get(), alloc, shaderGroupHandleSize,
8951                                                               shaderGroupBaseAlignment, 1u, 1u);
8952         hitSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitSBT->get(), 0ull),
8953                                                          shaderGroupHandleSize, shaderGroupHandleSize);
8954     }
8955 
8956     const auto mainCmdBuffer = mainCmdBufferPtr.get();
8957     beginCommandBuffer(vkd, mainCmdBuffer);
8958 
8959     // Trace rays.
8960     vkd.cmdBindPipeline(mainCmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline.get());
8961     vkd.cmdBindDescriptorSets(mainCmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipelineLayout.get(), 0u, 1u,
8962                               &descriptorSet.get(), 0u, nullptr);
8963     vkd.cmdTraceRaysKHR(mainCmdBuffer, &raygenSBTRegion, &missSBTRegion, &hitSBTRegion, &callableSBTRegion, 1u, 1u, 1u);
8964 
8965     // Barrier for the output buffer.
8966     const auto bufferBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
8967     vkd.cmdPipelineBarrier(mainCmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_HOST_BIT, 0u,
8968                            1u, &bufferBarrier, 0u, nullptr, 0u, nullptr);
8969 
8970     endCommandBuffer(vkd, mainCmdBuffer);
8971     submitCommandsAndWait(vkd, device, queue, mainCmdBuffer);
8972 
8973     // Read value back from the buffer.
8974     float bufferValue = 0.0f;
8975     invalidateAlloc(vkd, device, bufferAlloc);
8976     deMemcpy(&bufferValue, bufferAlloc.getHostPtr(), sizeof(bufferValue));
8977 
8978     if (bufferValue != 1.0f)
8979         TCU_FAIL("Unexpected value found in buffer: " + de::toString(bufferValue));
8980 
8981     return tcu::TestStatus::pass("Pass");
8982 }
8983 
reuseScratchBufferInstance(Context & context)8984 tcu::TestStatus reuseScratchBufferInstance(Context &context)
8985 {
8986     const auto ctx = context.getContextCommonData();
8987     const tcu::IVec3 extent(256, 256, 1);
8988     const auto extentU                 = extent.asUint();
8989     const auto pixelCount              = extentU.x() * extentU.y() * extentU.z();
8990     const auto apiExtent               = makeExtent3D(extent);
8991     const uint32_t blasCount           = 2u;                      // Number of bottom-level acceleration structures.
8992     const uint32_t rowsPerAS           = extentU.y() / blasCount; // The last one could be larger but not in practice.
8993     const float coordMargin            = 0.25f;
8994     const uint32_t perTriangleVertices = 3u;
8995     const uint32_t randomSeed          = 1722347394u;
8996     const float geometryZ              = 5.0f; // Must be between tMin and tMax in the shaders.
8997 
8998     const CommandPoolWithBuffer cmd(ctx.vkd, ctx.device, ctx.qfIndex);
8999     const auto cmdBuffer = *cmd.cmdBuffer;
9000     beginCommandBuffer(ctx.vkd, cmdBuffer);
9001 
9002     // Create a pseudorandom mask for coverage.
9003     de::Random rnd(randomSeed);
9004     std::vector<bool> coverageMask;
9005     coverageMask.reserve(pixelCount);
9006     for (int y = 0; y < extent.y(); ++y)
9007         for (int x = 0; x < extent.x(); ++x)
9008             coverageMask.push_back(rnd.getBool());
9009 
9010     // Each bottom level AS will contain a number of rows.
9011     DE_ASSERT(blasCount > 0u);
9012     BottomLevelAccelerationStructurePool blasPool;
9013     for (uint32_t a = 0u; a < blasCount; ++a)
9014     {
9015         const auto prevRows = rowsPerAS * a;
9016         const auto rowCount = ((a < blasCount - 1u) ? rowsPerAS : (extentU.y() - prevRows));
9017         std::vector<tcu::Vec3> triangles;
9018         triangles.reserve(rowCount * extentU.x() * perTriangleVertices);
9019 
9020         for (uint32_t y = 0u; y < rowCount; ++y)
9021             for (uint32_t x = 0u; x < extentU.x(); ++x)
9022             {
9023                 const auto row       = y + prevRows;
9024                 const auto col       = x;
9025                 const auto maskIndex = row * extentU.x() + col;
9026 
9027                 if (!coverageMask.at(maskIndex))
9028                     continue;
9029 
9030                 const float xCenter = static_cast<float>(col) + 0.5f;
9031                 const float yCenter = static_cast<float>(row) + 0.5f;
9032 
9033                 triangles.push_back(tcu::Vec3(xCenter - coordMargin, yCenter + coordMargin, geometryZ));
9034                 triangles.push_back(tcu::Vec3(xCenter + coordMargin, yCenter + coordMargin, geometryZ));
9035                 triangles.push_back(tcu::Vec3(xCenter, yCenter - coordMargin, geometryZ));
9036             }
9037 
9038         const auto blas = blasPool.add();
9039         blas->addGeometry(triangles, true /* triangles */);
9040     }
9041 
9042     blasPool.batchCreateAdjust(ctx.vkd, ctx.device, ctx.allocator, ~0ull, false /* scratch buffer is host visible */);
9043     blasPool.batchBuild(ctx.vkd, ctx.device, cmdBuffer);
9044 
9045     const auto tlas = makeTopLevelAccelerationStructure();
9046     tlas->setInstanceCount(blasCount);
9047     for (const auto &blas : blasPool.structures())
9048         tlas->addInstance(blas, identityMatrix3x4, 0, 0xFFu, 0u,
9049                           VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR);
9050     tlas->createAndBuild(ctx.vkd, ctx.device, cmdBuffer, ctx.allocator);
9051 
9052     // Create storage image.
9053     const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM; // Must match the shader declaration.
9054     const auto colorUsage =
9055         (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
9056     const auto colorSSR = makeDefaultImageSubresourceRange();
9057     ImageWithBuffer colorBuffer(ctx.vkd, ctx.device, ctx.allocator, apiExtent, colorFormat, colorUsage,
9058                                 VK_IMAGE_TYPE_2D, colorSSR);
9059 
9060     // Descriptor pool and set.
9061     DescriptorPoolBuilder poolBuilder;
9062     poolBuilder.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, blasCount);
9063     poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
9064     const auto descritorPool =
9065         poolBuilder.build(ctx.vkd, ctx.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
9066 
9067     DescriptorSetLayoutBuilder setLayoutBuilder;
9068     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
9069     setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_RAYGEN_BIT_KHR);
9070     const auto setLayout      = setLayoutBuilder.build(ctx.vkd, ctx.device);
9071     const auto descriptorSet  = makeDescriptorSet(ctx.vkd, ctx.device, *descritorPool, *setLayout);
9072     const auto pipelineLayout = makePipelineLayout(ctx.vkd, ctx.device, *setLayout);
9073 
9074     DescriptorSetUpdateBuilder setUpdateBuilder;
9075     using Location = DescriptorSetUpdateBuilder::Location;
9076     {
9077         const VkWriteDescriptorSetAccelerationStructureKHR accelerationStructure = {
9078             VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,
9079             nullptr,
9080             1u,
9081             tlas->getPtr(),
9082         };
9083         setUpdateBuilder.writeSingle(*descriptorSet, Location::binding(0u),
9084                                      VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructure);
9085 
9086         const auto imageInfo =
9087             makeDescriptorImageInfo(VK_NULL_HANDLE, colorBuffer.getImageView(), VK_IMAGE_LAYOUT_GENERAL);
9088         setUpdateBuilder.writeSingle(*descriptorSet, Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
9089                                      &imageInfo);
9090     }
9091     setUpdateBuilder.update(ctx.vkd, ctx.device);
9092 
9093     const auto &binaries = context.getBinaryCollection();
9094     auto rgenModule      = createShaderModule(ctx.vkd, ctx.device, binaries.get("rgen"), 0);
9095     auto missModule      = createShaderModule(ctx.vkd, ctx.device, binaries.get("miss"), 0);
9096     auto chitModule      = createShaderModule(ctx.vkd, ctx.device, binaries.get("chit"), 0);
9097 
9098     uint32_t shaderGroupHandleSize    = 0u;
9099     uint32_t shaderGroupBaseAlignment = 1u;
9100     {
9101         const auto rayTracingPropertiesKHR = makeRayTracingProperties(ctx.vki, ctx.physicalDevice);
9102         shaderGroupHandleSize              = rayTracingPropertiesKHR->getShaderGroupHandleSize();
9103         shaderGroupBaseAlignment           = rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
9104     }
9105 
9106     // Create raytracing pipeline and shader binding tables.
9107     Move<VkPipeline> pipeline;
9108     de::MovePtr<BufferWithMemory> raygenSBT;
9109     de::MovePtr<BufferWithMemory> missSBT;
9110     de::MovePtr<BufferWithMemory> hitSBT;
9111     de::MovePtr<BufferWithMemory> callableSBT;
9112 
9113     auto raygenSBTRegion   = makeStridedDeviceAddressRegionKHR(0, 0, 0);
9114     auto missSBTRegion     = makeStridedDeviceAddressRegionKHR(0, 0, 0);
9115     auto hitSBTRegion      = makeStridedDeviceAddressRegionKHR(0, 0, 0);
9116     auto callableSBTRegion = makeStridedDeviceAddressRegionKHR(0, 0, 0);
9117 
9118     {
9119         const auto rayTracingPipeline = de::newMovePtr<RayTracingPipeline>();
9120         rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR, rgenModule, 0);
9121         rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR, missModule, 1);
9122         rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, chitModule, 2);
9123 
9124         pipeline = rayTracingPipeline->createPipeline(ctx.vkd, ctx.device, pipelineLayout.get());
9125 
9126         raygenSBT = rayTracingPipeline->createShaderBindingTable(ctx.vkd, ctx.device, pipeline.get(), ctx.allocator,
9127                                                                  shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1);
9128         raygenSBTRegion =
9129             makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(ctx.vkd, ctx.device, raygenSBT->get(), 0),
9130                                               shaderGroupHandleSize, shaderGroupHandleSize);
9131 
9132         missSBT = rayTracingPipeline->createShaderBindingTable(ctx.vkd, ctx.device, pipeline.get(), ctx.allocator,
9133                                                                shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1);
9134         missSBTRegion =
9135             makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(ctx.vkd, ctx.device, missSBT->get(), 0),
9136                                               shaderGroupHandleSize, shaderGroupHandleSize);
9137 
9138         hitSBT       = rayTracingPipeline->createShaderBindingTable(ctx.vkd, ctx.device, pipeline.get(), ctx.allocator,
9139                                                                     shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1);
9140         hitSBTRegion = makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(ctx.vkd, ctx.device, hitSBT->get(), 0),
9141                                                          shaderGroupHandleSize, shaderGroupHandleSize);
9142     }
9143 
9144     // Transition storage image.
9145     const auto preRTBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
9146                                                      VK_IMAGE_LAYOUT_GENERAL, colorBuffer.getImage(), colorSSR);
9147     cmdPipelineImageMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
9148                                   VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &preRTBarrier);
9149 
9150     // Trace rays.
9151     const auto bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR;
9152     ctx.vkd.cmdBindDescriptorSets(cmdBuffer, bindPoint, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, nullptr);
9153     ctx.vkd.cmdBindPipeline(cmdBuffer, bindPoint, pipeline.get());
9154     ctx.vkd.cmdTraceRaysKHR(cmdBuffer, &raygenSBTRegion, &missSBTRegion, &hitSBTRegion, &callableSBTRegion,
9155                             apiExtent.width, apiExtent.height, 1u);
9156     copyImageToBuffer(ctx.vkd, cmdBuffer, colorBuffer.getImage(), colorBuffer.getBuffer(), extent.swizzle(0, 1),
9157                       VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL, 1u, VK_IMAGE_ASPECT_COLOR_BIT,
9158                       VK_IMAGE_ASPECT_COLOR_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR);
9159     endCommandBuffer(ctx.vkd, cmdBuffer);
9160     submitCommandsAndWait(ctx.vkd, ctx.device, ctx.queue, cmdBuffer);
9161 
9162     invalidateAlloc(ctx.vkd, ctx.device, colorBuffer.getBufferAllocation());
9163 
9164     // These must match the shaders.
9165     const tcu::Vec4 missColor(0.0f, 0.0f, 0.0f, 1.0f);
9166     const tcu::Vec4 hitColor(0.0f, 0.0f, 1.0f, 1.0f);
9167 
9168     const auto tcuFormat = mapVkFormat(colorFormat);
9169     tcu::TextureLevel referenceLevel(tcuFormat, extent.x(), extent.y(), extent.z());
9170     tcu::PixelBufferAccess referenceAccess = referenceLevel.getAccess();
9171 
9172     for (int y = 0; y < extent.y(); ++y)
9173         for (int x = 0; x < extent.x(); ++x)
9174         {
9175             const auto maskIdx = static_cast<uint32_t>(y * extent.x() + x);
9176             const auto &color  = (coverageMask.at(maskIdx) ? hitColor : missColor);
9177             referenceAccess.setPixel(color, x, y);
9178         }
9179 
9180     tcu::ConstPixelBufferAccess resultAccess(tcuFormat, extent, colorBuffer.getBufferAllocation().getHostPtr());
9181 
9182     const tcu::Vec4 threshold(0.0f, 0.0f, 0.0f, 0.0f); // Only 1.0 and 0.0 so we expect exact results.
9183     auto &log = context.getTestContext().getLog();
9184     if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, threshold,
9185                                     tcu::COMPARE_LOG_ON_ERROR))
9186         return tcu::TestStatus::fail("Failed; check log for details");
9187     return tcu::TestStatus::pass("Pass");
9188 }
9189 
9190 } // namespace
9191 
9192 class RayTracingTestCase : public TestCase
9193 {
9194 public:
9195     RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data);
9196     ~RayTracingTestCase(void);
9197 
9198     virtual void checkSupport(Context &context) const final;
9199     virtual TestInstance *createInstance(Context &context) const final;
9200     void initPrograms(SourceCollections &programCollection) const final;
9201 
9202 private:
9203     CaseDef m_data;
9204     mutable std::unique_ptr<TestBase> m_testPtr;
9205 };
9206 
RayTracingTestCase(tcu::TestContext & context,const char * name,const CaseDef data)9207 RayTracingTestCase::RayTracingTestCase(tcu::TestContext &context, const char *name, const CaseDef data)
9208     : vkt::TestCase(context, name)
9209     , m_data(data)
9210 {
9211     /* Stub */
9212 }
9213 
~RayTracingTestCase(void)9214 RayTracingTestCase::~RayTracingTestCase(void)
9215 {
9216 }
9217 
checkSupport(Context & context) const9218 void RayTracingTestCase::checkSupport(Context &context) const
9219 {
9220     context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
9221     context.requireDeviceFunctionality("VK_KHR_buffer_device_address");
9222     context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
9223     context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
9224 
9225     const VkPhysicalDeviceAccelerationStructureFeaturesKHR &accelerationStructureFeaturesKHR =
9226         context.getAccelerationStructureFeatures();
9227     const VkPhysicalDeviceRayTracingPipelineFeaturesKHR &rayTracingPipelineFeaturesKHR =
9228         context.getRayTracingPipelineFeatures();
9229     const auto &rayTracingPipelinePropertiesKHR = context.getRayTracingPipelineProperties();
9230 
9231     if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == false)
9232     {
9233         TCU_THROW(NotSupportedError, "VkPhysicalDeviceRayTracingPipelineFeaturesKHR::rayTracingPipeline is false");
9234     }
9235 
9236     if (accelerationStructureFeaturesKHR.accelerationStructure == false)
9237     {
9238         TCU_THROW(NotSupportedError,
9239                   "VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructure is false");
9240     }
9241 
9242     if (ShaderRecordBlockTest::isTest(m_data.type))
9243     {
9244         if (ShaderRecordBlockTest::isExplicitScalarOffsetTest(m_data.type) ||
9245             ShaderRecordBlockTest::isScalarLayoutTest(m_data.type))
9246         {
9247             context.requireDeviceFunctionality("VK_EXT_scalar_block_layout");
9248         }
9249 
9250         if (ShaderRecordBlockTest::usesF64(m_data.type))
9251         {
9252             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_FLOAT64);
9253         }
9254 
9255         if (ShaderRecordBlockTest::usesI8(m_data.type) || ShaderRecordBlockTest::usesU8(m_data.type))
9256         {
9257             if (context.get8BitStorageFeatures().storageBuffer8BitAccess == VK_FALSE)
9258             {
9259                 TCU_THROW(NotSupportedError, "storageBuffer8BitAccess feature is unavailable");
9260             }
9261         }
9262 
9263         if (ShaderRecordBlockTest::usesI16(m_data.type) || ShaderRecordBlockTest::usesU16(m_data.type))
9264         {
9265             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_INT16);
9266         }
9267 
9268         if (ShaderRecordBlockTest::usesI64(m_data.type) || ShaderRecordBlockTest::usesU64(m_data.type))
9269         {
9270             context.requireDeviceCoreFeature(vkt::DeviceCoreFeature::DEVICE_CORE_FEATURE_SHADER_INT64);
9271         }
9272     }
9273 
9274     if (static_cast<uint32_t>(m_data.type) >= static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) &&
9275         static_cast<uint32_t>(m_data.type) <= static_cast<uint32_t>(TestType::RECURSIVE_TRACES_29))
9276     {
9277         const auto nLevels =
9278             static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1;
9279 
9280         if (rayTracingPipelinePropertiesKHR.maxRayRecursionDepth < nLevels)
9281         {
9282             TCU_THROW(NotSupportedError, "Cannot use an unsupported ray recursion depth.");
9283         }
9284     }
9285 }
9286 
initPrograms(SourceCollections & programCollection) const9287 void RayTracingTestCase::initPrograms(SourceCollections &programCollection) const
9288 {
9289     switch (m_data.type)
9290     {
9291     case TestType::AABBS_AND_TRIS_IN_ONE_TL:
9292     {
9293         m_testPtr.reset(new AABBTriTLTest(m_data.geometryType, m_data.asLayout));
9294 
9295         m_testPtr->initPrograms(programCollection);
9296 
9297         break;
9298     }
9299 
9300     case TestType::AS_STRESS_TEST:
9301     {
9302         m_testPtr.reset(new ASStressTest(m_data.geometryType, m_data.asLayout));
9303 
9304         m_testPtr->initPrograms(programCollection);
9305 
9306         break;
9307     }
9308 
9309     case TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST:
9310     case TestType::CALLABLE_SHADER_STRESS_TEST:
9311     {
9312         const bool useDynamicStackSize = (m_data.type == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST);
9313 
9314         m_testPtr.reset(new CallableShaderStressTest(m_data.geometryType, m_data.asLayout, useDynamicStackSize));
9315 
9316         m_testPtr->initPrograms(programCollection);
9317 
9318         break;
9319     }
9320 
9321     case TestType::CULL_MASK:
9322     case TestType::CULL_MASK_EXTRA_BITS:
9323     {
9324         m_testPtr.reset(
9325             new CullMaskTest(m_data.asLayout, m_data.geometryType, (m_data.type == TestType::CULL_MASK_EXTRA_BITS)));
9326 
9327         m_testPtr->initPrograms(programCollection);
9328 
9329         break;
9330     }
9331 
9332     case TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE:
9333     {
9334         m_testPtr.reset(new MAXRayHitAttributeSizeTest(m_data.geometryType, m_data.asLayout));
9335 
9336         m_testPtr->initPrograms(programCollection);
9337 
9338         break;
9339     }
9340 
9341     case TestType::MAX_RT_INVOCATIONS_SUPPORTED:
9342     {
9343         m_testPtr.reset(new MAXRTInvocationsSupportedTest(m_data.geometryType, m_data.asLayout));
9344 
9345         m_testPtr->initPrograms(programCollection);
9346 
9347         break;
9348     }
9349 
9350     case TestType::NO_DUPLICATE_ANY_HIT:
9351     {
9352         m_testPtr.reset(new NoDuplicateAnyHitTest(m_data.asLayout, m_data.geometryType));
9353 
9354         m_testPtr->initPrograms(programCollection);
9355 
9356         break;
9357     }
9358 
9359     case TestType::RECURSIVE_TRACES_0:
9360     case TestType::RECURSIVE_TRACES_1:
9361     case TestType::RECURSIVE_TRACES_2:
9362     case TestType::RECURSIVE_TRACES_3:
9363     case TestType::RECURSIVE_TRACES_4:
9364     case TestType::RECURSIVE_TRACES_5:
9365     case TestType::RECURSIVE_TRACES_6:
9366     case TestType::RECURSIVE_TRACES_7:
9367     case TestType::RECURSIVE_TRACES_8:
9368     case TestType::RECURSIVE_TRACES_9:
9369     case TestType::RECURSIVE_TRACES_10:
9370     case TestType::RECURSIVE_TRACES_11:
9371     case TestType::RECURSIVE_TRACES_12:
9372     case TestType::RECURSIVE_TRACES_13:
9373     case TestType::RECURSIVE_TRACES_14:
9374     case TestType::RECURSIVE_TRACES_15:
9375     case TestType::RECURSIVE_TRACES_16:
9376     case TestType::RECURSIVE_TRACES_17:
9377     case TestType::RECURSIVE_TRACES_18:
9378     case TestType::RECURSIVE_TRACES_19:
9379     case TestType::RECURSIVE_TRACES_20:
9380     case TestType::RECURSIVE_TRACES_21:
9381     case TestType::RECURSIVE_TRACES_22:
9382     case TestType::RECURSIVE_TRACES_23:
9383     case TestType::RECURSIVE_TRACES_24:
9384     case TestType::RECURSIVE_TRACES_25:
9385     case TestType::RECURSIVE_TRACES_26:
9386     case TestType::RECURSIVE_TRACES_27:
9387     case TestType::RECURSIVE_TRACES_28:
9388     case TestType::RECURSIVE_TRACES_29:
9389     {
9390         const auto nLevels =
9391             ((m_data.type == TestType::RECURSIVE_TRACES_0) ?
9392                  0u :
9393                  (static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1));
9394 
9395         m_testPtr.reset(new RecursiveTracesTest(m_data.geometryType, m_data.asLayout, nLevels));
9396 
9397         m_testPtr->initPrograms(programCollection);
9398 
9399         break;
9400     }
9401 
9402     case TestType::REPORT_INTERSECTION_RESULT:
9403     case TestType::USE_MEMORY_ACCESS:
9404     {
9405         m_testPtr.reset(new ReportIntersectionResultTest(m_data.asLayout, m_data.geometryType));
9406 
9407         m_testPtr->initPrograms(programCollection);
9408 
9409         break;
9410     }
9411 
9412     case TestType::RAY_PAYLOAD_IN:
9413     {
9414         m_testPtr.reset(new RayPayloadInTest(m_data.geometryType, m_data.asLayout));
9415 
9416         m_testPtr->initPrograms(programCollection);
9417 
9418         break;
9419     }
9420 
9421     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
9422     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
9423     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
9424     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
9425     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
9426     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
9427     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
9428     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
9429     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
9430     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
9431     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
9432     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
9433     case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
9434     case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
9435     case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
9436     case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
9437     case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
9438     case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
9439     case TestType::SHADER_RECORD_BLOCK_STD430_1:
9440     case TestType::SHADER_RECORD_BLOCK_STD430_2:
9441     case TestType::SHADER_RECORD_BLOCK_STD430_3:
9442     case TestType::SHADER_RECORD_BLOCK_STD430_4:
9443     case TestType::SHADER_RECORD_BLOCK_STD430_5:
9444     case TestType::SHADER_RECORD_BLOCK_STD430_6:
9445     {
9446         m_testPtr.reset(new ShaderRecordBlockTest(m_data.type, ShaderRecordBlockTest::getVarsToTest(m_data.type)));
9447 
9448         m_testPtr->initPrograms(programCollection);
9449 
9450         break;
9451     }
9452 
9453     case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
9454     case TestType::IGNORE_ANY_HIT_STATICALLY:
9455     case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
9456     case TestType::TERMINATE_ANY_HIT_STATICALLY:
9457     case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
9458     case TestType::TERMINATE_INTERSECTION_STATICALLY:
9459     {
9460         m_testPtr.reset(new TerminationTest(TerminationTest::getModeFromTestType(m_data.type)));
9461 
9462         m_testPtr->initPrograms(programCollection);
9463 
9464         break;
9465     }
9466 
9467     default:
9468     {
9469         deAssertFail("This location should never be reached", __FILE__, __LINE__);
9470     }
9471     }
9472 }
9473 
createInstance(Context & context) const9474 TestInstance *RayTracingTestCase::createInstance(Context &context) const
9475 {
9476     switch (m_data.type)
9477     {
9478     case TestType::AABBS_AND_TRIS_IN_ONE_TL:
9479     {
9480         if (m_testPtr == nullptr)
9481         {
9482             m_testPtr.reset(new AABBTriTLTest(m_data.geometryType, m_data.asLayout));
9483         }
9484 
9485         break;
9486     }
9487 
9488     case TestType::AS_STRESS_TEST:
9489     {
9490         if (m_testPtr == nullptr)
9491         {
9492             m_testPtr.reset(new ASStressTest(m_data.geometryType, m_data.asLayout));
9493         }
9494 
9495         break;
9496     }
9497 
9498     case TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST:
9499     case TestType::CALLABLE_SHADER_STRESS_TEST:
9500     {
9501         if (m_testPtr == nullptr)
9502         {
9503             const bool useDynamicStackSize = (m_data.type == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST);
9504 
9505             m_testPtr.reset(new CallableShaderStressTest(m_data.geometryType, m_data.asLayout, useDynamicStackSize));
9506         }
9507 
9508         break;
9509     }
9510 
9511     case TestType::CULL_MASK:
9512     case TestType::CULL_MASK_EXTRA_BITS:
9513     {
9514         if (m_testPtr == nullptr)
9515         {
9516             m_testPtr.reset(new CullMaskTest(m_data.asLayout, m_data.geometryType,
9517                                              (m_data.type == TestType::CULL_MASK_EXTRA_BITS)));
9518         }
9519 
9520         break;
9521     }
9522 
9523     case TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE:
9524     {
9525         if (m_testPtr == nullptr)
9526         {
9527             m_testPtr.reset(new MAXRayHitAttributeSizeTest(m_data.geometryType, m_data.asLayout));
9528         }
9529 
9530         break;
9531     }
9532 
9533     case TestType::MAX_RT_INVOCATIONS_SUPPORTED:
9534     {
9535         if (m_testPtr == nullptr)
9536         {
9537             m_testPtr.reset(new MAXRTInvocationsSupportedTest(m_data.geometryType, m_data.asLayout));
9538         }
9539 
9540         break;
9541     }
9542 
9543     case TestType::NO_DUPLICATE_ANY_HIT:
9544     {
9545         if (m_testPtr == nullptr)
9546         {
9547             m_testPtr.reset(new NoDuplicateAnyHitTest(m_data.asLayout, m_data.geometryType));
9548         }
9549 
9550         break;
9551     }
9552 
9553     case TestType::RECURSIVE_TRACES_0:
9554     case TestType::RECURSIVE_TRACES_1:
9555     case TestType::RECURSIVE_TRACES_2:
9556     case TestType::RECURSIVE_TRACES_3:
9557     case TestType::RECURSIVE_TRACES_4:
9558     case TestType::RECURSIVE_TRACES_5:
9559     case TestType::RECURSIVE_TRACES_6:
9560     case TestType::RECURSIVE_TRACES_7:
9561     case TestType::RECURSIVE_TRACES_8:
9562     case TestType::RECURSIVE_TRACES_9:
9563     case TestType::RECURSIVE_TRACES_10:
9564     case TestType::RECURSIVE_TRACES_11:
9565     case TestType::RECURSIVE_TRACES_12:
9566     case TestType::RECURSIVE_TRACES_13:
9567     case TestType::RECURSIVE_TRACES_14:
9568     case TestType::RECURSIVE_TRACES_15:
9569     case TestType::RECURSIVE_TRACES_16:
9570     case TestType::RECURSIVE_TRACES_17:
9571     case TestType::RECURSIVE_TRACES_18:
9572     case TestType::RECURSIVE_TRACES_19:
9573     case TestType::RECURSIVE_TRACES_20:
9574     case TestType::RECURSIVE_TRACES_21:
9575     case TestType::RECURSIVE_TRACES_22:
9576     case TestType::RECURSIVE_TRACES_23:
9577     case TestType::RECURSIVE_TRACES_24:
9578     case TestType::RECURSIVE_TRACES_25:
9579     case TestType::RECURSIVE_TRACES_26:
9580     case TestType::RECURSIVE_TRACES_27:
9581     case TestType::RECURSIVE_TRACES_28:
9582     case TestType::RECURSIVE_TRACES_29:
9583     {
9584         const auto nLevels =
9585             ((m_data.type == TestType::RECURSIVE_TRACES_0) ?
9586                  0u :
9587                  (static_cast<uint32_t>(m_data.type) - static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + 1));
9588 
9589         if (m_testPtr == nullptr)
9590         {
9591             m_testPtr.reset(new RecursiveTracesTest(m_data.geometryType, m_data.asLayout, nLevels));
9592         }
9593 
9594         break;
9595     }
9596 
9597     case TestType::REPORT_INTERSECTION_RESULT:
9598     case TestType::USE_MEMORY_ACCESS:
9599     {
9600         if (m_testPtr == nullptr)
9601         {
9602             m_testPtr.reset(new ReportIntersectionResultTest(m_data.asLayout, m_data.geometryType));
9603         }
9604 
9605         break;
9606     }
9607 
9608     case TestType::RAY_PAYLOAD_IN:
9609     {
9610         if (m_testPtr == nullptr)
9611         {
9612             m_testPtr.reset(new RayPayloadInTest(m_data.geometryType, m_data.asLayout));
9613         }
9614 
9615         break;
9616     }
9617 
9618     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1:
9619     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2:
9620     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3:
9621     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4:
9622     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5:
9623     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6:
9624     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1:
9625     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2:
9626     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3:
9627     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4:
9628     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5:
9629     case TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6:
9630     case TestType::SHADER_RECORD_BLOCK_SCALAR_1:
9631     case TestType::SHADER_RECORD_BLOCK_SCALAR_2:
9632     case TestType::SHADER_RECORD_BLOCK_SCALAR_3:
9633     case TestType::SHADER_RECORD_BLOCK_SCALAR_4:
9634     case TestType::SHADER_RECORD_BLOCK_SCALAR_5:
9635     case TestType::SHADER_RECORD_BLOCK_SCALAR_6:
9636     case TestType::SHADER_RECORD_BLOCK_STD430_1:
9637     case TestType::SHADER_RECORD_BLOCK_STD430_2:
9638     case TestType::SHADER_RECORD_BLOCK_STD430_3:
9639     case TestType::SHADER_RECORD_BLOCK_STD430_4:
9640     case TestType::SHADER_RECORD_BLOCK_STD430_5:
9641     case TestType::SHADER_RECORD_BLOCK_STD430_6:
9642     {
9643         if (m_testPtr == nullptr)
9644         {
9645             m_testPtr.reset(new ShaderRecordBlockTest(m_data.type, ShaderRecordBlockTest::getVarsToTest(m_data.type)));
9646         }
9647 
9648         break;
9649     }
9650 
9651     case TestType::IGNORE_ANY_HIT_DYNAMICALLY:
9652     case TestType::IGNORE_ANY_HIT_STATICALLY:
9653     case TestType::TERMINATE_ANY_HIT_DYNAMICALLY:
9654     case TestType::TERMINATE_ANY_HIT_STATICALLY:
9655     case TestType::TERMINATE_INTERSECTION_DYNAMICALLY:
9656     case TestType::TERMINATE_INTERSECTION_STATICALLY:
9657     {
9658         if (m_testPtr == nullptr)
9659         {
9660             m_testPtr.reset(new TerminationTest(TerminationTest::getModeFromTestType(m_data.type)));
9661         }
9662 
9663         break;
9664     }
9665 
9666     default:
9667     {
9668         deAssertFail("This location should never be reached", __FILE__, __LINE__);
9669     }
9670     }
9671 
9672     auto newTestInstancePtr = new RayTracingMiscTestInstance(context, m_data, m_testPtr.get());
9673 
9674     return newTestInstancePtr;
9675 }
9676 
createMiscTests(tcu::TestContext & testCtx)9677 tcu::TestCaseGroup *createMiscTests(tcu::TestContext &testCtx)
9678 {
9679     de::MovePtr<tcu::TestCaseGroup> miscGroupPtr(
9680         // Miscellaneous ray-tracing tests
9681         new tcu::TestCaseGroup(testCtx, "misc"));
9682 
9683     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9684          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9685     {
9686         for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9687              currentASLayout != AccelerationStructureLayout::COUNT;
9688              currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9689         {
9690             for (uint32_t nIteration = 0; nIteration < 2; ++nIteration)
9691             {
9692                 const auto testType = (nIteration == 0) ? TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST :
9693                                                           TestType::CALLABLE_SHADER_STRESS_TEST;
9694                 const std::string newTestCaseName =
9695                     "callableshaderstress_" + de::toString(getSuffixForASLayout(currentASLayout)) + "_" +
9696                     de::toString(getSuffixForGeometryType(currentGeometryType)) + "_" +
9697                     ((testType == TestType::CALLABLE_SHADER_STRESS_DYNAMIC_TEST) ? "dynamic" : "static");
9698 
9699                 // Verifies that the maximum ray hit attribute size property reported by the implementation is actually supported.
9700                 auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9701                                                              CaseDef{testType, currentGeometryType, currentASLayout});
9702 
9703                 miscGroupPtr->addChild(newTestCasePtr);
9704             }
9705         }
9706     }
9707 
9708     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9709          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9710     {
9711         const std::string newTestCaseName =
9712             "AS_stresstest_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9713 
9714         // Verifies raygen shader invocations can simultaneously access as many AS instances as reported
9715         auto newTestCasePtr =
9716             new RayTracingTestCase(testCtx, newTestCaseName.data(),
9717                                    CaseDef{TestType::AS_STRESS_TEST, currentGeometryType,
9718                                            AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY});
9719 
9720         miscGroupPtr->addChild(newTestCasePtr);
9721     }
9722 
9723     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9724          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9725     {
9726         for (int nUseExtraCullMaskBits = 0; nUseExtraCullMaskBits < 2 /* false, true */; ++nUseExtraCullMaskBits)
9727         {
9728             const std::string newTestCaseName = "cullmask_" +
9729                                                 de::toString(getSuffixForGeometryType(currentGeometryType)) +
9730                                                 de::toString((nUseExtraCullMaskBits) ? "_extrabits" : "");
9731             const auto testType = (nUseExtraCullMaskBits == 0) ? TestType::CULL_MASK : TestType::CULL_MASK_EXTRA_BITS;
9732 
9733             // Verifies cull mask works as specified
9734             auto newTestCasePtr = new RayTracingTestCase(
9735                 testCtx, newTestCaseName.data(),
9736                 CaseDef{testType, currentGeometryType, AccelerationStructureLayout::ONE_TL_MANY_BLS_ONE_GEOMETRY});
9737 
9738             miscGroupPtr->addChild(newTestCasePtr);
9739         }
9740     }
9741 
9742     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9743          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9744     {
9745         const std::string newTestCaseName =
9746             "maxrtinvocations_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9747 
9748         // Verifies top-level acceleration structures built of AABB and triangle bottom-level AS instances work as expected
9749         auto newTestCasePtr =
9750             new RayTracingTestCase(testCtx, newTestCaseName.data(),
9751                                    CaseDef{TestType::MAX_RT_INVOCATIONS_SUPPORTED, currentGeometryType,
9752                                            AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9753 
9754         miscGroupPtr->addChild(newTestCasePtr);
9755     }
9756 
9757     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9758          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9759     {
9760         for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9761              currentASLayout != AccelerationStructureLayout::COUNT;
9762              currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9763         {
9764             const std::string newTestCaseName = "NO_DUPLICATE_ANY_HIT_" +
9765                                                 de::toString(getSuffixForASLayout(currentASLayout)) + "_" +
9766                                                 de::toString(getSuffixForGeometryType(currentGeometryType));
9767 
9768             // Verifies the NO_DUPLICATE_ANY_HIT flag is adhered to when tracing rays
9769             auto newTestCasePtr =
9770                 new RayTracingTestCase(testCtx, newTestCaseName.data(),
9771                                        CaseDef{TestType::NO_DUPLICATE_ANY_HIT, currentGeometryType, currentASLayout});
9772 
9773             miscGroupPtr->addChild(newTestCasePtr);
9774         }
9775     }
9776 
9777     {
9778         // Verifies top-level acceleration structures built of AABB and triangle bottom-level AS instances work as expected
9779         auto newTestCasePtr = new RayTracingTestCase(
9780             testCtx, "mixedPrimTL",
9781             CaseDef{TestType::AABBS_AND_TRIS_IN_ONE_TL, GeometryType::AABB_AND_TRIANGLES,
9782                     AccelerationStructureLayout::ONE_TL_MANY_BLS_MANY_GEOMETRIES_WITH_VARYING_PRIM_TYPES});
9783 
9784         miscGroupPtr->addChild(newTestCasePtr);
9785     }
9786 
9787     for (auto currentASLayout = AccelerationStructureLayout::FIRST;
9788          currentASLayout != AccelerationStructureLayout::COUNT;
9789          currentASLayout = static_cast<AccelerationStructureLayout>(static_cast<uint32_t>(currentASLayout) + 1))
9790     {
9791         const std::string newTestCaseName =
9792             "maxrayhitattributesize_" + de::toString(getSuffixForASLayout(currentASLayout));
9793 
9794         // Verifies that the maximum ray hit attribute size property reported by the implementation is actually supported.
9795         auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9796                                                      CaseDef{TestType::MAX_RAY_HIT_ATTRIBUTE_SIZE, GeometryType::AABB,
9797                                                              AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9798 
9799         miscGroupPtr->addChild(newTestCasePtr);
9800     }
9801 
9802     {
9803         // Test the return value of reportIntersectionEXT
9804         auto newTestCase1Ptr = new RayTracingTestCase(testCtx, "report_intersection_result",
9805                                                       CaseDef{TestType::REPORT_INTERSECTION_RESULT, GeometryType::AABB,
9806                                                               AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9807         // Test replacing VK_ACCESS_*_WRITE/READ_BIT with VK_ACCESS_MEMORY_WRITE/READ_BIT.
9808         auto newTestCase2Ptr = new RayTracingTestCase(testCtx, "memory_access",
9809                                                       CaseDef{TestType::USE_MEMORY_ACCESS, GeometryType::AABB,
9810                                                               AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9811 
9812         miscGroupPtr->addChild(newTestCase1Ptr);
9813         miscGroupPtr->addChild(newTestCase2Ptr);
9814     }
9815 
9816     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9817          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9818     {
9819         const std::string newTestCaseName =
9820             "raypayloadin_" + de::toString(getSuffixForGeometryType(currentGeometryType));
9821 
9822         // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9823         auto newTestCasePtr = new RayTracingTestCase(testCtx, newTestCaseName.data(),
9824                                                      CaseDef{TestType::RAY_PAYLOAD_IN, currentGeometryType,
9825                                                              AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9826         miscGroupPtr->addChild(newTestCasePtr);
9827     }
9828 
9829     {
9830         // Tests usage of various variables inside a shader record block using std430 layout
9831         auto newTestCaseSTD430_1Ptr =
9832             new RayTracingTestCase(testCtx, "shaderRecordSTD430_1", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_1));
9833         auto newTestCaseSTD430_2Ptr =
9834             new RayTracingTestCase(testCtx, "shaderRecordSTD430_2", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_2));
9835         auto newTestCaseSTD430_3Ptr =
9836             new RayTracingTestCase(testCtx, "shaderRecordSTD430_3", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_3));
9837         auto newTestCaseSTD430_4Ptr =
9838             new RayTracingTestCase(testCtx, "shaderRecordSTD430_4", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_4));
9839         auto newTestCaseSTD430_5Ptr =
9840             new RayTracingTestCase(testCtx, "shaderRecordSTD430_5", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_5));
9841         auto newTestCaseSTD430_6Ptr =
9842             new RayTracingTestCase(testCtx, "shaderRecordSTD430_6", CaseDef(TestType::SHADER_RECORD_BLOCK_STD430_6));
9843 
9844         // Tests usage of various variables inside a shader record block using scalar layout
9845         auto newTestCaseScalar_1Ptr =
9846             new RayTracingTestCase(testCtx, "shaderRecordScalar_1", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_1));
9847         auto newTestCaseScalar_2Ptr =
9848             new RayTracingTestCase(testCtx, "shaderRecordScalar_2", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_2));
9849         auto newTestCaseScalar_3Ptr =
9850             new RayTracingTestCase(testCtx, "shaderRecordScalar_3", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_3));
9851         auto newTestCaseScalar_4Ptr =
9852             new RayTracingTestCase(testCtx, "shaderRecordScalar_4", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_4));
9853         auto newTestCaseScalar_5Ptr =
9854             new RayTracingTestCase(testCtx, "shaderRecordScalar_5", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_5));
9855         auto newTestCaseScalar_6Ptr =
9856             new RayTracingTestCase(testCtx, "shaderRecordScalar_6", CaseDef(TestType::SHADER_RECORD_BLOCK_SCALAR_6));
9857 
9858         // Tests usage of various variables inside a shader record block using scalar layout and explicit offset qualifiers
9859         auto newTestCaseExplicitScalarOffset_1Ptr =
9860             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_1",
9861                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_1));
9862         auto newTestCaseExplicitScalarOffset_2Ptr =
9863             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_2",
9864                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_2));
9865         auto newTestCaseExplicitScalarOffset_3Ptr =
9866             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_3",
9867                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_3));
9868         auto newTestCaseExplicitScalarOffset_4Ptr =
9869             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_4",
9870                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_4));
9871         auto newTestCaseExplicitScalarOffset_5Ptr =
9872             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_5",
9873                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_5));
9874         auto newTestCaseExplicitScalarOffset_6Ptr =
9875             new RayTracingTestCase(testCtx, "shaderRecordExplicitScalarOffset_6",
9876                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_SCALAR_OFFSET_6));
9877 
9878         // Tests usage of various variables inside a shader record block using std430 layout and explicit offset qualifiers
9879         auto newTestCaseExplicitSTD430Offset_1Ptr =
9880             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_1",
9881                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_1));
9882         auto newTestCaseExplicitSTD430Offset_2Ptr =
9883             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_2",
9884                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_2));
9885         auto newTestCaseExplicitSTD430Offset_3Ptr =
9886             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_3",
9887                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_3));
9888         auto newTestCaseExplicitSTD430Offset_4Ptr =
9889             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_4",
9890                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_4));
9891         auto newTestCaseExplicitSTD430Offset_5Ptr =
9892             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_5",
9893                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_5));
9894         auto newTestCaseExplicitSTD430Offset_6Ptr =
9895             new RayTracingTestCase(testCtx, "shaderRecordExplicitSTD430Offset_6",
9896                                    CaseDef(TestType::SHADER_RECORD_BLOCK_EXPLICIT_STD430_OFFSET_6));
9897         miscGroupPtr->addChild(newTestCaseSTD430_1Ptr);
9898         miscGroupPtr->addChild(newTestCaseSTD430_2Ptr);
9899         miscGroupPtr->addChild(newTestCaseSTD430_3Ptr);
9900         miscGroupPtr->addChild(newTestCaseSTD430_4Ptr);
9901         miscGroupPtr->addChild(newTestCaseSTD430_5Ptr);
9902         miscGroupPtr->addChild(newTestCaseSTD430_6Ptr);
9903 
9904         miscGroupPtr->addChild(newTestCaseScalar_1Ptr);
9905         miscGroupPtr->addChild(newTestCaseScalar_2Ptr);
9906         miscGroupPtr->addChild(newTestCaseScalar_3Ptr);
9907         miscGroupPtr->addChild(newTestCaseScalar_4Ptr);
9908         miscGroupPtr->addChild(newTestCaseScalar_5Ptr);
9909         miscGroupPtr->addChild(newTestCaseScalar_6Ptr);
9910 
9911         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_1Ptr);
9912         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_2Ptr);
9913         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_3Ptr);
9914         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_4Ptr);
9915         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_5Ptr);
9916         miscGroupPtr->addChild(newTestCaseExplicitScalarOffset_6Ptr);
9917 
9918         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_1Ptr);
9919         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_2Ptr);
9920         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_3Ptr);
9921         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_4Ptr);
9922         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_5Ptr);
9923         miscGroupPtr->addChild(newTestCaseExplicitSTD430Offset_6Ptr);
9924     }
9925 
9926     for (auto currentGeometryType = GeometryType::FIRST; currentGeometryType != GeometryType::COUNT;
9927          currentGeometryType      = static_cast<GeometryType>(static_cast<uint32_t>(currentGeometryType) + 1))
9928     {
9929         const std::string newTestCaseName =
9930             "recursiveTraces_" + de::toString(getSuffixForGeometryType(currentGeometryType)) + "_";
9931 
9932         // 0 recursion levels.
9933         {
9934             // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9935             auto newTestCasePtr =
9936                 new RayTracingTestCase(testCtx, (newTestCaseName + "0").data(),
9937                                        CaseDef{TestType::RECURSIVE_TRACES_0, currentGeometryType,
9938                                                AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9939 
9940             miscGroupPtr->addChild(newTestCasePtr);
9941         }
9942 
9943         // TODO: for (uint32_t nLevels = 1; nLevels <= 29; ++nLevels)
9944         for (uint32_t nLevels = 1; nLevels <= 15; ++nLevels)
9945         {
9946             // Verifies that relevant shader stages can correctly read large ray payloads provided by raygen shader stage.
9947             auto newTestCasePtr = new RayTracingTestCase(
9948                 testCtx, (newTestCaseName + de::toString(nLevels)).data(),
9949                 CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::RECURSIVE_TRACES_1) + (nLevels - 1)),
9950                         currentGeometryType, AccelerationStructureLayout::ONE_TL_ONE_BL_ONE_GEOMETRY});
9951 
9952             miscGroupPtr->addChild(newTestCasePtr);
9953         }
9954     }
9955 
9956     {
9957         // Verifies that OpIgnoreIntersectionKHR works as per spec (static invocations).
9958         auto newTestCase1Ptr = new RayTracingTestCase(
9959             testCtx, "OpIgnoreIntersectionKHR_AnyHitStatically",
9960             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::IGNORE_ANY_HIT_STATICALLY)),
9961                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9962         // Verifies that OpIgnoreIntersectionKHR works as per spec (dynamic invocations).
9963         auto newTestCase2Ptr = new RayTracingTestCase(
9964             testCtx, "OpIgnoreIntersectionKHR_AnyHitDynamically",
9965             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::IGNORE_ANY_HIT_DYNAMICALLY)),
9966                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9967         // Verifies that OpTerminateRayKHR works as per spec (static invocations).
9968         auto newTestCase3Ptr = new RayTracingTestCase(
9969             testCtx, "OpTerminateRayKHR_AnyHitStatically",
9970             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_ANY_HIT_STATICALLY)),
9971                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9972         // Verifies that OpTerminateRayKHR works as per spec (dynamic invocations).
9973         auto newTestCase4Ptr = new RayTracingTestCase(
9974             testCtx, "OpTerminateRayKHR_AnyHitDynamically",
9975             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_ANY_HIT_DYNAMICALLY)),
9976                     GeometryType::TRIANGLES, AccelerationStructureLayout::COUNT});
9977         // Verifies that OpTerminateRayKHR works as per spec (static invocations).
9978         auto newTestCase5Ptr = new RayTracingTestCase(
9979             testCtx, "OpTerminateRayKHR_IntersectionStatically",
9980             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_INTERSECTION_STATICALLY)),
9981                     GeometryType::AABB, AccelerationStructureLayout::COUNT});
9982         // Verifies that OpTerminateRayKHR works as per spec (dynamic invocations).
9983         auto newTestCase6Ptr = new RayTracingTestCase(
9984             testCtx, "OpTerminateRayKHR_IntersectionDynamically",
9985             CaseDef{static_cast<TestType>(static_cast<uint32_t>(TestType::TERMINATE_INTERSECTION_DYNAMICALLY)),
9986                     GeometryType::AABB, AccelerationStructureLayout::COUNT});
9987 
9988         miscGroupPtr->addChild(newTestCase1Ptr);
9989         miscGroupPtr->addChild(newTestCase2Ptr);
9990         miscGroupPtr->addChild(newTestCase3Ptr);
9991         miscGroupPtr->addChild(newTestCase4Ptr);
9992         miscGroupPtr->addChild(newTestCase5Ptr);
9993         miscGroupPtr->addChild(newTestCase6Ptr);
9994     }
9995 
9996     {
9997         const auto groupPtr = miscGroupPtr.get();
9998         addFunctionCaseWithPrograms(groupPtr, "null_miss", checkRTPipelineSupport, initBasicHitBufferPrograms,
9999                                     nullMissInstance);
10000         addFunctionCaseWithPrograms(groupPtr, "empty_pipeline_layout", checkRTPipelineSupport, initEmptyPrograms,
10001                                     emptyPipelineLayoutInstance);
10002         addFunctionCaseWithPrograms(groupPtr, "reuse_creation_buffer_top", checkReuseCreationBufferSupport,
10003                                     initReuseCreationBufferPrograms, reuseCreationBufferInstance, true /*top*/);
10004         addFunctionCaseWithPrograms(groupPtr, "reuse_creation_buffer_bottom", checkReuseCreationBufferSupport,
10005                                     initReuseCreationBufferPrograms, reuseCreationBufferInstance, false /*top*/);
10006         addFunctionCaseWithPrograms(groupPtr, "reuse_scratch_buffer", checkReuseScratchBufferSupport,
10007                                     initReuseScratchBufferPrograms, reuseScratchBufferInstance);
10008     }
10009 
10010     return miscGroupPtr.release();
10011 }
10012 
10013 } // namespace RayTracing
10014 } // namespace vkt
10015