• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2024 The Khronos Group Inc.
6  * Copyright (c) 2024 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Device Generated Commands EXT Graphics Draw Tests
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktDGCGraphicsDrawCountTestsExt.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktDGCUtilExt.hpp"
28 #include "vktDGCUtilCommon.hpp"
29 #include "vktTestCaseUtil.hpp"
30 
31 #include "vkTypeUtil.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkBuilderUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 
38 #include "tcuImageCompare.hpp"
39 
40 #include "deUniquePtr.hpp"
41 #include "deRandom.hpp"
42 
43 #include <numeric>
44 #include <vector>
45 #include <cstddef>
46 #include <sstream>
47 #include <algorithm>
48 #include <iterator>
49 #include <utility>
50 
51 namespace vkt
52 {
53 namespace DGC
54 {
55 
56 using namespace vk;
57 
58 namespace
59 {
60 
61 /*
62 GENERAL MECHANISM BEHIND THESE TESTS:
63 
64 Create a framebuffer of 32x32 pixels.
65   - This gives us a total of 1024 pixels to draw over.
66 Create one triangle to cover each pixel and store their vertices in a vertex buffer.
67 Divide the 1024 pixels in 16 pseudorandom chunks.
68   - For that, choose a number of pixels randomly between 1 and 64 pixels for the first 15 chunks.
69   - For the last chunk, choose the remaining pixels.
70 For each of those chunks, create a VkDrawIndirectCommand structure.
71   - vertexCount is the number of pixels in each chunk * 3.
72   - firstVertex is the number of pixels in the previous chunks * 3.
73   - Choose pseudorandomly one of 256 InstanceIndex values for each pixel:
74     - Value in [0, 16, 32, 48, 64...] for firstInstance
75     - Value in [1..16] for instanceCount
76     - InstanceIndex will be a pseudorandom number in 0..255.
77 Pseudorandomly choose to split the list of chunks in 4 (buffers)
78   - Similar to dividing the pixels in chunks.
79 Pseudorandomly choose how many extra structures to put in the middle for padding in each buffer.
80  - For example, from 0 to 7.
81 With that, create 4 VkDrawIndirectCountIndirectCommandEXT structures:
82  - bufferAddress will vary in each of the 4 buffers.
83  - stride will depend on the pseudorandom padding in each buffer.
84  - commandCount will be the number of chunks assigned to each buffer.
85 
86 Clear framebuffer to (0, 0, 0, 1.0)
Draw (InstanceIndex / 255.0, 0, 1.0, 1.0) in the fragment shader.
88 
89 When testing execution sets with this, we will take the chance to test also:
90 - Shader IO
91 - Built-ins
92 - Descriptor sets.
93 
94 Descriptor sets and IO:
95 
96 In the vertex shader, we'll use 4 readonly storage buffers as descriptor bindings (1 for each sequence), containing:
97 
- binding=0:  8 even non-negative numbers: 0, 2, 4, 6, 8, 10, 12, 14
- binding=1: 12 odd positive numbers: 1, 3, ...
- binding=2: 16 even negative numbers: -2, -4, ...
- binding=3: 20 odd negative numbers: -1, -3, ...
102 
103 And 4 variants of the vertex and fragment shaders, numbered 0 to 3. Each sequence will use 1 vertex and fragment shader variant, and
104 will work with 1 of the 4 buffers.
105 
- Vertex shader i reads numbers from binding i, and stores each in an out flat int variable, in some order that depends on the
  VertexIndex, for example. What matters is that, for variant 0 we'll have 8 IO variables and the number of IO variables changes
  (increasing) for each sequence and shader.
109 - Fragment shader i will read those numbers from IO (4 fragment shaders, different amount of IO variables) and calculate the total
110   sum.
111 - The sum will be the same for all pixels of the sequence.
112 - As we know how many pixels are drawn by each sequence, we'll store the expected results in a storage buffer for each pixel.
113 - The fragment shader will check the sum against the expected result for the pixel (using gl_FragCoord to access a storage buffer
114   with the results) and will:
115     - Write 0 in the green channel if correct.
116     - Write 1 in the green channel if not.
117 
118 For built-ins:
119 
120 - Position and PointSize are set normally.
121 - We'll store a Vec4 of extra data for each vertex.
122   - One of them will be the clip distance and the other one will be the cull distance.
123   - In 1/8 (pseudorandom) of the pixels, we'll store a negative clip distance.
124   - In 1/8 (pseudorandom) of the pixels, we'll store a negative cull distance.
125 - When verifying results, those pixels should not be covered.
126 
127 */
128 
// Shorthand for a de::MovePtr holding a tcu::TestCaseGroup.
using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
130 
// Number of sequences the draws are split into; also the number of input buffers and shader variants.
constexpr uint32_t kSequenceCount{4u};
// Every pixel is covered by one triangle, so each pixel contributes this many vertices.
constexpr uint32_t kPerTriangleVertices{3u};
// Divisor used by the fragment shader to turn the instance index into the red channel value.
constexpr uint32_t kMaxInstanceIndex{255u};
// Base value of the per-chunk offset added to stored indices and compensated with a negative vertexOffset.
constexpr uint32_t kVertexChunkOffset{1000u};
// Each "pipeline" contains a vertex and a frag shader.
constexpr uint32_t kPipelineShaders{2u};
136 
// Kind of indirect draw exercised by a test case.
// The numeric values feed the pseudorandom seed, so they are spelled out explicitly.
enum class TestType
{
    DRAW_COUNT                     = 0, // Non-indexed draws.
    DRAW_INDEXED_COUNT             = 1, // Indexed draws.
    DRAW_INDEXED_COUNT_INDEX_TOKEN = 2, // Indexed draws as above, but using an index buffer token.
};
143 
// Preprocessing mode requested by a test case. NONE means no preprocessing; the two other
// values select preprocessing with the same or with another state command buffer.
enum class PreprocessType
{
    NONE                   = 0,
    SAME_STATE_CMD_BUFFER  = 1,
    OTHER_STATE_CMD_BUFFER = 2,
};
150 
151 struct TestParams
152 {
153     TestType testType;
154     PreprocessType preprocessType;
155     bool checkDrawParams;
156     bool useExecutionSet;
157     bool useShaderObjects;
158     bool unorderedSequences;
159 
getRandomSeedvkt::DGC::__anon9465af620111::TestParams160     uint32_t getRandomSeed(void) const
161     {
162         // Other members not used because we want to make sure results don't
163         // change if we use the same pseudorandom sequence.
164         const uint32_t rndSeed = ((static_cast<int>(testType) << 26u) | (useExecutionSet << 25u) |
165                                   (useShaderObjects << 24u) | static_cast<uint32_t>(checkDrawParams));
166 
167         return rndSeed;
168     }
169 
doPreprocessvkt::DGC::__anon9465af620111::TestParams170     bool doPreprocess(void) const
171     {
172         return (preprocessType != PreprocessType::NONE);
173     }
174 
indexedDrawsvkt::DGC::__anon9465af620111::TestParams175     bool indexedDraws(void) const
176     {
177         return (testType == TestType::DRAW_INDEXED_COUNT || testType == TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN);
178     }
179 
indexBufferTokenvkt::DGC::__anon9465af620111::TestParams180     bool indexBufferToken(void) const
181     {
182         return (testType == TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN);
183     }
184 };
185 
checkDrawCountSupport(Context & context,TestParams params)186 void checkDrawCountSupport(Context &context, TestParams params)
187 {
188     const auto stages                 = (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
189     const auto bindStages             = (params.useExecutionSet ? stages : static_cast<VkShaderStageFlags>(0u));
190     const auto bindStagesPipeline     = (params.useShaderObjects ? 0u : bindStages);
191     const auto bindStagesShaderObject = (params.useShaderObjects ? bindStages : 0u);
192 
193     checkDGCExtSupport(context, stages, bindStagesPipeline, bindStagesShaderObject);
194 
195     const auto &dgcProperties = context.getDeviceGeneratedCommandsPropertiesEXT();
196     if (!dgcProperties.deviceGeneratedCommandsMultiDrawIndirectCount)
197         TCU_THROW(NotSupportedError, "deviceGeneratedCommandsMultiDrawIndirectCount not supported");
198 
199     if (params.useShaderObjects)
200     {
201         context.requireDeviceFunctionality("VK_EXT_shader_object");
202 
203         if (params.useExecutionSet && dgcProperties.maxIndirectShaderObjectCount == 0u)
204             TCU_THROW(NotSupportedError, "maxIndirectShaderObjectCount is zero");
205     }
206 
207     if (params.checkDrawParams)
208         context.requireDeviceFunctionality("VK_KHR_shader_draw_parameters");
209 }
210 
// Arithmetic progression generator: holds a current value and advances it by a fixed step on
// every increment. It converts implicitly to T, which makes it usable as the "value" argument
// of std::iota to fill a range with start, start+step, start+2*step, ...
template <typename T>
class RangeGen
{
public:
    // start: first value produced; step: amount added on each increment (may be negative).
    RangeGen(T start, T step) : m_current(start), m_step(step)
    {
    }

    // Pre-increment: advances the generator and returns the new current value.
    T operator++()
    {
        m_current += m_step;
        return m_current;
    }

    // Post-increment: advances the generator and returns the value it had before advancing.
    T operator++(int)
    {
        T prev = m_current;
        m_current += m_step;
        return prev;
    }

    // Current value. Reading it does not modify the generator, so it is also available on
    // const generators.
    operator T() const
    {
        return m_current;
    }

private:
    T m_current;
    T m_step;
};
239 
240 using BufferDataVec = std::vector<std::vector<int32_t>>;
241 
getInputBuffers(void)242 BufferDataVec getInputBuffers(void)
243 {
244     //  - binding=0:  8 odd positive numbers: 0, 2, 4, 6, 8, 10, 12, 14
245     //  - binding=1: 12 even positive numbers: 1, 3, ...
246     //  - binding=2: 16 odd negative numbers: -2, -4, ...
247     //  - binding=3: 20 even negative numbers: -1, -3, ...
248     DE_ASSERT(kSequenceCount == 4u);
249     const std::vector<size_t> bufferSizes{8u, 12u, 16u, 20u};
250     const std::vector<int32_t> rangeStarts{0, 1, -2, -1};
251     const std::vector<int32_t> rangeSteps{2, 2, -2, -2};
252 
253     BufferDataVec buffers(kSequenceCount);
254     for (uint32_t i = 0u; i < kSequenceCount; ++i)
255     {
256         auto &buffer = buffers.at(i);
257         buffer.resize(bufferSizes.at(i));
258         RangeGen generator(rangeStarts.at(i), rangeSteps.at(i));
259         std::iota(begin(buffer), end(buffer), generator);
260     }
261 
262     return buffers;
263 }
264 
265 struct VertexData
266 {
267     tcu::Vec4 position;
268     tcu::Vec4 extraData; // 0: clip distance, 1: cull distance
269 
VertexDatavkt::DGC::__anon9465af620111::VertexData270     VertexData(const tcu::Vec4 &position_, const tcu::Vec4 &extraData_) : position(position_), extraData(extraData_)
271     {
272     }
273 };
274 
initDrawCountPrograms(vk::SourceCollections & programCollection,TestParams params)275 void initDrawCountPrograms(vk::SourceCollections &programCollection, TestParams params)
276 {
277     std::vector<uint32_t> ioSizes;
278     uint32_t shaderVariants = 1u;
279 
280     if (params.useExecutionSet)
281     {
282         const auto inputBuffers = getInputBuffers();
283 
284         shaderVariants = de::sizeU32(inputBuffers);
285 
286         std::transform(begin(inputBuffers), end(inputBuffers), std::back_inserter(ioSizes),
287                        [](const std::vector<int32_t> &vec) { return de::sizeU32(vec); });
288     }
289 
290     const uint32_t locationOffset = 5u; // For I/O vars, to leave some room for other things we may want to pass.
291     const bool checkDrawParams    = params.checkDrawParams;
292 
293     std::ostringstream vertBindings;
294     std::string vertBindingsDecl;
295     std::string fragBindingsDecl;
296     std::string pushConstantDecl;
297     uint32_t nextFragBinding = 0u;
298 
299     // When using multiple shader variants, we'll test bindings and shader IO as described above.
300     if (params.useExecutionSet)
301     {
302         for (size_t i = 0u; i < ioSizes.size(); ++i)
303         {
304             vertBindings << "layout (set=0, binding=" << i << ", std430) readonly buffer Buffer" << i
305                          << " { int values[" << ioSizes.at(i) << "]; } buffer" << i << ";\n";
306         }
307         vertBindingsDecl = vertBindings.str();
308 
309         // Note frag shader bindings use separate sets.
310         fragBindingsDecl += "layout (set=1, binding=" + std::to_string(nextFragBinding++) +
311                             ", std430) readonly buffer ExpectedAccum { int values[]; } ea;\n";
312     }
313 
314     if (checkDrawParams)
315         fragBindingsDecl += "layout (set=1, binding=" + std::to_string(nextFragBinding++) +
316                             ", std430) readonly buffer ExpectedDrawParams { ivec4 values[]; } edp;\n";
317 
318     if (params.useExecutionSet || checkDrawParams)
319         pushConstantDecl += "layout (push_constant, std430) uniform PushConstantBlock { uvec2 dim; } pc;\n";
320 
321     for (uint32_t i = 0u; i < shaderVariants; ++i)
322     {
323         const uint32_t ioVarCount = (params.useExecutionSet ? ioSizes.at(i) : 0u);
324         const auto nameSuffix     = (params.useExecutionSet ? std::to_string(i) : std::string());
325 
326         std::ostringstream outVarsDecl;
327         std::ostringstream inVarsDecl;
328         std::ostringstream outVarsWrite;
329         std::ostringstream inVarsRead;
330 
331         for (uint32_t j = 0u; j < ioVarCount; ++j)
332         {
333             const auto location = j + locationOffset;
334 
335             outVarsDecl << "layout (location=" << location << ") out flat int iovar" << j << ";\n";
336             inVarsDecl << "layout (location=" << location << ") in flat int iovar" << j << ";\n";
337             outVarsWrite << "    iovar" << j << " = buffer" << i << ".values[" << j << "];\n";
338             inVarsRead << "    accum += iovar" << j << ";\n";
339         }
340 
341         std::ostringstream vert;
342         vert << "#version 460\n"
343              << "layout (location=0) in vec4 inPos;\n"
344              << "layout (location=1) in vec4 inExtraData;\n"
345              << "layout (location=0) out flat int outInstanceIndex;\n"
346              << (checkDrawParams ? "layout (location=1) out flat int drawIndex;\n" : "") << "\n"
347              << (checkDrawParams ? "layout (location=2) out flat int baseVertex;\n" : "") << "\n"
348              << (checkDrawParams ? "layout (location=3) out flat int baseInstance;\n" : "") << "\n"
349              << vertBindingsDecl << "\n"
350              << outVarsDecl.str() << "\n"
351              << "out gl_PerVertex {\n"
352              << "    vec4  gl_Position;\n"
353              << "    float gl_PointSize;\n"
354              << "    float gl_ClipDistance[1];\n"
355              << "    float gl_CullDistance[1];\n"
356              << "};\n"
357              << "void main (void) {\n"
358              << "    gl_Position = inPos;\n"
359              << "    gl_PointSize = 1.0;\n"
360              << "    gl_ClipDistance[0] = inExtraData.x;\n"
361              << "    gl_CullDistance[0] = inExtraData.y;\n"
362              << "    outInstanceIndex = gl_InstanceIndex;\n"
363              << (checkDrawParams ? "    drawIndex = gl_DrawID;\n" : "")
364              << (checkDrawParams ? "    baseVertex = gl_BaseVertex;\n" : "")
365              << (checkDrawParams ? "    baseInstance = gl_BaseInstance;\n" : "") << outVarsWrite.str() << "}\n";
366         const auto vertName = "vert" + nameSuffix;
367         programCollection.glslSources.add(vertName) << glu::VertexSource(vert.str());
368 
369         const bool pixelIdxNeeded = (params.useExecutionSet || checkDrawParams);
370         std::ostringstream frag;
371         frag << "#version 460\n"
372              << "layout (location=0) in flat int inInstanceIndex;\n"
373              << (checkDrawParams ? "layout (location=1) in flat int drawIndex;\n" : "")
374              << (checkDrawParams ? "layout (location=2) in flat int baseVertex;\n" : "")
375              << (checkDrawParams ? "layout (location=3) in flat int baseInstance;\n" : "")
376              << "layout (location=0) out vec4 outColor;\n"
377              << "\n"
378              << fragBindingsDecl << pushConstantDecl << "\n"
379              << inVarsDecl.str() << "\n"
380              << "void main (void) {\n"
381              << (pixelIdxNeeded ?
382                      "    const uint pixelIdx = uint(gl_FragCoord.y) * pc.dim.x + uint(gl_FragCoord.x);\n" :
383                      "")
384              << (params.useExecutionSet ? "    int accum = 0;\n" : "") << inVarsRead.str()
385              << "    const float red   = float(inInstanceIndex) / " << kMaxInstanceIndex << ".0;\n"
386              << "    const float green = "
387              << (params.useExecutionSet ? "((accum == ea.values[pixelIdx]) ? 0.0 : 1.0)" : "0.0") << ";\n"
388              << "    bool blueOK = true;\n"
389              << (checkDrawParams ? "    blueOK = (blueOK && (drawIndex == edp.values[pixelIdx].x));\n" : "")
390              << (checkDrawParams ? "    blueOK = (blueOK && (baseVertex == edp.values[pixelIdx].y));\n" : "")
391              << (checkDrawParams ? "    blueOK = (blueOK && (baseInstance == edp.values[pixelIdx].z));\n" : "")
392              << "    const float blue  = (blueOK ? 1.0 : 0.0);\n"
393              << "    outColor = vec4(red, green, blue, 1.0);\n"
394              << "}\n";
395         const auto fragName = "frag" + nameSuffix;
396         programCollection.glslSources.add(fragName) << glu::FragmentSource(frag.str());
397     }
398 }
399 
400 using DGCBufferPtr        = std::unique_ptr<DGCBuffer>;
401 using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
402 using BufferVec           = std::vector<BufferWithMemoryPtr>;
403 
404 struct SequenceInfo
405 {
406     DGCBufferPtr buffer;
407     uint32_t chunkCount;
408     uint32_t stride;
409     uint32_t vertexCount;
410 };
411 
makeSingleShader(const DeviceInterface & vkd,VkDevice device,VkShaderStageFlagBits stage,const ProgramBinary & binary,const std::vector<VkDescriptorSetLayout> & setLayouts,const std::vector<VkPushConstantRange> & pcRanges)412 Move<VkShaderEXT> makeSingleShader(const DeviceInterface &vkd, VkDevice device, VkShaderStageFlagBits stage,
413                                    const ProgramBinary &binary, const std::vector<VkDescriptorSetLayout> &setLayouts,
414                                    const std::vector<VkPushConstantRange> &pcRanges)
415 {
416     VkShaderStageFlags nextStage = 0u;
417     if (stage == VK_SHADER_STAGE_VERTEX_BIT)
418         nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
419     else if (stage == VK_SHADER_STAGE_FRAGMENT_BIT)
420         ;
421     else
422         DE_ASSERT(false);
423 
424     const VkShaderCreateInfoEXT createInfo = {
425         VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, //  VkStructureType sType;
426         nullptr,                                  //  const void* pNext;
427         0u,                                       //  VkShaderCreateFlagsEXT flags;
428         stage,                                    //  VkShaderStageFlagBits stage;
429         nextStage,                                //  VkShaderStageFlags nextStage;
430         VK_SHADER_CODE_TYPE_SPIRV_EXT,            //  VkShaderCodeTypeEXT codeType;
431         binary.getSize(),                         //  size_t codeSize;
432         binary.getBinary(),                       //  const void* pCode;
433         "main",                                   //  const char* pName;
434         de::sizeU32(setLayouts),                  //  uint32_t setLayoutCount;
435         de::dataOrNull(setLayouts),               //  const VkDescriptorSetLayout* pSetLayouts;
436         de::sizeU32(pcRanges),                    //  uint32_t pushConstantRangeCount;
437         de::dataOrNull(pcRanges),                 //  const VkPushConstantRange* pPushConstantRanges;
438         nullptr,                                  //  const VkSpecializationInfo* pSpecializationInfo;
439     };
440 
441     binary.setUsed();
442 
443     return createShader(vkd, device, createInfo);
444 }
445 
testDrawCountRun(Context & context,TestParams params)446 tcu::TestStatus testDrawCountRun(Context &context, TestParams params)
447 {
448     const auto ctx = context.getContextCommonData();
449     const tcu::IVec3 fbExtent(32, 32, 1);
450     const auto vkExtent       = makeExtent3D(fbExtent);
451     const auto floatExtent    = fbExtent.asFloat();
452     const auto pixelCountU    = vkExtent.width * vkExtent.height * vkExtent.depth;
453     const auto kChunkCount    = 16u;
454     const auto chunkMaxPixels = static_cast<int>(pixelCountU / kChunkCount); // Does not apply to the last chunk.
455     const auto maxIndirectDraws =
456         static_cast<int>(kChunkCount / kSequenceCount); // Per draw count dispatch. Doesn't apply to last.
457     const auto stageFlags = (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
458     const auto bindPoint  = VK_PIPELINE_BIND_POINT_GRAPHICS;
459     const auto descType   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
460 
461     // Pseudorandom number generator.
462     const auto randomSeed = params.getRandomSeed();
463     de::Random rnd(randomSeed);
464 
465     // Generate one triangle around the center of each pixel.
466     const float pixelWidth  = 2.0f / floatExtent.x();
467     const float pixelHeight = 2.0f / floatExtent.y();
468     const float horMargin   = pixelWidth / 4.0f;
469     const float verMargin   = pixelHeight / 4.0f;
470 
471     // Converts to framebuffer range [-1,1]
472     const auto normalize = [](int v, int total)
473     { return ((static_cast<float>(v) + 0.5f) / static_cast<float>(total)) * 2.0f - 1.0f; };
474 
475     // These will be chosen pseudorandomly for each pixel.
476     const std::vector<float> clipDistances{0.75f, 0.0f, -0.5f, 1.25f, 20.0f, 2.0f, 0.25f, 1.0f};
477     const std::vector<float> cullDistances{0.75f, 0.0f, 0.5f, 1.25f, 20.0f, 2.0f, -0.25f, 1.0f};
478 
479     const int lastClip = static_cast<int>(clipDistances.size()) - 1;
480     const int lastCull = static_cast<int>(cullDistances.size()) - 1;
481 
482     // Vertex buffer data.
483     std::vector<VertexData> vertices;
484     vertices.reserve(pixelCountU * kPerTriangleVertices);
485 
486     for (int y = 0; y < fbExtent.y(); ++y)
487         for (int x = 0; x < fbExtent.x(); ++x)
488         {
489             const float xCenter = normalize(x, fbExtent.x());
490             const float yCenter = normalize(y, fbExtent.y());
491 
492             const float clip = clipDistances.at(rnd.getInt(0, lastClip));
493             const float cull = cullDistances.at(rnd.getInt(0, lastCull));
494 
495             const tcu::Vec4 extraData(clip, cull, 0.0f, 0.0f);
496 
497             vertices.emplace_back(tcu::Vec4(xCenter - horMargin, yCenter + verMargin, 0.0f, 1.0f), extraData);
498             vertices.emplace_back(tcu::Vec4(xCenter + horMargin, yCenter + verMargin, 0.0f, 1.0f), extraData);
499             vertices.emplace_back(tcu::Vec4(xCenter, yCenter - verMargin, 0.0f, 1.0f), extraData);
500         }
501 
502     const auto vertexBufferSize           = static_cast<VkDeviceSize>(de::dataSize(vertices));
503     const auto vertexBufferUsage          = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
504     const auto vertexBufferInfo           = makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
505     const VkDeviceSize vertexBufferOffset = 0ull;
506 
507     BufferWithMemory vertexBuffer(ctx.vkd, ctx.device, ctx.allocator, vertexBufferInfo, MemoryRequirement::HostVisible);
508     auto &vertexBufferAlloc = vertexBuffer.getAllocation();
509     void *vertexBufferData  = vertexBufferAlloc.getHostPtr();
510 
511     deMemcpy(vertexBufferData, de::dataOrNull(vertices), de::dataSize(vertices));
512 
513     // Divide pixels in chunks of pseudorandom sizes.
514     std::vector<uint32_t> chunkSizes(kChunkCount, 0u);
515     {
516         uint32_t total = 0u;
517         for (uint32_t i = 0u; i < kChunkCount - 1u; ++i)
518         {
519             const uint32_t chunkSize = static_cast<uint32_t>(rnd.getInt(1, chunkMaxPixels));
520             chunkSizes.at(i)         = chunkSize;
521             total += chunkSize;
522         }
523         // Last chunk contains the remaining pixels.
524         chunkSizes.back() = pixelCountU - total;
525     }
526 
527     // Draw operation per chunk.
528     std::vector<VkDrawIndirectCommand> chunkDraws;
529     std::vector<VkDrawIndexedIndirectCommand> chunkIndexedDraws;
530 
531     if (params.testType == TestType::DRAW_COUNT)
532         chunkDraws.reserve(kChunkCount);
533     else if (params.indexedDraws())
534         chunkIndexedDraws.reserve(kChunkCount);
535     else
536         DE_ASSERT(false);
537 
538     {
539         const uint32_t firstInstanceStart = 0u;
540         const uint32_t firstInstanceStep  = 16u;
541         const int maxInstanceCount        = 16u;
542         RangeGen firstInstanceRange(firstInstanceStart, firstInstanceStep);
543 
544         std::vector<uint32_t> firstInstances(16u, 0u);
545         std::iota(begin(firstInstances), end(firstInstances), firstInstanceRange);
546 
547         uint32_t prevPixels = 0u;
548         for (uint32_t i = 0u; i < kChunkCount; ++i)
549         {
550             const auto &chunkSize = chunkSizes.at(i);
551 
552             const auto vertexCount   = chunkSize * kPerTriangleVertices;
553             const auto instanceCount = static_cast<uint32_t>(rnd.getInt(1, maxInstanceCount));
554             const auto firstVertex   = prevPixels * kPerTriangleVertices;
555             const auto firstInstance = firstInstances.at(rnd.getInt(0, static_cast<int>(firstInstances.size() - 1)));
556             const auto chunkOffset   = kVertexChunkOffset + i;
557 
558             if (params.testType == TestType::DRAW_COUNT)
559             {
560                 const VkDrawIndirectCommand cmd{
561                     vertexCount,
562                     instanceCount,
563                     firstVertex,
564                     firstInstance,
565                 };
566                 chunkDraws.push_back(cmd);
567             }
568             else if (params.indexedDraws())
569             {
570                 const VkDrawIndexedIndirectCommand cmd{
571                     vertexCount,                        //  uint32_t    indexCount;
572                     instanceCount,                      //  uint32_t    instanceCount;
573                     firstVertex,                        //  uint32_t    firstIndex;
574                     -static_cast<int32_t>(chunkOffset), //  int32_t     vertexOffset;
575                     firstInstance,                      //  uint32_t    firstInstance;
576                 };
577                 chunkIndexedDraws.push_back(cmd);
578             }
579             else
580                 DE_ASSERT(false);
581 
582             prevPixels += chunkSize;
583         }
584     }
585 
586     // Create indirect buffers for the sequences.
587     std::vector<SequenceInfo> sequenceInfos;
588     sequenceInfos.reserve(kSequenceCount);
589 
590     {
591         uint32_t prevChunks = 0u;
592 
593         for (uint32_t i = 0u; i < kSequenceCount; ++i)
594         {
595             sequenceInfos.emplace_back();
596             auto &seqInfo = sequenceInfos.back();
597 
598             const auto seqChunks = ((i < kSequenceCount - 1u) ? static_cast<uint32_t>(rnd.getInt(1, maxIndirectDraws)) :
599                                                                 (kChunkCount - prevChunks));
600             const auto extraPadding = static_cast<uint32_t>(rnd.getInt(0, 7));
601             const auto totalStructs = extraPadding + 1u;
602             const auto structSize   = ((params.testType == TestType::DRAW_COUNT) ? sizeof(VkDrawIndirectCommand) :
603                                                                                    sizeof(VkDrawIndexedIndirectCommand));
604             const auto stride       = totalStructs * structSize;
605             const auto bufferSize   = stride * seqChunks;
606 
607             seqInfo.chunkCount = seqChunks;
608             seqInfo.stride     = static_cast<uint32_t>(stride);
609             seqInfo.buffer.reset(new DGCBuffer(ctx.vkd, ctx.device, ctx.allocator, bufferSize));
610 
611             // Copy indirect commands to the buffer.
612             auto &bufferAlloc = seqInfo.buffer->getAllocation();
613             char *bufferData  = reinterpret_cast<char *>(bufferAlloc.getHostPtr());
614 
615             deMemset(bufferData, 0, static_cast<size_t>(seqInfo.buffer->getSize()));
616             uint32_t vertexCount = 0u;
617 
618             for (uint32_t j = 0; j < seqInfo.chunkCount; ++j)
619             {
620                 const auto chunkIdx = prevChunks + j;
621                 const auto dstPtr   = bufferData + stride * j;
622                 const auto srcPtr   = ((params.testType == TestType::DRAW_COUNT) ?
623                                            reinterpret_cast<const void *>(&chunkDraws.at(chunkIdx)) :
624                                            reinterpret_cast<const void *>(&chunkIndexedDraws.at(chunkIdx)));
625                 const auto chunkVertexCount =
626                     ((params.testType == TestType::DRAW_COUNT) ? chunkDraws.at(chunkIdx).vertexCount :
627                                                                  chunkIndexedDraws.at(chunkIdx).indexCount);
628                 deMemcpy(dstPtr, srcPtr, structSize);
629                 vertexCount += chunkVertexCount;
630             }
631 
632             seqInfo.vertexCount = vertexCount;
633             prevChunks += seqChunks;
634         }
635     }
636 
637     // Index buffer if needed. For indexed draws, we're going to draw vertices
638     // in reverse order, which means storing indices in reverse order in the
639     // index buffer. In addition, to check that vertexOffset is correctly read
640     // per draw, we're going to apply an offset to the index values stored in
641     // each chunk, with the offset being slightly different in each chunk.
642     std::vector<uint32_t> indices;
643     std::vector<BufferWithMemoryPtr> indexBuffers;
644     const VkBufferUsageFlags extraIndexBufferFlags =
645         (params.indexBufferToken() ? (VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) :
646                                      0u);
647     const MemoryRequirement extraIndexBufferMemReqs =
648         (params.indexBufferToken() ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any);
649 
650     if (params.indexedDraws())
651     {
652         // Indices in reverse order.
653         indices.reserve(vertices.size());
654 
655         uint32_t processedCount = 0u;
656         for (size_t i = 0u; i < chunkSizes.size(); ++i)
657         {
658             const auto chunkSize         = chunkSizes.at(i);
659             const auto chunkVertexCount  = chunkSize * kPerTriangleVertices;
660             const auto chunkVertexOffset = static_cast<uint32_t>(kVertexChunkOffset + i); // Varies a bit per chunk.
661 
662             for (uint32_t j = 0u; j < chunkVertexCount; ++j)
663             {
664                 const auto forwardIndex = processedCount + j;
665                 const auto reverseIndex = static_cast<uint32_t>(vertices.size() - 1u) - forwardIndex;
666                 const auto storedIndex  = reverseIndex + chunkVertexOffset;
667 
668                 indices.push_back(storedIndex);
669             }
670 
671             processedCount += chunkVertexCount;
672         }
673 
674         DE_ASSERT(vertices.size() == indices.size());
675 
676         const auto indexBufferSize  = static_cast<VkDeviceSize>(de::dataSize(indices));
677         const auto indexBufferUsage = (VK_BUFFER_USAGE_INDEX_BUFFER_BIT | extraIndexBufferFlags);
678         const auto indexBufferInfo  = makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
679 
680         // Store indices in one or more index buffers. When using multiple index buffers, all of them will be the same
681         // size, but each one will only contain its own chunk of real data and the rest will be zeroed out.
682         const std::vector<uint32_t> singleSeqVertCount{pixelCountU * kPerTriangleVertices};
683         std::vector<uint32_t> multiSeqVertCount(kSequenceCount, 0u);
684         std::transform(begin(sequenceInfos), end(sequenceInfos), begin(multiSeqVertCount),
685                        [](const SequenceInfo &s) { return s.vertexCount; });
686 
687         const auto &indexChunks =
688             ((params.testType == TestType::DRAW_INDEXED_COUNT) ? singleSeqVertCount : multiSeqVertCount);
689 
690         processedCount = 0u;
691         for (uint32_t i = 0u; i < de::sizeU32(indexChunks); ++i)
692         {
693             const auto chunkIndexCount = indexChunks.at(i);
694 
695             indexBuffers.emplace_back(new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, indexBufferInfo,
696                                                            (MemoryRequirement::HostVisible | extraIndexBufferMemReqs)));
697             auto &indexBuffer        = indexBuffers.back();
698             auto &indexBufferAlloc   = indexBuffer->getAllocation();
699             char *indexBufferBasePtr = reinterpret_cast<char *>(indexBufferAlloc.getHostPtr());
700 
701             // Zero-out the whole buffer first.
702             deMemset(indexBufferBasePtr, 0, de::dataSize(indices));
703 
704             // Copy the chunk to its own index buffer.
705             {
706                 const auto chunkSizeBytes = chunkIndexCount * DE_SIZEOF32(uint32_t);
707                 const auto srcPtr         = &indices.at(processedCount);
708                 const auto dstPtr         = indexBufferBasePtr + processedCount * DE_SIZEOF32(uint32_t);
709                 deMemcpy(dstPtr, srcPtr, chunkSizeBytes);
710             }
711 
712             processedCount += chunkIndexCount;
713         }
714     }
715 
716     // Create token data for the draw count tokens.
717     std::vector<VkDrawIndirectCountIndirectCommandEXT> drawTokenData;
718     drawTokenData.reserve(kSequenceCount);
719 
720     uint32_t maxDrawCount = 0u;
721     for (uint32_t i = 0u; i < kSequenceCount; ++i)
722     {
723         const auto &seqInfo = sequenceInfos.at(i);
724 
725         drawTokenData.emplace_back(VkDrawIndirectCountIndirectCommandEXT{
726             seqInfo.buffer->getDeviceAddress(),
727             seqInfo.stride,
728             seqInfo.chunkCount,
729         });
730 
731         if (seqInfo.chunkCount > maxDrawCount)
732             maxDrawCount = seqInfo.chunkCount;
733     }
734     if (rnd.getBool())
735         maxDrawCount *= 2u;
736 
737     // Create token data for the index buffer tokens, if used.
738     std::vector<VkBindIndexBufferIndirectCommandEXT> indexBufferTokenData;
739     if (params.indexBufferToken())
740     {
741         for (uint32_t i = 0u; i < kSequenceCount; ++i)
742         {
743             indexBufferTokenData.push_back(VkBindIndexBufferIndirectCommandEXT{
744                 getBufferDeviceAddress(ctx.vkd, ctx.device, indexBuffers.at(i)->get()),
745                 static_cast<uint32_t>(de::dataSize(indices)),
746                 VK_INDEX_TYPE_UINT32,
747             });
748         }
749     }
750 
751     // Color framebuffer.
752     const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
753     const auto colorUsage =
754         (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
755     ImageWithBuffer colorBuffer(ctx.vkd, ctx.device, ctx.allocator, vkExtent, colorFormat, colorUsage,
756                                 VK_IMAGE_TYPE_2D);
757     const auto colorSRR = makeDefaultImageSubresourceRange();
758 
759     const std::vector<VkViewport> viewports(1u, makeViewport(vkExtent));
760     const std::vector<VkRect2D> scissors(1u, makeRect2D(vkExtent));
761 
762     Move<VkRenderPass> renderPass;
763     Move<VkFramebuffer> framebuffer;
764 
765     if (!params.useShaderObjects)
766     {
767         renderPass  = makeRenderPass(ctx.vkd, ctx.device, colorFormat);
768         framebuffer = makeFramebuffer(ctx.vkd, ctx.device, *renderPass, colorBuffer.getImageView(), vkExtent.width,
769                                       vkExtent.height);
770     }
771 
772     // Input buffers. Used with execution sets.
773     const auto inputBuffers     = (params.useExecutionSet ? getInputBuffers() : BufferDataVec());
774     const auto inputBufferCount = de::sizeU32(inputBuffers);
775 
776     Move<VkDescriptorSetLayout> vertSetLayout;
777     Move<VkDescriptorSetLayout> fragSetLayout;
778     std::vector<VkDescriptorSetLayout> setLayouts;
779     std::vector<VkPushConstantRange> pcRanges;
780 
781     Move<VkDescriptorPool> descriptorPool;
782     Move<VkDescriptorSet> vertDescSet;
783     Move<VkDescriptorSet> fragDescSet;
784 
785     BufferVec vertBuffers;
786     BufferVec fragBuffers;
787 
788     // Only used with execution sets.
789     const auto pcSize   = DE_SIZEOF32(tcu::UVec2);
790     const auto pcStages = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_FRAGMENT_BIT);
791     const auto pcRange  = makePushConstantRange(pcStages, 0u, pcSize);
792     const auto pcData   = fbExtent.asUint().swizzle(0, 1);
793 
794     if (params.useExecutionSet || params.checkDrawParams)
795     {
796         uint32_t fragBufferCount = 0u;
797         uint32_t vertBufferCount = 0u;
798 
799         // Frag shader will always use set 1, so set 0 can be empty.
800         {
801             DescriptorSetLayoutBuilder vertLayoutBuilder;
802             for (uint32_t i = 0u; i < inputBufferCount; ++i)
803             {
804                 if (params.useExecutionSet)
805                     vertLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_VERTEX_BIT);
806             }
807             vertSetLayout   = vertLayoutBuilder.build(ctx.vkd, ctx.device);
808             vertBufferCount = inputBufferCount;
809         }
810 
811         DescriptorSetLayoutBuilder fragLayoutBuilder;
812         if (params.useExecutionSet)
813         {
814             fragLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_FRAGMENT_BIT);
815             ++fragBufferCount;
816         }
817         if (params.checkDrawParams)
818         {
819             fragLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_FRAGMENT_BIT);
820             ++fragBufferCount;
821         }
822         fragSetLayout = fragLayoutBuilder.build(ctx.vkd, ctx.device);
823 
824         setLayouts.push_back(*vertSetLayout);
825         setLayouts.push_back(*fragSetLayout);
826         pcRanges.push_back(pcRange);
827 
828         DescriptorPoolBuilder poolBuilder;
829         poolBuilder.addType(descType, vertBufferCount + fragBufferCount);
830         descriptorPool =
831             poolBuilder.build(ctx.vkd, ctx.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, kPipelineShaders);
832 
833         if (params.useExecutionSet)
834             vertDescSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *vertSetLayout);
835         fragDescSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *fragSetLayout);
836 
837         const auto bufferUsage = static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
838         for (uint32_t i = 0u; i < inputBufferCount; ++i)
839         {
840             const auto &inputBuffer = inputBuffers.at(i);
841             const auto bufferSize   = static_cast<VkDeviceSize>(de::dataSize(inputBuffer));
842             const auto createInfo   = makeBufferCreateInfo(bufferSize, bufferUsage);
843             vertBuffers.emplace_back(
844                 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
845 
846             auto &bufferAlloc   = vertBuffers.back()->getAllocation();
847             void *bufferDataPtr = bufferAlloc.getHostPtr();
848             deMemcpy(bufferDataPtr, de::dataOrNull(inputBuffer), de::dataSize(inputBuffer));
849         }
850 
851         if (params.useExecutionSet)
852         {
853             // Calculate expected accumulated values.
854             std::vector<int32_t> expectedAccums(pixelCountU,
855                                                 0); // Accumulated values for each pixel (this goes into the buffer).
856             std::vector<int32_t> bufferAccums(inputBuffers.size(), 0); // Accumulated values for each input buffer.
857             std::vector<uint32_t> seqSizesInPixels(inputBuffers.size(), 0u); // Number of pixels in each sequence.
858 
859             uint32_t prevChunks = 0u;
860             for (size_t seqIdx = 0u; seqIdx < sequenceInfos.size(); ++seqIdx)
861             {
862                 const auto &seqInfo = sequenceInfos.at(seqIdx);
863                 uint32_t seqPixels  = 0u;
864 
865                 for (uint32_t i = 0u; i < seqInfo.chunkCount; ++i)
866                 {
867                     const auto chunkIdx   = prevChunks + i;
868                     const auto pixelCount = chunkSizes.at(chunkIdx);
869 
870                     seqPixels += pixelCount;
871                 }
872 
873                 seqSizesInPixels.at(seqIdx) = seqPixels;
874                 prevChunks += seqInfo.chunkCount;
875             }
876 
877             for (size_t i = 0u; i < inputBuffers.size(); ++i)
878             {
879                 const auto &inputBuffer = inputBuffers.at(i);
880                 bufferAccums.at(i)      = std::accumulate(begin(inputBuffer), end(inputBuffer), 0);
881             }
882 
883             // Using the accumulated values for each input buffer and the number of
884             // pixels in each sequence, set the expected accumulated value in each
885             // pixel.
886             uint32_t prevPixels = 0u;
887             for (size_t i = 0u; i < seqSizesInPixels.size(); ++i)
888             {
889                 const auto &seqPixels = seqSizesInPixels.at(i);
890                 for (uint32_t j = 0u; j < seqPixels; ++j)
891                 {
892                     const auto pixelIdx         = prevPixels + j;
893                     expectedAccums.at(pixelIdx) = bufferAccums.at(i);
894                 }
895                 prevPixels += seqPixels;
896             }
897 
898             // Indexed draws happen in reverse order.
899             if (params.indexedDraws())
900                 std::reverse(begin(expectedAccums), end(expectedAccums));
901 
902             const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(expectedAccums));
903             const auto createInfo = makeBufferCreateInfo(bufferSize, bufferUsage);
904             fragBuffers.emplace_back(
905                 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
906 
907             auto &bufferAlloc   = fragBuffers.back()->getAllocation();
908             void *bufferDataPtr = bufferAlloc.getHostPtr();
909             deMemcpy(bufferDataPtr, de::dataOrNull(expectedAccums), de::dataSize(expectedAccums));
910         }
911 
912         if (params.checkDrawParams)
913         {
914             std::vector<tcu::IVec4> expectedDrawIndices;
915             expectedDrawIndices.reserve(pixelCountU);
916 
917             uint32_t prevChunks = 0u;
918             for (uint32_t i = 0u; i < kSequenceCount; ++i)
919             {
920                 uint32_t drawIdx    = 0u; // Resets at the start of each sequence.
921                 const auto &seqInfo = sequenceInfos.at(i);
922 
923                 for (uint32_t j = 0u; j < seqInfo.chunkCount; ++j)
924                 {
925                     const auto chunkIdx  = prevChunks + j;
926                     const auto chunkSize = chunkSizes.at(chunkIdx);
927                     const auto baseVertex =
928                         (params.testType == TestType::DRAW_COUNT ? chunkDraws.at(chunkIdx).firstVertex :
929                                                                    chunkIndexedDraws.at(chunkIdx).vertexOffset);
930                     const auto baseInstance =
931                         (params.testType == TestType::DRAW_COUNT ? chunkDraws.at(chunkIdx).firstInstance :
932                                                                    chunkIndexedDraws.at(chunkIdx).firstInstance);
933 
934                     for (uint32_t k = 0u; k < chunkSize; ++k)
935                         expectedDrawIndices.push_back(tcu::UVec4(drawIdx, baseVertex, baseInstance, 0).asInt());
936 
937                     ++drawIdx; // Increases with each draw.
938                 }
939 
940                 prevChunks += seqInfo.chunkCount;
941             }
942 
943             // Indexed draws happen in reverse order.
944             if (params.indexedDraws())
945                 std::reverse(begin(expectedDrawIndices), end(expectedDrawIndices));
946 
947             const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(expectedDrawIndices));
948             const auto createInfo = makeBufferCreateInfo(bufferSize, bufferUsage);
949             fragBuffers.emplace_back(
950                 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
951 
952             auto &bufferAlloc   = fragBuffers.back()->getAllocation();
953             void *bufferDataPtr = bufferAlloc.getHostPtr();
954             deMemcpy(bufferDataPtr, de::dataOrNull(expectedDrawIndices), de::dataSize(expectedDrawIndices));
955         }
956 
957         // Update descriptors with each buffer.
958         DescriptorSetUpdateBuilder updateBuilder;
959         using Location = DescriptorSetUpdateBuilder::Location;
960 
961         for (uint32_t i = 0u; i < de::sizeU32(vertBuffers); ++i)
962         {
963             const auto bufferInfo = makeDescriptorBufferInfo(vertBuffers.at(i)->get(), 0ull, VK_WHOLE_SIZE);
964             updateBuilder.writeSingle(*vertDescSet, Location::binding(i), descType, &bufferInfo);
965         }
966         for (uint32_t i = 0u; i < de::sizeU32(fragBuffers); ++i)
967         {
968             const auto bufferInfo = makeDescriptorBufferInfo(fragBuffers.at(i)->get(), 0ull, VK_WHOLE_SIZE);
969             updateBuilder.writeSingle(*fragDescSet, Location::binding(i), descType, &bufferInfo);
970         }
971         updateBuilder.update(ctx.vkd, ctx.device);
972     }
973 
974     const auto pipelineLayout =
975         makePipelineLayout(ctx.vkd, ctx.device, de::sizeU32(setLayouts), de::dataOrNull(setLayouts),
976                            de::sizeU32(pcRanges), de::dataOrNull(pcRanges));
977 
978     // Shader modules.
979     const auto &binaries      = context.getBinaryCollection();
980     const auto shaderSetCount = (params.useExecutionSet ? kSequenceCount : 1u);
981 
982     using ModuleVec = std::vector<Move<VkShaderModule>>;
983     ModuleVec vertModules;
984     ModuleVec fragModules;
985 
986     using ShaderVec = std::vector<Move<VkShaderEXT>>;
987     ShaderVec vertShaders;
988     ShaderVec fragShaders;
989 
990     using DGCShaderExtPtr = std::unique_ptr<DGCShaderExt>;
991     using DGCShaderVec    = std::vector<DGCShaderExtPtr>;
992     DGCShaderVec vertShadersDGC;
993     DGCShaderVec fragShadersDGC;
994 
995     const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
996     const auto &features     = context.getDeviceFeatures();
997 
998     const auto tessFeature = (features.tessellationShader == VK_TRUE);
999     const auto geomFeature = (features.geometryShader == VK_TRUE);
1000 
1001     if (!params.useShaderObjects)
1002     {
1003         vertModules.reserve(shaderSetCount);
1004         fragModules.reserve(shaderSetCount);
1005 
1006         for (uint32_t i = 0u; i < shaderSetCount; ++i)
1007         {
1008             const auto suffix   = (params.useExecutionSet ? std::to_string(i) : std::string());
1009             const auto vertName = "vert" + suffix;
1010             const auto fragName = "frag" + suffix;
1011             vertModules.push_back(createShaderModule(ctx.vkd, ctx.device, binaries.get(vertName)));
1012             fragModules.push_back(createShaderModule(ctx.vkd, ctx.device, binaries.get(fragName)));
1013         }
1014     }
1015     else
1016     {
1017         std::vector<VkDescriptorSetLayout> vertSetLayouts;
1018         std::vector<VkDescriptorSetLayout> fragSetLayouts;
1019 
1020         if (*vertSetLayout != VK_NULL_HANDLE)
1021         {
1022             vertSetLayouts.push_back(*vertSetLayout);
1023             fragSetLayouts.push_back(*vertSetLayout);
1024         }
1025 
1026         if (*fragSetLayout != VK_NULL_HANDLE)
1027             fragSetLayouts.push_back(*fragSetLayout);
1028 
1029         const std::vector<VkPushConstantRange> vertPCRanges;
1030         const std::vector<VkPushConstantRange> &fragPCRanges = pcRanges;
1031 
1032         // Otherwise we need to modify the vectors above.
1033         DE_ASSERT(pcStages == static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_FRAGMENT_BIT));
1034 
1035         if (params.useExecutionSet)
1036         {
1037             vertShadersDGC.reserve(shaderSetCount);
1038             fragShadersDGC.reserve(shaderSetCount);
1039         }
1040         else
1041         {
1042             vertShaders.reserve(shaderSetCount);
1043             fragShaders.reserve(shaderSetCount);
1044         }
1045 
1046         for (uint32_t i = 0u; i < shaderSetCount; ++i)
1047         {
1048             const auto suffix   = (params.useExecutionSet ? std::to_string(i) : std::string());
1049             const auto vertName = "vert" + suffix;
1050             const auto fragName = "frag" + suffix;
1051 
1052             if (params.useExecutionSet)
1053             {
1054                 vertShadersDGC.emplace_back(new DGCShaderExt(ctx.vkd, ctx.device, VK_SHADER_STAGE_VERTEX_BIT, 0u,
1055                                                              binaries.get(vertName), vertSetLayouts, vertPCRanges,
1056                                                              tessFeature, geomFeature));
1057                 fragShadersDGC.emplace_back(new DGCShaderExt(ctx.vkd, ctx.device, VK_SHADER_STAGE_FRAGMENT_BIT, 0u,
1058                                                              binaries.get(fragName), fragSetLayouts, fragPCRanges,
1059                                                              tessFeature, geomFeature));
1060             }
1061             else
1062             {
1063                 vertShaders.push_back(makeSingleShader(ctx.vkd, ctx.device, VK_SHADER_STAGE_VERTEX_BIT,
1064                                                        binaries.get(vertName), vertSetLayouts, vertPCRanges));
1065                 fragShaders.push_back(makeSingleShader(ctx.vkd, ctx.device, VK_SHADER_STAGE_FRAGMENT_BIT,
1066                                                        binaries.get(fragName), fragSetLayouts, fragPCRanges));
1067             }
1068         }
1069     }
1070 
1071     const std::vector<VkVertexInputBindingDescription> vertexBindings{
1072         makeVertexInputBindingDescription(0u, DE_SIZEOF32(VertexData), VK_VERTEX_INPUT_RATE_VERTEX),
1073     };
1074 
1075     const std::vector<VkVertexInputAttributeDescription> vertexAttributes{
1076         makeVertexInputAttributeDescription(0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT,
1077                                             static_cast<uint32_t>(offsetof(VertexData, position))),
1078         makeVertexInputAttributeDescription(1u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT,
1079                                             static_cast<uint32_t>(offsetof(VertexData, extraData))),
1080     };
1081 
1082     const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
1083         VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, //   VkStructureType                             sType;
1084         nullptr,                        //   const void*                                 pNext;
1085         0u,                             //   VkPipelineVertexInputStateCreateFlags       flags;
1086         de::sizeU32(vertexBindings),    //   uint32_t                                    vertexBindingDescriptionCount;
1087         de::dataOrNull(vertexBindings), //   const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
1088         de::sizeU32(vertexAttributes), //   uint32_t                                    vertexAttributeDescriptionCount;
1089         de::dataOrNull(vertexAttributes), //   const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
1090     };
1091 
1092     std::vector<Move<VkPipeline>> pipelines;
1093 
1094     if (!params.useShaderObjects)
1095     {
1096         for (uint32_t i = 0u; i < shaderSetCount; ++i)
1097         {
1098             const auto createFlags =
1099                 static_cast<VkPipelineCreateFlags2KHR>(VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT);
1100 
1101             const VkPipelineCreateFlags2CreateInfoKHR pipelineCreateFlags = {
1102                 VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR, //   VkStructureType             sType;
1103                 nullptr,                                                   //   const void*                 pNext;
1104                 createFlags,                                               //   VkPipelineCreateFlags2KHR   flags;
1105             };
1106 
1107             const void *pNext = (params.useExecutionSet ? &pipelineCreateFlags : nullptr);
1108 
1109             pipelines.push_back(
1110                 makeGraphicsPipeline(ctx.vkd, ctx.device, *pipelineLayout, *vertModules.at(i), VK_NULL_HANDLE,
1111                                      VK_NULL_HANDLE, VK_NULL_HANDLE, *fragModules.at(i), *renderPass, viewports,
1112                                      scissors, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u, 0u, &vertexInputStateCreateInfo,
1113                                      nullptr, nullptr, nullptr, nullptr, nullptr, pNext, 0u));
1114         }
1115     }
1116 
1117     // Indirect commands layout.
1118     VkIndirectCommandsLayoutUsageFlagsEXT cmdsLayoutFlags = 0u;
1119 
1120     if (params.doPreprocess())
1121         cmdsLayoutFlags |= VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT;
1122 
1123     if (params.unorderedSequences)
1124         cmdsLayoutFlags |= VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_EXT;
1125 
1126     // We do not pass the pipeline layout because we don't have push constants or sequence index tokens.
1127     IndirectCommandsLayoutBuilderExt cmdsLayoutBuilder(cmdsLayoutFlags, stageFlags, VK_NULL_HANDLE);
1128 
1129     if (params.useExecutionSet)
1130     {
1131         const auto executionSetType =
1132             (params.useShaderObjects ? VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT :
1133                                        VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT);
1134         cmdsLayoutBuilder.addExecutionSetToken(cmdsLayoutBuilder.getStreamRange(), executionSetType, stageFlags);
1135     }
1136 
1137     if (params.indexBufferToken())
1138         cmdsLayoutBuilder.addIndexBufferToken(cmdsLayoutBuilder.getStreamRange(),
1139                                               VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT);
1140 
1141     if (params.testType == TestType::DRAW_COUNT)
1142         cmdsLayoutBuilder.addDrawCountToken(cmdsLayoutBuilder.getStreamRange());
1143     else if (params.indexedDraws())
1144         cmdsLayoutBuilder.addDrawIndexedCountToken(cmdsLayoutBuilder.getStreamRange());
1145     else
1146         DE_ASSERT(false);
1147 
1148     const auto cmdsLayout = cmdsLayoutBuilder.build(ctx.vkd, ctx.device);
1149 
1150     // Indirect execution set, if needed.
1151     ExecutionSetManagerPtr executionSetManager;
1152     VkIndirectExecutionSetEXT indirectExecutionSet = VK_NULL_HANDLE;
1153 
1154     if (params.useExecutionSet)
1155     {
1156         if (params.useShaderObjects)
1157         {
1158             const std::vector<VkDescriptorSetLayout> vertSetLayouts{*vertSetLayout};
1159             const std::vector<VkDescriptorSetLayout> fragSetLayouts{VK_NULL_HANDLE, *fragSetLayout};
1160 
1161             const std::vector<IESStageInfo> stagesInfo = {
1162                 IESStageInfo{vertShadersDGC.at(0u)->get(), vertSetLayouts},
1163                 IESStageInfo{fragShadersDGC.at(0u)->get(), fragSetLayouts},
1164             };
1165 
1166             executionSetManager = makeExecutionSetManagerShader(ctx.vkd, ctx.device, stagesInfo, pcRanges,
1167                                                                 shaderSetCount * kPipelineShaders);
1168 
1169             // Note we start at 1 and rely on the initial entry set above.
1170             for (uint32_t i = 1u; i < shaderSetCount; ++i)
1171             {
1172                 executionSetManager->addShader(i * kPipelineShaders + 0u, vertShadersDGC.at(i)->get());
1173                 executionSetManager->addShader(i * kPipelineShaders + 1u, fragShadersDGC.at(i)->get());
1174             }
1175             executionSetManager->update();
1176             indirectExecutionSet = executionSetManager->get();
1177         }
1178         else
1179         {
1180             executionSetManager =
1181                 makeExecutionSetManagerPipeline(ctx.vkd, ctx.device, *pipelines.at(0u), kSequenceCount);
1182             for (uint32_t i = 0u; i < shaderSetCount; ++i)
1183                 executionSetManager->addPipeline(i, *pipelines.at(i));
1184             executionSetManager->update();
1185             indirectExecutionSet = executionSetManager->get();
1186         }
1187     }
1188 
1189     // DGC buffer contents.
1190     std::vector<uint32_t> dgcData;
1191     dgcData.reserve((kSequenceCount * cmdsLayoutBuilder.getStreamStride()) / sizeof(uint32_t));
1192 
1193     for (uint32_t i = 0u; i < kSequenceCount; ++i)
1194     {
1195         if (params.useExecutionSet)
1196         {
1197             if (params.useShaderObjects)
1198             {
1199                 pushBackElement(dgcData, i * kPipelineShaders + 0u);
1200                 pushBackElement(dgcData, i * kPipelineShaders + 1u);
1201             }
1202             else
1203                 pushBackElement(dgcData, i);
1204         }
1205         if (params.indexBufferToken())
1206             pushBackElement(dgcData, indexBufferTokenData.at(i));
1207         pushBackElement(dgcData, drawTokenData.at(i));
1208     }
1209 
1210     // DGC buffer with those contents.
1211     const auto dgcBufferSize = static_cast<VkDeviceSize>(de::dataSize(dgcData));
1212     DGCBuffer dgcBuffer(ctx.vkd, ctx.device, ctx.allocator, dgcBufferSize);
1213     auto &dgcBufferAlloc = dgcBuffer.getAllocation();
1214     void *dgcBufferData  = dgcBufferAlloc.getHostPtr();
1215 
1216     deMemcpy(dgcBufferData, de::dataOrNull(dgcData), de::dataSize(dgcData));
1217 
1218     // Preprocess buffer.
1219     const auto prepPipeline =
1220         (indirectExecutionSet == VK_NULL_HANDLE && !params.useShaderObjects ? *pipelines.at(0u) : VK_NULL_HANDLE);
1221 
1222     std::vector<VkShaderEXT> prepShaders;
1223     if (indirectExecutionSet == VK_NULL_HANDLE && params.useShaderObjects)
1224     {
1225         prepShaders.push_back(*vertShaders.at(0));
1226         prepShaders.push_back(*fragShaders.at(0));
1227     }
1228     const std::vector<VkShaderEXT> *shadersVecPtr = (prepShaders.empty() ? nullptr : &prepShaders);
1229     PreprocessBufferExt preprocessBuffer(ctx.vkd, ctx.device, ctx.allocator, indirectExecutionSet, *cmdsLayout,
1230                                          kSequenceCount, maxDrawCount, prepPipeline, shadersVecPtr);
1231 
1232     // Command pool and buffer.
1233     CommandPoolWithBuffer cmd(ctx.vkd, ctx.device, ctx.qfIndex);
1234     const auto cmdBuffer = *cmd.cmdBuffer;
1235 
1236     const tcu::Vec4 fbClearColor(0.0f, 0.0f, 0.0f, 1.0f);
1237 
1238     // Generated commands info. maxDrawCount is the same value passed when creating preprocessBuffer above.
1239     const DGCGenCmdsInfo cmdsInfo(
1240         stageFlags,                          //   VkShaderStageFlags          shaderStages;
1241         indirectExecutionSet,                //   VkIndirectExecutionSetEXT   indirectExecutionSet;
1242         *cmdsLayout,                         //   VkIndirectCommandsLayoutEXT indirectCommandsLayout;
1243         dgcBuffer.getDeviceAddress(),        //   VkDeviceAddress             indirectAddress;
1244         dgcBuffer.getSize(),                 //   VkDeviceSize                indirectAddressSize;
1245         preprocessBuffer.getDeviceAddress(), //   VkDeviceAddress             preprocessAddress;
1246         preprocessBuffer.getSize(),          //   VkDeviceSize                preprocessSize;
1247         kSequenceCount,                      //   uint32_t                    maxSequenceCount;
1248         0ull,                                //   VkDeviceAddress             sequenceCountAddress;
1249         maxDrawCount,                        //   uint32_t                    maxDrawCount;
1250         prepPipeline, shadersVecPtr);
1251 
1252     // When preprocessing, we need to use a command buffer to record state.
1253     // The preprocessing step needs to happen outside the render pass.
1254     Move<VkCommandBuffer> separateStateCmdBuffer;
1255 
1256     // A command buffer we want to record state into.
1257     // .first is the command buffer itself.
1258     // .second, if not NULL, means we'll record a preprocess command with it as the state command buffer.
1259     using StateCmdBuffer                 = std::pair<VkCommandBuffer, VkCommandBuffer>;
1260     const VkCommandBuffer kNullCmdBuffer = VK_NULL_HANDLE; // Workaround for types and emplace_back below.
1261     std::vector<StateCmdBuffer> stateCmdBuffers;
1262 
1263     // Sequences and iterations for the different cases:
1264     //     - PreprocessType::NONE
1265     //         - Only one loop iteration.
1266     //         - Iteration 0: .first = main cmd buffer, .second = NULL
1267     //             - No preprocess, bind state
1268     //         - Execute.
1269     //     - PreprocessType::OTHER_STATE_CMD_BUFFER
1270     //         - Iteration 0: .first = state cmd buffer, .second = NULL
1271     //             - No preprocess, bind state
1272     //         - Iteration 1: .first = main cmd buffer, .second = state cmd buffer
1273     //             - Preprocess with state cmd buffer, bind state on main
1274     //         - Execute.
1275     //     - PreprocessType::SAME_STATE_CMD_BUFFER
1276     //         - Iteration 0: .first = main cmd buffer, .second = NULL
1277     //             - No preprocess, bind state
1278     //         - Iteration 1: .first = main cmd buffer, .second = main cmd buffer
1279     //             - Preprocess with main cmd buffer, break
1280     //         - Execute.
1281     switch (params.preprocessType)
1282     {
1283     case PreprocessType::NONE:
1284         stateCmdBuffers.emplace_back(cmdBuffer, kNullCmdBuffer);
1285         break;
1286     case PreprocessType::SAME_STATE_CMD_BUFFER:
1287         stateCmdBuffers.emplace_back(cmdBuffer, kNullCmdBuffer);
1288         stateCmdBuffers.emplace_back(cmdBuffer, cmdBuffer);
1289         break;
1290     case PreprocessType::OTHER_STATE_CMD_BUFFER:
1291         separateStateCmdBuffer =
1292             allocateCommandBuffer(ctx.vkd, ctx.device, *cmd.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1293         stateCmdBuffers.emplace_back(*separateStateCmdBuffer, kNullCmdBuffer);
1294         stateCmdBuffers.emplace_back(cmdBuffer, *separateStateCmdBuffer);
1295         break;
1296     default:
1297         DE_ASSERT(false);
1298     }
1299 
1300     // Record pre-execution state to all needed command buffers.
1301     VkCommandBuffer prevCmdBuffer = VK_NULL_HANDLE;
1302     for (const auto &stateCmdBufferPair : stateCmdBuffers)
1303     {
1304         const auto &recCmdBuffer = stateCmdBufferPair.first;
1305 
1306         // Only begin each command buffer once.
1307         if (recCmdBuffer != prevCmdBuffer)
1308         {
1309             beginCommandBuffer(ctx.vkd, recCmdBuffer);
1310             prevCmdBuffer = recCmdBuffer;
1311         }
1312 
1313         if (stateCmdBufferPair.second != VK_NULL_HANDLE)
1314         {
1315             ctx.vkd.cmdPreprocessGeneratedCommandsEXT(recCmdBuffer, &cmdsInfo.get(), stateCmdBufferPair.second);
1316             separateStateCmdBuffer = Move<VkCommandBuffer>(); // Delete separate state command buffer right away.
1317 
1318             preprocessToExecuteBarrierExt(ctx.vkd, recCmdBuffer);
1319 
1320             // Break for iteration 1 of PreprocessType::SAME_STATE_CMD_BUFFER. See above.
1321             if (stateCmdBufferPair.first == stateCmdBufferPair.second)
1322                 break;
1323         }
1324 
1325         if (params.useExecutionSet || params.checkDrawParams)
1326         {
1327             const std::vector<VkDescriptorSet> descriptorSets{*vertDescSet, *fragDescSet};
1328             ctx.vkd.cmdBindDescriptorSets(recCmdBuffer, bindPoint, *pipelineLayout, 0u, de::sizeU32(descriptorSets),
1329                                           de::dataOrNull(descriptorSets), 0u, nullptr);
1330             ctx.vkd.cmdPushConstants(recCmdBuffer, *pipelineLayout, pcStages, 0u, pcSize, &pcData);
1331         }
1332 
1333         ctx.vkd.cmdBindVertexBuffers(recCmdBuffer, 0u, 1u, &vertexBuffer.get(), &vertexBufferOffset);
1334         if (params.testType == TestType::DRAW_INDEXED_COUNT)
1335             ctx.vkd.cmdBindIndexBuffer(recCmdBuffer, indexBuffers.at(0u)->get(), 0ull, VK_INDEX_TYPE_UINT32);
1336 
1337         if (!params.useShaderObjects)
1338             ctx.vkd.cmdBindPipeline(recCmdBuffer, bindPoint, *pipelines.at(0u)); // Execution set or not.
1339         else
1340         {
1341             std::map<VkShaderStageFlagBits, VkShaderEXT> boundShaders;
1342             if (meshFeatures.meshShader)
1343                 boundShaders[VK_SHADER_STAGE_MESH_BIT_EXT] = VK_NULL_HANDLE;
1344             if (meshFeatures.taskShader)
1345                 boundShaders[VK_SHADER_STAGE_TASK_BIT_EXT] = VK_NULL_HANDLE;
1346             if (features.tessellationShader)
1347             {
1348                 boundShaders[VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT]    = VK_NULL_HANDLE;
1349                 boundShaders[VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = VK_NULL_HANDLE;
1350             }
1351             if (features.geometryShader)
1352                 boundShaders[VK_SHADER_STAGE_GEOMETRY_BIT] = VK_NULL_HANDLE;
1353 
1354             if (params.useExecutionSet)
1355             {
1356                 boundShaders[VK_SHADER_STAGE_VERTEX_BIT]   = vertShadersDGC.at(0u)->get();
1357                 boundShaders[VK_SHADER_STAGE_FRAGMENT_BIT] = fragShadersDGC.at(0u)->get();
1358             }
1359             else
1360             {
1361                 boundShaders[VK_SHADER_STAGE_VERTEX_BIT]   = *vertShaders.at(0u);
1362                 boundShaders[VK_SHADER_STAGE_FRAGMENT_BIT] = *fragShaders.at(0u);
1363             }
1364 
1365             {
1366                 std::vector<VkShaderStageFlagBits> stages;
1367                 std::vector<VkShaderEXT> shaders;
1368 
1369                 stages.reserve(boundShaders.size());
1370                 shaders.reserve(boundShaders.size());
1371 
1372                 for (const auto &stageShader : boundShaders)
1373                 {
1374                     stages.push_back(stageShader.first);
1375                     shaders.push_back(stageShader.second);
1376                 }
1377 
1378                 DE_ASSERT(shaders.size() == stages.size());
1379                 ctx.vkd.cmdBindShadersEXT(recCmdBuffer, de::sizeU32(shaders), de::dataOrNull(stages),
1380                                           de::dataOrNull(shaders));
1381             }
1382         }
1383 
1384         if (params.useShaderObjects)
1385             bindShaderObjectState(ctx.vkd, getDeviceCreationExtensions(context), recCmdBuffer, viewports, scissors,
1386                                   VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u, &vertexInputStateCreateInfo, nullptr,
1387                                   nullptr, nullptr, nullptr);
1388     }
1389 
1390     if (params.useShaderObjects)
1391     {
1392         const auto clearColor = makeClearValueColor(fbClearColor);
1393         const auto preClearBarrier =
1394             makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
1395                                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, colorBuffer.getImage(), colorSRR);
1396         const auto postClearBarrier = makeImageMemoryBarrier(
1397             VK_ACCESS_TRANSFER_WRITE_BIT, (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),
1398             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, colorBuffer.getImage(),
1399             colorSRR);
1400 
1401         cmdPipelineImageMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1402                                       VK_PIPELINE_STAGE_TRANSFER_BIT, &preClearBarrier);
1403         ctx.vkd.cmdClearColorImage(cmdBuffer, colorBuffer.getImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1404                                    &clearColor.color, 1u, &colorSRR);
1405         cmdPipelineImageMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
1406                                       VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &postClearBarrier);
1407         beginRendering(ctx.vkd, cmdBuffer, colorBuffer.getImageView(), scissors.at(0u), clearColor /*not used*/,
1408                        VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
1409     }
1410     else
1411         beginRenderPass(ctx.vkd, cmdBuffer, *renderPass, *framebuffer, scissors.at(0u), fbClearColor);
1412 
1413     {
1414         const VkBool32 isPreprocessed = makeVkBool(params.doPreprocess());
1415         ctx.vkd.cmdExecuteGeneratedCommandsEXT(cmdBuffer, isPreprocessed, &cmdsInfo.get());
1416     }
1417 
1418     if (params.useShaderObjects)
1419         endRendering(ctx.vkd, cmdBuffer);
1420     else
1421         endRenderPass(ctx.vkd, cmdBuffer);
1422 
1423     copyImageToBuffer(ctx.vkd, cmdBuffer, colorBuffer.getImage(), colorBuffer.getBuffer(), fbExtent.swizzle(0, 1));
1424     endCommandBuffer(ctx.vkd, cmdBuffer);
1425     submitCommandsAndWait(ctx.vkd, ctx.device, ctx.queue, cmdBuffer);
1426 
1427     // Generate reference image.
1428     const auto tcuFormat = mapVkFormat(colorFormat);
1429     tcu::TextureLevel refLevel(tcuFormat, fbExtent.x(), fbExtent.y(), fbExtent.z());
1430     auto refAccess = refLevel.getAccess();
1431 
1432     const auto maxInstanceIndex = static_cast<float>(kMaxInstanceIndex);
1433     const bool indexed          = (params.indexedDraws());
1434     const auto totalDraws       = (indexed ? chunkIndexedDraws.size() : chunkDraws.size());
1435     uint32_t prevPixels         = 0u;
1436 
1437     for (size_t drawIdx = 0u; drawIdx < totalDraws; ++drawIdx)
1438     {
1439         const auto vertexCount =
1440             (indexed ? chunkIndexedDraws.at(drawIdx).indexCount : chunkDraws.at(drawIdx).vertexCount);
1441         const auto firstInstance =
1442             (indexed ? chunkIndexedDraws.at(drawIdx).firstInstance : chunkDraws.at(drawIdx).firstInstance);
1443         const auto instanceCount =
1444             (indexed ? chunkIndexedDraws.at(drawIdx).instanceCount : chunkDraws.at(drawIdx).instanceCount);
1445 
1446         DE_ASSERT(vertexCount % kPerTriangleVertices == 0u);
1447         const auto chunkPixels = vertexCount / kPerTriangleVertices;
1448 
1449         for (uint32_t i = 0u; i < chunkPixels; ++i)
1450         {
1451             const auto curPixel   = prevPixels + i;
1452             const auto pixelIdx   = (indexed ? (pixelCountU - 1u - curPixel) : curPixel); // Reversed for indexed draws.
1453             const auto row        = static_cast<int>(pixelIdx / vkExtent.width);
1454             const auto col        = static_cast<int>(pixelIdx % vkExtent.width);
1455             const auto redValue   = static_cast<float>(firstInstance + (instanceCount - 1u)) / maxInstanceIndex;
1456             const auto &extraData = vertices.at(pixelIdx * kPerTriangleVertices).extraData;
1457             const bool blank = (extraData.x() < 0.0f || extraData.y() < 0.0f); // Filtered by clip or cull distance.
1458 
1459             const tcu::Vec4 color(redValue, 0.0f, 1.0f, 1.0f);
1460             refAccess.setPixel((blank ? fbClearColor : color), col, row);
1461         }
1462         prevPixels += chunkPixels;
1463     }
1464 
1465     // Reference access.
1466     auto &colorAlloc = colorBuffer.getBufferAllocation();
1467     invalidateAlloc(ctx.vkd, ctx.device, colorAlloc);
1468 
1469     const tcu::ConstPixelBufferAccess resAccess(tcuFormat, fbExtent, colorAlloc.getHostPtr());
1470 
1471     const float colorThreshold = 0.005f; // 1/255 < 0.005f < 2/255.
1472     const tcu::Vec4 threshold(colorThreshold, colorThreshold, colorThreshold, colorThreshold);
1473     auto &log = context.getTestContext().getLog();
1474     if (!tcu::floatThresholdCompare(log, "Result", "", refAccess, resAccess, threshold, tcu::COMPARE_LOG_ON_ERROR))
1475         TCU_FAIL("Unexpected result found in color buffer; check log for details");
1476 
1477     return tcu::TestStatus::pass("Pass");
1478 }
1479 
1480 } // anonymous namespace
1481 
createDGCGraphicsDrawCountTestsExt(tcu::TestContext & testCtx)1482 tcu::TestCaseGroup *createDGCGraphicsDrawCountTestsExt(tcu::TestContext &testCtx)
1483 {
1484     GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "draw_count"));
1485 
1486     GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, "token_draw_count"));
1487     GroupPtr drawIndexedCountGroup(new tcu::TestCaseGroup(testCtx, "token_draw_indexed_count"));
1488 
1489     const struct
1490     {
1491         PreprocessType preprocessType;
1492         const char *suffix;
1493     } preprocessCases[] = {
1494         {PreprocessType::NONE, ""},
1495         {PreprocessType::SAME_STATE_CMD_BUFFER, "_preprocess_same_state_cmd_buffer"},
1496         {PreprocessType::OTHER_STATE_CMD_BUFFER, "_preprocess_separate_state_cmd_buffer"},
1497     };
1498 
1499     const struct
1500     {
1501         TestType testType;
1502         const char *suffix;
1503     } testTypeCases[] = {
1504         {TestType::DRAW_COUNT, ""},
1505         {TestType::DRAW_INDEXED_COUNT, ""}, // Also no suffix but will go into a different test group.
1506         {TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN, "_with_index_buffer_token"},
1507     };
1508 
1509     for (const auto &testTypeCase : testTypeCases)
1510         for (const bool executionSets : {false, true})
1511             for (const bool shaderObjects : {false, true})
1512                 for (const auto &preProcessCase : preprocessCases)
1513                     for (const bool unordered : {false, true})
1514                         for (const bool checkDrawParams : {false, true})
1515                         {
1516                             const TestParams params{testTypeCase.testType, preProcessCase.preprocessType,
1517                                                     checkDrawParams,       executionSets,
1518                                                     shaderObjects,         unordered};
1519 
1520                             const std::string testName =
1521                                 std::string() + (shaderObjects ? "shader_objects" : "pipelines") +
1522                                 (executionSets ? "_execution_set" : "") + preProcessCase.suffix +
1523                                 (unordered ? "_unordered" : "") + (checkDrawParams ? "_check_draw_params" : "") +
1524                                 testTypeCase.suffix;
1525 
1526                             const auto group =
1527                                 (params.indexedDraws() ? drawIndexedCountGroup.get() : drawCountGroup.get());
1528                             addFunctionCaseWithPrograms(group, testName, checkDrawCountSupport, initDrawCountPrograms,
1529                                                         testDrawCountRun, params);
1530                         }
1531 
1532     mainGroup->addChild(drawCountGroup.release());
1533     mainGroup->addChild(drawIndexedCountGroup.release());
1534 
1535     return mainGroup.release();
1536 }
1537 
1538 } // namespace DGC
1539 } // namespace vkt
1540