1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2024 The Khronos Group Inc.
6 * Copyright (c) 2024 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Device Generated Commands EXT Graphics Draw Tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktDGCGraphicsDrawCountTestsExt.hpp"
26 #include "vktTestCase.hpp"
27 #include "vktDGCUtilExt.hpp"
28 #include "vktDGCUtilCommon.hpp"
29 #include "vktTestCaseUtil.hpp"
30
31 #include "vkTypeUtil.hpp"
32 #include "vkObjUtil.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkBuilderUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37
38 #include "tcuImageCompare.hpp"
39
40 #include "deUniquePtr.hpp"
41 #include "deRandom.hpp"
42
43 #include <numeric>
44 #include <vector>
45 #include <cstddef>
46 #include <sstream>
47 #include <algorithm>
48 #include <iterator>
49 #include <utility>
50
51 namespace vkt
52 {
53 namespace DGC
54 {
55
56 using namespace vk;
57
58 namespace
59 {
60
61 /*
62 GENERAL MECHANISM BEHIND THESE TESTS:
63
64 Create a framebuffer of 32x32 pixels.
65 - This gives us a total of 1024 pixels to draw over.
66 Create one triangle to cover each pixel and store their vertices in a vertex buffer.
67 Divide the 1024 pixels in 16 pseudorandom chunks.
68 - For that, choose a number of pixels randomly between 1 and 64 pixels for the first 15 chunks.
69 - For the last chunk, choose the remaining pixels.
70 For each of those chunks, create a VkDrawIndirectCommand structure.
71 - vertexCount is the number of pixels in each chunk * 3.
72 - firstVertex is the number of pixels in the previous chunks * 3.
73 - Choose pseudorandomly one of 256 InstanceIndex values for each pixel:
74 - Value in [0, 16, 32, 48, 64...] for firstInstance
75 - Value in [1..16] for instanceCount
76 - InstanceIndex will be a pseudorandom number in 0..255.
77 Pseudorandomly choose to split the list of chunks in 4 (buffers)
78 - Similar to dividing the pixels in chunks.
79 Pseudorandomly choose how many extra structures to put in the middle for padding in each buffer.
80 - For example, from 0 to 7.
81 With that, create 4 VkDrawIndirectCountIndirectCommandEXT structures:
82 - bufferAddress will vary in each of the 4 buffers.
83 - stride will depend on the pseudorandom padding in each buffer.
84 - commandCount will be the number of chunks assigned to each buffer.
85
86 Clear framebuffer to (0, 0, 0, 1.0)
87 Draw (InstanceIndex / 256.0, 0, 1.0, 1.0) in the fragment shader.
88
89 When testing execution sets with this, we will take the chance to test also:
90 - Shader IO
91 - Built-ins
92 - Descriptor sets.
93
94 Descriptor sets and IO:
95
96 In the vertex shader, we'll use 4 readonly storage buffers as descriptor bindings (1 for each sequence), containing:
97
- binding=0: 8 even non-negative numbers: 0, 2, 4, 6, 8, 10, 12, 14
- binding=1: 12 odd positive numbers: 1, 3, ...
- binding=2: 16 even negative numbers: -2, -4, ...
- binding=3: 20 odd negative numbers: -1, -3, ...
102
103 And 4 variants of the vertex and fragment shaders, numbered 0 to 3. Each sequence will use 1 vertex and fragment shader variant, and
104 will work with 1 of the 4 buffers.
105
106 - Vertex shader i reads numbers from binding i, and stores each in an out flat int variable, in some order that depends on the
107 VertexIndex, for example. What matters is that, for variant 0 we'll have 16 IO variables and the number of IO variables changes
108 (increasing) for each sequence and shader.
109 - Fragment shader i will read those numbers from IO (4 fragment shaders, different amount of IO variables) and calculate the total
110 sum.
111 - The sum will be the same for all pixels of the sequence.
112 - As we know how many pixels are drawn by each sequence, we'll store the expected results in a storage buffer for each pixel.
113 - The fragment shader will check the sum against the expected result for the pixel (using gl_FragCoord to access a storage buffer
114 with the results) and will:
115 - Write 0 in the green channel if correct.
116 - Write 1 in the green channel if not.
117
118 For built-ins:
119
120 - Position and PointSize are set normally.
121 - We'll store a Vec4 of extra data for each vertex.
122 - One of them will be the clip distance and the other one will be the cull distance.
123 - In 1/8 (pseudorandom) of the pixels, we'll store a negative clip distance.
124 - In 1/8 (pseudorandom) of the pixels, we'll store a negative cull distance.
125 - When verifying results, those pixels should not be covered.
126
127 */
128
using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;

constexpr uint32_t kSequenceCount = 4u;       // Number of DGC sequences (one draw-count token each) per test.
constexpr uint32_t kPerTriangleVertices = 3u; // One triangle (3 vertices) covers each framebuffer pixel.
constexpr uint32_t kMaxInstanceIndex = 255u;  // Max InstanceIndex value; used to normalize the red channel in the frag shader.
constexpr uint32_t kVertexChunkOffset = 1000u; // Base offset applied to stored index values (varies slightly per chunk).
constexpr uint32_t kPipelineShaders = 2u; // Each "pipeline" contains a vertex and a frag shader.
136
// Basic draw mechanism exercised by the generated commands.
enum class TestType
{
    DRAW_COUNT = 0,                 // Non-indexed draws (VkDrawIndirectCommand structures).
    DRAW_INDEXED_COUNT,             // Indexed draws (VkDrawIndexedIndirectCommand structures).
    DRAW_INDEXED_COUNT_INDEX_TOKEN, // Same as the previous one, but using an index buffer token.
};
143
// Whether to run an explicit preprocessing step and, if so, which command
// buffer supplies the state for it.
enum class PreprocessType
{
    NONE = 0,               // No explicit preprocess step.
    SAME_STATE_CMD_BUFFER,  // Preprocess with state from the same command buffer.
    OTHER_STATE_CMD_BUFFER, // Preprocess with state from a different command buffer.
};
150
151 struct TestParams
152 {
153 TestType testType;
154 PreprocessType preprocessType;
155 bool checkDrawParams;
156 bool useExecutionSet;
157 bool useShaderObjects;
158 bool unorderedSequences;
159
getRandomSeedvkt::DGC::__anon9465af620111::TestParams160 uint32_t getRandomSeed(void) const
161 {
162 // Other members not used because we want to make sure results don't
163 // change if we use the same pseudorandom sequence.
164 const uint32_t rndSeed = ((static_cast<int>(testType) << 26u) | (useExecutionSet << 25u) |
165 (useShaderObjects << 24u) | static_cast<uint32_t>(checkDrawParams));
166
167 return rndSeed;
168 }
169
doPreprocessvkt::DGC::__anon9465af620111::TestParams170 bool doPreprocess(void) const
171 {
172 return (preprocessType != PreprocessType::NONE);
173 }
174
indexedDrawsvkt::DGC::__anon9465af620111::TestParams175 bool indexedDraws(void) const
176 {
177 return (testType == TestType::DRAW_INDEXED_COUNT || testType == TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN);
178 }
179
indexBufferTokenvkt::DGC::__anon9465af620111::TestParams180 bool indexBufferToken(void) const
181 {
182 return (testType == TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN);
183 }
184 };
185
checkDrawCountSupport(Context & context,TestParams params)186 void checkDrawCountSupport(Context &context, TestParams params)
187 {
188 const auto stages = (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
189 const auto bindStages = (params.useExecutionSet ? stages : static_cast<VkShaderStageFlags>(0u));
190 const auto bindStagesPipeline = (params.useShaderObjects ? 0u : bindStages);
191 const auto bindStagesShaderObject = (params.useShaderObjects ? bindStages : 0u);
192
193 checkDGCExtSupport(context, stages, bindStagesPipeline, bindStagesShaderObject);
194
195 const auto &dgcProperties = context.getDeviceGeneratedCommandsPropertiesEXT();
196 if (!dgcProperties.deviceGeneratedCommandsMultiDrawIndirectCount)
197 TCU_THROW(NotSupportedError, "deviceGeneratedCommandsMultiDrawIndirectCount not supported");
198
199 if (params.useShaderObjects)
200 {
201 context.requireDeviceFunctionality("VK_EXT_shader_object");
202
203 if (params.useExecutionSet && dgcProperties.maxIndirectShaderObjectCount == 0u)
204 TCU_THROW(NotSupportedError, "maxIndirectShaderObjectCount is zero");
205 }
206
207 if (params.checkDrawParams)
208 context.requireDeviceFunctionality("VK_KHR_shader_draw_parameters");
209 }
210
// Generates an arithmetic progression: start, start+step, start+2*step, ...
// Meant to be used as the initial value for std::iota, which copies the
// generator, converts it to T when assigning elements (implicit conversion
// below) and advances it with operator++.
template <typename T>
class RangeGen
{
public:
    RangeGen(T start, T step) : m_current(start), m_step(step)
    {
    }

    // Pre-increment: advance and return the new value.
    T operator++()
    {
        m_current += m_step;
        return m_current;
    }
    // Post-increment: advance and return the previous value.
    T operator++(int)
    {
        const T prev = m_current;
        m_current += m_step;
        return prev;
    }
    // Implicit conversion to the current value. Const-qualified so it also
    // works on const generators (the original version was missing the
    // qualifier, making reads through a const reference ill-formed).
    operator T() const
    {
        return m_current;
    }

private:
    T m_current;
    T m_step;
};
239
240 using BufferDataVec = std::vector<std::vector<int32_t>>;
241
// Builds the contents of the 4 per-sequence storage buffers read by the
// vertex shader variants when testing indirect execution sets. Buffer i has
// (8 + 4*i) elements forming an arithmetic progression, so each shader
// variant reads a different number of values with a different total sum.
BufferDataVec getInputBuffers(void)
{
    // - binding=0: 8 even non-negative numbers: 0, 2, 4, 6, 8, 10, 12, 14
    // - binding=1: 12 odd positive numbers: 1, 3, ...
    // - binding=2: 16 even negative numbers: -2, -4, ...
    // - binding=3: 20 odd negative numbers: -1, -3, ...
    DE_ASSERT(kSequenceCount == 4u);
    const std::vector<size_t> bufferSizes{8u, 12u, 16u, 20u};
    const std::vector<int32_t> rangeStarts{0, 1, -2, -1};
    const std::vector<int32_t> rangeSteps{2, 2, -2, -2};

    BufferDataVec buffers(kSequenceCount);
    for (uint32_t i = 0u; i < kSequenceCount; ++i)
    {
        auto &buffer = buffers.at(i);
        buffer.resize(bufferSizes.at(i));
        // The generator provides rangeStarts[i], rangeStarts[i]+rangeSteps[i], etc.
        RangeGen generator(rangeStarts.at(i), rangeSteps.at(i));
        std::iota(begin(buffer), end(buffer), generator);
    }

    return buffers;
}
264
265 struct VertexData
266 {
267 tcu::Vec4 position;
268 tcu::Vec4 extraData; // 0: clip distance, 1: cull distance
269
VertexDatavkt::DGC::__anon9465af620111::VertexData270 VertexData(const tcu::Vec4 &position_, const tcu::Vec4 &extraData_) : position(position_), extraData(extraData_)
271 {
272 }
273 };
274
// Generates the vertex and fragment shaders used by the tests. Without an
// execution set a single vert/frag pair is generated ("vert"/"frag"). With an
// execution set, one pair per input buffer is generated ("vert0".."vert3",
// "frag0".."frag3"), where variant i reads binding i and routes its values
// through a growing number of flat I/O variables, as described in the big
// comment at the top of this file.
void initDrawCountPrograms(vk::SourceCollections &programCollection, TestParams params)
{
    std::vector<uint32_t> ioSizes;
    uint32_t shaderVariants = 1u;

    if (params.useExecutionSet)
    {
        const auto inputBuffers = getInputBuffers();

        // One shader variant per input buffer, and as many I/O variables per
        // variant as elements in the corresponding buffer.
        shaderVariants = de::sizeU32(inputBuffers);

        std::transform(begin(inputBuffers), end(inputBuffers), std::back_inserter(ioSizes),
                       [](const std::vector<int32_t> &vec) { return de::sizeU32(vec); });
    }

    const uint32_t locationOffset = 5u; // For I/O vars, to leave some room for other things we may want to pass.
    const bool checkDrawParams = params.checkDrawParams;

    std::ostringstream vertBindings;
    std::string vertBindingsDecl;
    std::string fragBindingsDecl;
    std::string pushConstantDecl;
    uint32_t nextFragBinding = 0u;

    // When using multiple shader variants, we'll test bindings and shader IO as described above.
    if (params.useExecutionSet)
    {
        // Vertex shaders see all input buffers in set 0; variant i only reads buffer i.
        for (size_t i = 0u; i < ioSizes.size(); ++i)
        {
            vertBindings << "layout (set=0, binding=" << i << ", std430) readonly buffer Buffer" << i
                         << " { int values[" << ioSizes.at(i) << "]; } buffer" << i << ";\n";
        }
        vertBindingsDecl = vertBindings.str();

        // Note frag shader bindings use separate sets.
        fragBindingsDecl += "layout (set=1, binding=" + std::to_string(nextFragBinding++) +
                            ", std430) readonly buffer ExpectedAccum { int values[]; } ea;\n";
    }

    // Per-pixel expected gl_DrawID/gl_BaseVertex/gl_BaseInstance values.
    if (checkDrawParams)
        fragBindingsDecl += "layout (set=1, binding=" + std::to_string(nextFragBinding++) +
                            ", std430) readonly buffer ExpectedDrawParams { ivec4 values[]; } edp;\n";

    // Framebuffer dimensions, used to turn gl_FragCoord into a buffer index.
    if (params.useExecutionSet || checkDrawParams)
        pushConstantDecl += "layout (push_constant, std430) uniform PushConstantBlock { uvec2 dim; } pc;\n";

    for (uint32_t i = 0u; i < shaderVariants; ++i)
    {
        const uint32_t ioVarCount = (params.useExecutionSet ? ioSizes.at(i) : 0u);
        const auto nameSuffix = (params.useExecutionSet ? std::to_string(i) : std::string());

        // Pieces of GLSL built per-variant: declarations and accesses for the
        // flat integer I/O variables connecting vert and frag shaders.
        std::ostringstream outVarsDecl;
        std::ostringstream inVarsDecl;
        std::ostringstream outVarsWrite;
        std::ostringstream inVarsRead;

        for (uint32_t j = 0u; j < ioVarCount; ++j)
        {
            const auto location = j + locationOffset;

            outVarsDecl << "layout (location=" << location << ") out flat int iovar" << j << ";\n";
            inVarsDecl << "layout (location=" << location << ") in flat int iovar" << j << ";\n";
            outVarsWrite << " iovar" << j << " = buffer" << i << ".values[" << j << "];\n";
            inVarsRead << " accum += iovar" << j << ";\n";
        }

        // Vertex shader: passes position, clip/cull distances, InstanceIndex
        // and (optionally) draw parameters and the binding i values downstream.
        std::ostringstream vert;
        vert << "#version 460\n"
             << "layout (location=0) in vec4 inPos;\n"
             << "layout (location=1) in vec4 inExtraData;\n"
             << "layout (location=0) out flat int outInstanceIndex;\n"
             << (checkDrawParams ? "layout (location=1) out flat int drawIndex;\n" : "") << "\n"
             << (checkDrawParams ? "layout (location=2) out flat int baseVertex;\n" : "") << "\n"
             << (checkDrawParams ? "layout (location=3) out flat int baseInstance;\n" : "") << "\n"
             << vertBindingsDecl << "\n"
             << outVarsDecl.str() << "\n"
             << "out gl_PerVertex {\n"
             << " vec4 gl_Position;\n"
             << " float gl_PointSize;\n"
             << " float gl_ClipDistance[1];\n"
             << " float gl_CullDistance[1];\n"
             << "};\n"
             << "void main (void) {\n"
             << " gl_Position = inPos;\n"
             << " gl_PointSize = 1.0;\n"
             << " gl_ClipDistance[0] = inExtraData.x;\n"
             << " gl_CullDistance[0] = inExtraData.y;\n"
             << " outInstanceIndex = gl_InstanceIndex;\n"
             << (checkDrawParams ? " drawIndex = gl_DrawID;\n" : "")
             << (checkDrawParams ? " baseVertex = gl_BaseVertex;\n" : "")
             << (checkDrawParams ? " baseInstance = gl_BaseInstance;\n" : "") << outVarsWrite.str() << "}\n";
        const auto vertName = "vert" + nameSuffix;
        programCollection.glslSources.add(vertName) << glu::VertexSource(vert.str());

        // Fragment shader: red encodes InstanceIndex, green flags a wrong I/O
        // accumulation sum, blue flags wrong draw parameters.
        const bool pixelIdxNeeded = (params.useExecutionSet || checkDrawParams);
        std::ostringstream frag;
        frag << "#version 460\n"
             << "layout (location=0) in flat int inInstanceIndex;\n"
             << (checkDrawParams ? "layout (location=1) in flat int drawIndex;\n" : "")
             << (checkDrawParams ? "layout (location=2) in flat int baseVertex;\n" : "")
             << (checkDrawParams ? "layout (location=3) in flat int baseInstance;\n" : "")
             << "layout (location=0) out vec4 outColor;\n"
             << "\n"
             << fragBindingsDecl << pushConstantDecl << "\n"
             << inVarsDecl.str() << "\n"
             << "void main (void) {\n"
             << (pixelIdxNeeded ?
                     " const uint pixelIdx = uint(gl_FragCoord.y) * pc.dim.x + uint(gl_FragCoord.x);\n" :
                     "")
             << (params.useExecutionSet ? " int accum = 0;\n" : "") << inVarsRead.str()
             << " const float red = float(inInstanceIndex) / " << kMaxInstanceIndex << ".0;\n"
             << " const float green = "
             << (params.useExecutionSet ? "((accum == ea.values[pixelIdx]) ? 0.0 : 1.0)" : "0.0") << ";\n"
             << " bool blueOK = true;\n"
             << (checkDrawParams ? " blueOK = (blueOK && (drawIndex == edp.values[pixelIdx].x));\n" : "")
             << (checkDrawParams ? " blueOK = (blueOK && (baseVertex == edp.values[pixelIdx].y));\n" : "")
             << (checkDrawParams ? " blueOK = (blueOK && (baseInstance == edp.values[pixelIdx].z));\n" : "")
             << " const float blue = (blueOK ? 1.0 : 0.0);\n"
             << " outColor = vec4(red, green, blue, 1.0);\n"
             << "}\n";
        const auto fragName = "frag" + nameSuffix;
        programCollection.glslSources.add(fragName) << glu::FragmentSource(frag.str());
    }
}
399
400 using DGCBufferPtr = std::unique_ptr<DGCBuffer>;
401 using BufferWithMemoryPtr = std::unique_ptr<BufferWithMemory>;
402 using BufferVec = std::vector<BufferWithMemoryPtr>;
403
404 struct SequenceInfo
405 {
406 DGCBufferPtr buffer;
407 uint32_t chunkCount;
408 uint32_t stride;
409 uint32_t vertexCount;
410 };
411
makeSingleShader(const DeviceInterface & vkd,VkDevice device,VkShaderStageFlagBits stage,const ProgramBinary & binary,const std::vector<VkDescriptorSetLayout> & setLayouts,const std::vector<VkPushConstantRange> & pcRanges)412 Move<VkShaderEXT> makeSingleShader(const DeviceInterface &vkd, VkDevice device, VkShaderStageFlagBits stage,
413 const ProgramBinary &binary, const std::vector<VkDescriptorSetLayout> &setLayouts,
414 const std::vector<VkPushConstantRange> &pcRanges)
415 {
416 VkShaderStageFlags nextStage = 0u;
417 if (stage == VK_SHADER_STAGE_VERTEX_BIT)
418 nextStage |= VK_SHADER_STAGE_FRAGMENT_BIT;
419 else if (stage == VK_SHADER_STAGE_FRAGMENT_BIT)
420 ;
421 else
422 DE_ASSERT(false);
423
424 const VkShaderCreateInfoEXT createInfo = {
425 VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, // VkStructureType sType;
426 nullptr, // const void* pNext;
427 0u, // VkShaderCreateFlagsEXT flags;
428 stage, // VkShaderStageFlagBits stage;
429 nextStage, // VkShaderStageFlags nextStage;
430 VK_SHADER_CODE_TYPE_SPIRV_EXT, // VkShaderCodeTypeEXT codeType;
431 binary.getSize(), // size_t codeSize;
432 binary.getBinary(), // const void* pCode;
433 "main", // const char* pName;
434 de::sizeU32(setLayouts), // uint32_t setLayoutCount;
435 de::dataOrNull(setLayouts), // const VkDescriptorSetLayout* pSetLayouts;
436 de::sizeU32(pcRanges), // uint32_t pushConstantRangeCount;
437 de::dataOrNull(pcRanges), // const VkPushConstantRange* pPushConstantRanges;
438 nullptr, // const VkSpecializationInfo* pSpecializationInfo;
439 };
440
441 binary.setUsed();
442
443 return createShader(vkd, device, createInfo);
444 }
445
testDrawCountRun(Context & context,TestParams params)446 tcu::TestStatus testDrawCountRun(Context &context, TestParams params)
447 {
448 const auto ctx = context.getContextCommonData();
449 const tcu::IVec3 fbExtent(32, 32, 1);
450 const auto vkExtent = makeExtent3D(fbExtent);
451 const auto floatExtent = fbExtent.asFloat();
452 const auto pixelCountU = vkExtent.width * vkExtent.height * vkExtent.depth;
453 const auto kChunkCount = 16u;
454 const auto chunkMaxPixels = static_cast<int>(pixelCountU / kChunkCount); // Does not apply to the last chunk.
455 const auto maxIndirectDraws =
456 static_cast<int>(kChunkCount / kSequenceCount); // Per draw count dispatch. Doesn't apply to last.
457 const auto stageFlags = (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT);
458 const auto bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
459 const auto descType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
460
461 // Pseudorandom number generator.
462 const auto randomSeed = params.getRandomSeed();
463 de::Random rnd(randomSeed);
464
465 // Generate one triangle around the center of each pixel.
466 const float pixelWidth = 2.0f / floatExtent.x();
467 const float pixelHeight = 2.0f / floatExtent.y();
468 const float horMargin = pixelWidth / 4.0f;
469 const float verMargin = pixelHeight / 4.0f;
470
471 // Converts to framebuffer range [-1,1]
472 const auto normalize = [](int v, int total)
473 { return ((static_cast<float>(v) + 0.5f) / static_cast<float>(total)) * 2.0f - 1.0f; };
474
475 // These will be chosen pseudorandomly for each pixel.
476 const std::vector<float> clipDistances{0.75f, 0.0f, -0.5f, 1.25f, 20.0f, 2.0f, 0.25f, 1.0f};
477 const std::vector<float> cullDistances{0.75f, 0.0f, 0.5f, 1.25f, 20.0f, 2.0f, -0.25f, 1.0f};
478
479 const int lastClip = static_cast<int>(clipDistances.size()) - 1;
480 const int lastCull = static_cast<int>(cullDistances.size()) - 1;
481
482 // Vertex buffer data.
483 std::vector<VertexData> vertices;
484 vertices.reserve(pixelCountU * kPerTriangleVertices);
485
486 for (int y = 0; y < fbExtent.y(); ++y)
487 for (int x = 0; x < fbExtent.x(); ++x)
488 {
489 const float xCenter = normalize(x, fbExtent.x());
490 const float yCenter = normalize(y, fbExtent.y());
491
492 const float clip = clipDistances.at(rnd.getInt(0, lastClip));
493 const float cull = cullDistances.at(rnd.getInt(0, lastCull));
494
495 const tcu::Vec4 extraData(clip, cull, 0.0f, 0.0f);
496
497 vertices.emplace_back(tcu::Vec4(xCenter - horMargin, yCenter + verMargin, 0.0f, 1.0f), extraData);
498 vertices.emplace_back(tcu::Vec4(xCenter + horMargin, yCenter + verMargin, 0.0f, 1.0f), extraData);
499 vertices.emplace_back(tcu::Vec4(xCenter, yCenter - verMargin, 0.0f, 1.0f), extraData);
500 }
501
502 const auto vertexBufferSize = static_cast<VkDeviceSize>(de::dataSize(vertices));
503 const auto vertexBufferUsage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
504 const auto vertexBufferInfo = makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
505 const VkDeviceSize vertexBufferOffset = 0ull;
506
507 BufferWithMemory vertexBuffer(ctx.vkd, ctx.device, ctx.allocator, vertexBufferInfo, MemoryRequirement::HostVisible);
508 auto &vertexBufferAlloc = vertexBuffer.getAllocation();
509 void *vertexBufferData = vertexBufferAlloc.getHostPtr();
510
511 deMemcpy(vertexBufferData, de::dataOrNull(vertices), de::dataSize(vertices));
512
513 // Divide pixels in chunks of pseudorandom sizes.
514 std::vector<uint32_t> chunkSizes(kChunkCount, 0u);
515 {
516 uint32_t total = 0u;
517 for (uint32_t i = 0u; i < kChunkCount - 1u; ++i)
518 {
519 const uint32_t chunkSize = static_cast<uint32_t>(rnd.getInt(1, chunkMaxPixels));
520 chunkSizes.at(i) = chunkSize;
521 total += chunkSize;
522 }
523 // Last chunk contains the remaining pixels.
524 chunkSizes.back() = pixelCountU - total;
525 }
526
527 // Draw operation per chunk.
528 std::vector<VkDrawIndirectCommand> chunkDraws;
529 std::vector<VkDrawIndexedIndirectCommand> chunkIndexedDraws;
530
531 if (params.testType == TestType::DRAW_COUNT)
532 chunkDraws.reserve(kChunkCount);
533 else if (params.indexedDraws())
534 chunkIndexedDraws.reserve(kChunkCount);
535 else
536 DE_ASSERT(false);
537
538 {
539 const uint32_t firstInstanceStart = 0u;
540 const uint32_t firstInstanceStep = 16u;
541 const int maxInstanceCount = 16u;
542 RangeGen firstInstanceRange(firstInstanceStart, firstInstanceStep);
543
544 std::vector<uint32_t> firstInstances(16u, 0u);
545 std::iota(begin(firstInstances), end(firstInstances), firstInstanceRange);
546
547 uint32_t prevPixels = 0u;
548 for (uint32_t i = 0u; i < kChunkCount; ++i)
549 {
550 const auto &chunkSize = chunkSizes.at(i);
551
552 const auto vertexCount = chunkSize * kPerTriangleVertices;
553 const auto instanceCount = static_cast<uint32_t>(rnd.getInt(1, maxInstanceCount));
554 const auto firstVertex = prevPixels * kPerTriangleVertices;
555 const auto firstInstance = firstInstances.at(rnd.getInt(0, static_cast<int>(firstInstances.size() - 1)));
556 const auto chunkOffset = kVertexChunkOffset + i;
557
558 if (params.testType == TestType::DRAW_COUNT)
559 {
560 const VkDrawIndirectCommand cmd{
561 vertexCount,
562 instanceCount,
563 firstVertex,
564 firstInstance,
565 };
566 chunkDraws.push_back(cmd);
567 }
568 else if (params.indexedDraws())
569 {
570 const VkDrawIndexedIndirectCommand cmd{
571 vertexCount, // uint32_t indexCount;
572 instanceCount, // uint32_t instanceCount;
573 firstVertex, // uint32_t firstIndex;
574 -static_cast<int32_t>(chunkOffset), // int32_t vertexOffset;
575 firstInstance, // uint32_t firstInstance;
576 };
577 chunkIndexedDraws.push_back(cmd);
578 }
579 else
580 DE_ASSERT(false);
581
582 prevPixels += chunkSize;
583 }
584 }
585
586 // Create indirect buffers for the sequences.
587 std::vector<SequenceInfo> sequenceInfos;
588 sequenceInfos.reserve(kSequenceCount);
589
590 {
591 uint32_t prevChunks = 0u;
592
593 for (uint32_t i = 0u; i < kSequenceCount; ++i)
594 {
595 sequenceInfos.emplace_back();
596 auto &seqInfo = sequenceInfos.back();
597
598 const auto seqChunks = ((i < kSequenceCount - 1u) ? static_cast<uint32_t>(rnd.getInt(1, maxIndirectDraws)) :
599 (kChunkCount - prevChunks));
600 const auto extraPadding = static_cast<uint32_t>(rnd.getInt(0, 7));
601 const auto totalStructs = extraPadding + 1u;
602 const auto structSize = ((params.testType == TestType::DRAW_COUNT) ? sizeof(VkDrawIndirectCommand) :
603 sizeof(VkDrawIndexedIndirectCommand));
604 const auto stride = totalStructs * structSize;
605 const auto bufferSize = stride * seqChunks;
606
607 seqInfo.chunkCount = seqChunks;
608 seqInfo.stride = static_cast<uint32_t>(stride);
609 seqInfo.buffer.reset(new DGCBuffer(ctx.vkd, ctx.device, ctx.allocator, bufferSize));
610
611 // Copy indirect commands to the buffer.
612 auto &bufferAlloc = seqInfo.buffer->getAllocation();
613 char *bufferData = reinterpret_cast<char *>(bufferAlloc.getHostPtr());
614
615 deMemset(bufferData, 0, static_cast<size_t>(seqInfo.buffer->getSize()));
616 uint32_t vertexCount = 0u;
617
618 for (uint32_t j = 0; j < seqInfo.chunkCount; ++j)
619 {
620 const auto chunkIdx = prevChunks + j;
621 const auto dstPtr = bufferData + stride * j;
622 const auto srcPtr = ((params.testType == TestType::DRAW_COUNT) ?
623 reinterpret_cast<const void *>(&chunkDraws.at(chunkIdx)) :
624 reinterpret_cast<const void *>(&chunkIndexedDraws.at(chunkIdx)));
625 const auto chunkVertexCount =
626 ((params.testType == TestType::DRAW_COUNT) ? chunkDraws.at(chunkIdx).vertexCount :
627 chunkIndexedDraws.at(chunkIdx).indexCount);
628 deMemcpy(dstPtr, srcPtr, structSize);
629 vertexCount += chunkVertexCount;
630 }
631
632 seqInfo.vertexCount = vertexCount;
633 prevChunks += seqChunks;
634 }
635 }
636
637 // Index buffer if needed. For indexed draws, we're going to draw vertices
638 // in reverse order, which means storing indices in reverse order in the
639 // index buffer. In addition, to check that vertexOffset is correctly read
640 // per draw, we're going to apply an offset to the index values stored in
641 // each chunk, with the offset being slightly different in each chunk.
642 std::vector<uint32_t> indices;
643 std::vector<BufferWithMemoryPtr> indexBuffers;
644 const VkBufferUsageFlags extraIndexBufferFlags =
645 (params.indexBufferToken() ? (VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) :
646 0u);
647 const MemoryRequirement extraIndexBufferMemReqs =
648 (params.indexBufferToken() ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any);
649
650 if (params.indexedDraws())
651 {
652 // Indices in reverse order.
653 indices.reserve(vertices.size());
654
655 uint32_t processedCount = 0u;
656 for (size_t i = 0u; i < chunkSizes.size(); ++i)
657 {
658 const auto chunkSize = chunkSizes.at(i);
659 const auto chunkVertexCount = chunkSize * kPerTriangleVertices;
660 const auto chunkVertexOffset = static_cast<uint32_t>(kVertexChunkOffset + i); // Varies a bit per chunk.
661
662 for (uint32_t j = 0u; j < chunkVertexCount; ++j)
663 {
664 const auto forwardIndex = processedCount + j;
665 const auto reverseIndex = static_cast<uint32_t>(vertices.size() - 1u) - forwardIndex;
666 const auto storedIndex = reverseIndex + chunkVertexOffset;
667
668 indices.push_back(storedIndex);
669 }
670
671 processedCount += chunkVertexCount;
672 }
673
674 DE_ASSERT(vertices.size() == indices.size());
675
676 const auto indexBufferSize = static_cast<VkDeviceSize>(de::dataSize(indices));
677 const auto indexBufferUsage = (VK_BUFFER_USAGE_INDEX_BUFFER_BIT | extraIndexBufferFlags);
678 const auto indexBufferInfo = makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
679
680 // Store indices in one or more index buffers. When using index buffers, all buffers will be the same size but
681 // each buffer will only contain the appropriate chunks of real data and the rest will be zero-ed out.
682 const std::vector<uint32_t> singleSeqVertCount{pixelCountU * kPerTriangleVertices};
683 std::vector<uint32_t> multiSeqVertCount(kSequenceCount, 0u);
684 std::transform(begin(sequenceInfos), end(sequenceInfos), begin(multiSeqVertCount),
685 [](const SequenceInfo &s) { return s.vertexCount; });
686
687 const auto &indexChunks =
688 ((params.testType == TestType::DRAW_INDEXED_COUNT) ? singleSeqVertCount : multiSeqVertCount);
689
690 processedCount = 0u;
691 for (uint32_t i = 0u; i < de::sizeU32(indexChunks); ++i)
692 {
693 const auto chunkIndexCount = indexChunks.at(i);
694
695 indexBuffers.emplace_back(new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, indexBufferInfo,
696 (MemoryRequirement::HostVisible | extraIndexBufferMemReqs)));
697 auto &indexBuffer = indexBuffers.back();
698 auto &indexBufferAlloc = indexBuffer->getAllocation();
699 char *indexBufferBasePtr = reinterpret_cast<char *>(indexBufferAlloc.getHostPtr());
700
701 // Zero-out the whole buffer first.
702 deMemset(indexBufferBasePtr, 0, de::dataSize(indices));
703
704 // Copy the chunk to its own index buffer.
705 {
706 const auto chunkSizeBytes = chunkIndexCount * DE_SIZEOF32(uint32_t);
707 const auto srcPtr = &indices.at(processedCount);
708 const auto dstPtr = indexBufferBasePtr + processedCount * DE_SIZEOF32(uint32_t);
709 deMemcpy(dstPtr, srcPtr, chunkSizeBytes);
710 }
711
712 processedCount += chunkIndexCount;
713 }
714 }
715
716 // Create token data for the draw count tokens.
717 std::vector<VkDrawIndirectCountIndirectCommandEXT> drawTokenData;
718 drawTokenData.reserve(kSequenceCount);
719
720 uint32_t maxDrawCount = 0u;
721 for (uint32_t i = 0u; i < kSequenceCount; ++i)
722 {
723 const auto &seqInfo = sequenceInfos.at(i);
724
725 drawTokenData.emplace_back(VkDrawIndirectCountIndirectCommandEXT{
726 seqInfo.buffer->getDeviceAddress(),
727 seqInfo.stride,
728 seqInfo.chunkCount,
729 });
730
731 if (seqInfo.chunkCount > maxDrawCount)
732 maxDrawCount = seqInfo.chunkCount;
733 }
734 if (rnd.getBool())
735 maxDrawCount *= 2u;
736
737 // Create token data for the index buffer tokens, if used.
738 std::vector<VkBindIndexBufferIndirectCommandEXT> indexBufferTokenData;
739 if (params.indexBufferToken())
740 {
741 for (uint32_t i = 0u; i < kSequenceCount; ++i)
742 {
743 indexBufferTokenData.push_back(VkBindIndexBufferIndirectCommandEXT{
744 getBufferDeviceAddress(ctx.vkd, ctx.device, indexBuffers.at(i)->get()),
745 static_cast<uint32_t>(de::dataSize(indices)),
746 VK_INDEX_TYPE_UINT32,
747 });
748 }
749 }
750
751 // Color framebuffer.
752 const auto colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
753 const auto colorUsage =
754 (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
755 ImageWithBuffer colorBuffer(ctx.vkd, ctx.device, ctx.allocator, vkExtent, colorFormat, colorUsage,
756 VK_IMAGE_TYPE_2D);
757 const auto colorSRR = makeDefaultImageSubresourceRange();
758
759 const std::vector<VkViewport> viewports(1u, makeViewport(vkExtent));
760 const std::vector<VkRect2D> scissors(1u, makeRect2D(vkExtent));
761
762 Move<VkRenderPass> renderPass;
763 Move<VkFramebuffer> framebuffer;
764
765 if (!params.useShaderObjects)
766 {
767 renderPass = makeRenderPass(ctx.vkd, ctx.device, colorFormat);
768 framebuffer = makeFramebuffer(ctx.vkd, ctx.device, *renderPass, colorBuffer.getImageView(), vkExtent.width,
769 vkExtent.height);
770 }
771
772 // Input buffers. Used with execution sets.
773 const auto inputBuffers = (params.useExecutionSet ? getInputBuffers() : BufferDataVec());
774 const auto inputBufferCount = de::sizeU32(inputBuffers);
775
776 Move<VkDescriptorSetLayout> vertSetLayout;
777 Move<VkDescriptorSetLayout> fragSetLayout;
778 std::vector<VkDescriptorSetLayout> setLayouts;
779 std::vector<VkPushConstantRange> pcRanges;
780
781 Move<VkDescriptorPool> descriptorPool;
782 Move<VkDescriptorSet> vertDescSet;
783 Move<VkDescriptorSet> fragDescSet;
784
785 BufferVec vertBuffers;
786 BufferVec fragBuffers;
787
788 // Only used with execution sets.
789 const auto pcSize = DE_SIZEOF32(tcu::UVec2);
790 const auto pcStages = static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_FRAGMENT_BIT);
791 const auto pcRange = makePushConstantRange(pcStages, 0u, pcSize);
792 const auto pcData = fbExtent.asUint().swizzle(0, 1);
793
794 if (params.useExecutionSet || params.checkDrawParams)
795 {
796 uint32_t fragBufferCount = 0u;
797 uint32_t vertBufferCount = 0u;
798
799 // Frag shader will always use set 1, so set 0 can be empty.
800 {
801 DescriptorSetLayoutBuilder vertLayoutBuilder;
802 for (uint32_t i = 0u; i < inputBufferCount; ++i)
803 {
804 if (params.useExecutionSet)
805 vertLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_VERTEX_BIT);
806 }
807 vertSetLayout = vertLayoutBuilder.build(ctx.vkd, ctx.device);
808 vertBufferCount = inputBufferCount;
809 }
810
811 DescriptorSetLayoutBuilder fragLayoutBuilder;
812 if (params.useExecutionSet)
813 {
814 fragLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_FRAGMENT_BIT);
815 ++fragBufferCount;
816 }
817 if (params.checkDrawParams)
818 {
819 fragLayoutBuilder.addSingleBinding(descType, VK_SHADER_STAGE_FRAGMENT_BIT);
820 ++fragBufferCount;
821 }
822 fragSetLayout = fragLayoutBuilder.build(ctx.vkd, ctx.device);
823
824 setLayouts.push_back(*vertSetLayout);
825 setLayouts.push_back(*fragSetLayout);
826 pcRanges.push_back(pcRange);
827
828 DescriptorPoolBuilder poolBuilder;
829 poolBuilder.addType(descType, vertBufferCount + fragBufferCount);
830 descriptorPool =
831 poolBuilder.build(ctx.vkd, ctx.device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, kPipelineShaders);
832
833 if (params.useExecutionSet)
834 vertDescSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *vertSetLayout);
835 fragDescSet = makeDescriptorSet(ctx.vkd, ctx.device, *descriptorPool, *fragSetLayout);
836
837 const auto bufferUsage = static_cast<VkBufferUsageFlags>(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
838 for (uint32_t i = 0u; i < inputBufferCount; ++i)
839 {
840 const auto &inputBuffer = inputBuffers.at(i);
841 const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(inputBuffer));
842 const auto createInfo = makeBufferCreateInfo(bufferSize, bufferUsage);
843 vertBuffers.emplace_back(
844 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
845
846 auto &bufferAlloc = vertBuffers.back()->getAllocation();
847 void *bufferDataPtr = bufferAlloc.getHostPtr();
848 deMemcpy(bufferDataPtr, de::dataOrNull(inputBuffer), de::dataSize(inputBuffer));
849 }
850
851 if (params.useExecutionSet)
852 {
853 // Calculate expected accumulated values.
854 std::vector<int32_t> expectedAccums(pixelCountU,
855 0); // Accumulated values for each pixel (this goes into the buffer).
856 std::vector<int32_t> bufferAccums(inputBuffers.size(), 0); // Accumulated values for each input buffer.
857 std::vector<uint32_t> seqSizesInPixels(inputBuffers.size(), 0u); // Number of pixels in each sequence.
858
859 uint32_t prevChunks = 0u;
860 for (size_t seqIdx = 0u; seqIdx < sequenceInfos.size(); ++seqIdx)
861 {
862 const auto &seqInfo = sequenceInfos.at(seqIdx);
863 uint32_t seqPixels = 0u;
864
865 for (uint32_t i = 0u; i < seqInfo.chunkCount; ++i)
866 {
867 const auto chunkIdx = prevChunks + i;
868 const auto pixelCount = chunkSizes.at(chunkIdx);
869
870 seqPixels += pixelCount;
871 }
872
873 seqSizesInPixels.at(seqIdx) = seqPixels;
874 prevChunks += seqInfo.chunkCount;
875 }
876
877 for (size_t i = 0u; i < inputBuffers.size(); ++i)
878 {
879 const auto &inputBuffer = inputBuffers.at(i);
880 bufferAccums.at(i) = std::accumulate(begin(inputBuffer), end(inputBuffer), 0);
881 }
882
883 // Using the accumulated values for each input buffer and the number of
884 // pixels in each sequence, set the expected accumulated value in each
885 // pixel.
886 uint32_t prevPixels = 0u;
887 for (size_t i = 0u; i < seqSizesInPixels.size(); ++i)
888 {
889 const auto &seqPixels = seqSizesInPixels.at(i);
890 for (uint32_t j = 0u; j < seqPixels; ++j)
891 {
892 const auto pixelIdx = prevPixels + j;
893 expectedAccums.at(pixelIdx) = bufferAccums.at(i);
894 }
895 prevPixels += seqPixels;
896 }
897
898 // Indexed draws happen in reverse order.
899 if (params.indexedDraws())
900 std::reverse(begin(expectedAccums), end(expectedAccums));
901
902 const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(expectedAccums));
903 const auto createInfo = makeBufferCreateInfo(bufferSize, bufferUsage);
904 fragBuffers.emplace_back(
905 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
906
907 auto &bufferAlloc = fragBuffers.back()->getAllocation();
908 void *bufferDataPtr = bufferAlloc.getHostPtr();
909 deMemcpy(bufferDataPtr, de::dataOrNull(expectedAccums), de::dataSize(expectedAccums));
910 }
911
912 if (params.checkDrawParams)
913 {
914 std::vector<tcu::IVec4> expectedDrawIndices;
915 expectedDrawIndices.reserve(pixelCountU);
916
917 uint32_t prevChunks = 0u;
918 for (uint32_t i = 0u; i < kSequenceCount; ++i)
919 {
920 uint32_t drawIdx = 0u; // Resets at the start of each sequence.
921 const auto &seqInfo = sequenceInfos.at(i);
922
923 for (uint32_t j = 0u; j < seqInfo.chunkCount; ++j)
924 {
925 const auto chunkIdx = prevChunks + j;
926 const auto chunkSize = chunkSizes.at(chunkIdx);
927 const auto baseVertex =
928 (params.testType == TestType::DRAW_COUNT ? chunkDraws.at(chunkIdx).firstVertex :
929 chunkIndexedDraws.at(chunkIdx).vertexOffset);
930 const auto baseInstance =
931 (params.testType == TestType::DRAW_COUNT ? chunkDraws.at(chunkIdx).firstInstance :
932 chunkIndexedDraws.at(chunkIdx).firstInstance);
933
934 for (uint32_t k = 0u; k < chunkSize; ++k)
935 expectedDrawIndices.push_back(tcu::UVec4(drawIdx, baseVertex, baseInstance, 0).asInt());
936
937 ++drawIdx; // Increases with each draw.
938 }
939
940 prevChunks += seqInfo.chunkCount;
941 }
942
943 // Indexed draws happen in reverse order.
944 if (params.indexedDraws())
945 std::reverse(begin(expectedDrawIndices), end(expectedDrawIndices));
946
947 const auto bufferSize = static_cast<VkDeviceSize>(de::dataSize(expectedDrawIndices));
948 const auto createInfo = makeBufferCreateInfo(bufferSize, bufferUsage);
949 fragBuffers.emplace_back(
950 new BufferWithMemory(ctx.vkd, ctx.device, ctx.allocator, createInfo, MemoryRequirement::HostVisible));
951
952 auto &bufferAlloc = fragBuffers.back()->getAllocation();
953 void *bufferDataPtr = bufferAlloc.getHostPtr();
954 deMemcpy(bufferDataPtr, de::dataOrNull(expectedDrawIndices), de::dataSize(expectedDrawIndices));
955 }
956
957 // Update descriptors with each buffer.
958 DescriptorSetUpdateBuilder updateBuilder;
959 using Location = DescriptorSetUpdateBuilder::Location;
960
961 for (uint32_t i = 0u; i < de::sizeU32(vertBuffers); ++i)
962 {
963 const auto bufferInfo = makeDescriptorBufferInfo(vertBuffers.at(i)->get(), 0ull, VK_WHOLE_SIZE);
964 updateBuilder.writeSingle(*vertDescSet, Location::binding(i), descType, &bufferInfo);
965 }
966 for (uint32_t i = 0u; i < de::sizeU32(fragBuffers); ++i)
967 {
968 const auto bufferInfo = makeDescriptorBufferInfo(fragBuffers.at(i)->get(), 0ull, VK_WHOLE_SIZE);
969 updateBuilder.writeSingle(*fragDescSet, Location::binding(i), descType, &bufferInfo);
970 }
971 updateBuilder.update(ctx.vkd, ctx.device);
972 }
973
974 const auto pipelineLayout =
975 makePipelineLayout(ctx.vkd, ctx.device, de::sizeU32(setLayouts), de::dataOrNull(setLayouts),
976 de::sizeU32(pcRanges), de::dataOrNull(pcRanges));
977
978 // Shader modules.
979 const auto &binaries = context.getBinaryCollection();
980 const auto shaderSetCount = (params.useExecutionSet ? kSequenceCount : 1u);
981
982 using ModuleVec = std::vector<Move<VkShaderModule>>;
983 ModuleVec vertModules;
984 ModuleVec fragModules;
985
986 using ShaderVec = std::vector<Move<VkShaderEXT>>;
987 ShaderVec vertShaders;
988 ShaderVec fragShaders;
989
990 using DGCShaderExtPtr = std::unique_ptr<DGCShaderExt>;
991 using DGCShaderVec = std::vector<DGCShaderExtPtr>;
992 DGCShaderVec vertShadersDGC;
993 DGCShaderVec fragShadersDGC;
994
995 const auto &meshFeatures = context.getMeshShaderFeaturesEXT();
996 const auto &features = context.getDeviceFeatures();
997
998 const auto tessFeature = (features.tessellationShader == VK_TRUE);
999 const auto geomFeature = (features.geometryShader == VK_TRUE);
1000
1001 if (!params.useShaderObjects)
1002 {
1003 vertModules.reserve(shaderSetCount);
1004 fragModules.reserve(shaderSetCount);
1005
1006 for (uint32_t i = 0u; i < shaderSetCount; ++i)
1007 {
1008 const auto suffix = (params.useExecutionSet ? std::to_string(i) : std::string());
1009 const auto vertName = "vert" + suffix;
1010 const auto fragName = "frag" + suffix;
1011 vertModules.push_back(createShaderModule(ctx.vkd, ctx.device, binaries.get(vertName)));
1012 fragModules.push_back(createShaderModule(ctx.vkd, ctx.device, binaries.get(fragName)));
1013 }
1014 }
1015 else
1016 {
1017 std::vector<VkDescriptorSetLayout> vertSetLayouts;
1018 std::vector<VkDescriptorSetLayout> fragSetLayouts;
1019
1020 if (*vertSetLayout != VK_NULL_HANDLE)
1021 {
1022 vertSetLayouts.push_back(*vertSetLayout);
1023 fragSetLayouts.push_back(*vertSetLayout);
1024 }
1025
1026 if (*fragSetLayout != VK_NULL_HANDLE)
1027 fragSetLayouts.push_back(*fragSetLayout);
1028
1029 const std::vector<VkPushConstantRange> vertPCRanges;
1030 const std::vector<VkPushConstantRange> &fragPCRanges = pcRanges;
1031
1032 // Otherwise we need to modify the vectors above.
1033 DE_ASSERT(pcStages == static_cast<VkShaderStageFlags>(VK_SHADER_STAGE_FRAGMENT_BIT));
1034
1035 if (params.useExecutionSet)
1036 {
1037 vertShadersDGC.reserve(shaderSetCount);
1038 fragShadersDGC.reserve(shaderSetCount);
1039 }
1040 else
1041 {
1042 vertShaders.reserve(shaderSetCount);
1043 fragShaders.reserve(shaderSetCount);
1044 }
1045
1046 for (uint32_t i = 0u; i < shaderSetCount; ++i)
1047 {
1048 const auto suffix = (params.useExecutionSet ? std::to_string(i) : std::string());
1049 const auto vertName = "vert" + suffix;
1050 const auto fragName = "frag" + suffix;
1051
1052 if (params.useExecutionSet)
1053 {
1054 vertShadersDGC.emplace_back(new DGCShaderExt(ctx.vkd, ctx.device, VK_SHADER_STAGE_VERTEX_BIT, 0u,
1055 binaries.get(vertName), vertSetLayouts, vertPCRanges,
1056 tessFeature, geomFeature));
1057 fragShadersDGC.emplace_back(new DGCShaderExt(ctx.vkd, ctx.device, VK_SHADER_STAGE_FRAGMENT_BIT, 0u,
1058 binaries.get(fragName), fragSetLayouts, fragPCRanges,
1059 tessFeature, geomFeature));
1060 }
1061 else
1062 {
1063 vertShaders.push_back(makeSingleShader(ctx.vkd, ctx.device, VK_SHADER_STAGE_VERTEX_BIT,
1064 binaries.get(vertName), vertSetLayouts, vertPCRanges));
1065 fragShaders.push_back(makeSingleShader(ctx.vkd, ctx.device, VK_SHADER_STAGE_FRAGMENT_BIT,
1066 binaries.get(fragName), fragSetLayouts, fragPCRanges));
1067 }
1068 }
1069 }
1070
1071 const std::vector<VkVertexInputBindingDescription> vertexBindings{
1072 makeVertexInputBindingDescription(0u, DE_SIZEOF32(VertexData), VK_VERTEX_INPUT_RATE_VERTEX),
1073 };
1074
1075 const std::vector<VkVertexInputAttributeDescription> vertexAttributes{
1076 makeVertexInputAttributeDescription(0u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT,
1077 static_cast<uint32_t>(offsetof(VertexData, position))),
1078 makeVertexInputAttributeDescription(1u, 0u, VK_FORMAT_R32G32B32A32_SFLOAT,
1079 static_cast<uint32_t>(offsetof(VertexData, extraData))),
1080 };
1081
1082 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo = {
1083 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1084 nullptr, // const void* pNext;
1085 0u, // VkPipelineVertexInputStateCreateFlags flags;
1086 de::sizeU32(vertexBindings), // uint32_t vertexBindingDescriptionCount;
1087 de::dataOrNull(vertexBindings), // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1088 de::sizeU32(vertexAttributes), // uint32_t vertexAttributeDescriptionCount;
1089 de::dataOrNull(vertexAttributes), // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1090 };
1091
1092 std::vector<Move<VkPipeline>> pipelines;
1093
1094 if (!params.useShaderObjects)
1095 {
1096 for (uint32_t i = 0u; i < shaderSetCount; ++i)
1097 {
1098 const auto createFlags =
1099 static_cast<VkPipelineCreateFlags2KHR>(VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT);
1100
1101 const VkPipelineCreateFlags2CreateInfoKHR pipelineCreateFlags = {
1102 VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR, // VkStructureType sType;
1103 nullptr, // const void* pNext;
1104 createFlags, // VkPipelineCreateFlags2KHR flags;
1105 };
1106
1107 const void *pNext = (params.useExecutionSet ? &pipelineCreateFlags : nullptr);
1108
1109 pipelines.push_back(
1110 makeGraphicsPipeline(ctx.vkd, ctx.device, *pipelineLayout, *vertModules.at(i), VK_NULL_HANDLE,
1111 VK_NULL_HANDLE, VK_NULL_HANDLE, *fragModules.at(i), *renderPass, viewports,
1112 scissors, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u, 0u, &vertexInputStateCreateInfo,
1113 nullptr, nullptr, nullptr, nullptr, nullptr, pNext, 0u));
1114 }
1115 }
1116
1117 // Indirect commands layout.
1118 VkIndirectCommandsLayoutUsageFlagsEXT cmdsLayoutFlags = 0u;
1119
1120 if (params.doPreprocess())
1121 cmdsLayoutFlags |= VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EXPLICIT_PREPROCESS_BIT_EXT;
1122
1123 if (params.unorderedSequences)
1124 cmdsLayoutFlags |= VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_EXT;
1125
1126 // We do not pass the pipeline layout because we don't have push constants or sequence index tokens.
1127 IndirectCommandsLayoutBuilderExt cmdsLayoutBuilder(cmdsLayoutFlags, stageFlags, VK_NULL_HANDLE);
1128
1129 if (params.useExecutionSet)
1130 {
1131 const auto executionSetType =
1132 (params.useShaderObjects ? VK_INDIRECT_EXECUTION_SET_INFO_TYPE_SHADER_OBJECTS_EXT :
1133 VK_INDIRECT_EXECUTION_SET_INFO_TYPE_PIPELINES_EXT);
1134 cmdsLayoutBuilder.addExecutionSetToken(cmdsLayoutBuilder.getStreamRange(), executionSetType, stageFlags);
1135 }
1136
1137 if (params.indexBufferToken())
1138 cmdsLayoutBuilder.addIndexBufferToken(cmdsLayoutBuilder.getStreamRange(),
1139 VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT);
1140
1141 if (params.testType == TestType::DRAW_COUNT)
1142 cmdsLayoutBuilder.addDrawCountToken(cmdsLayoutBuilder.getStreamRange());
1143 else if (params.indexedDraws())
1144 cmdsLayoutBuilder.addDrawIndexedCountToken(cmdsLayoutBuilder.getStreamRange());
1145 else
1146 DE_ASSERT(false);
1147
1148 const auto cmdsLayout = cmdsLayoutBuilder.build(ctx.vkd, ctx.device);
1149
1150 // Indirect execution set, if needed.
1151 ExecutionSetManagerPtr executionSetManager;
1152 VkIndirectExecutionSetEXT indirectExecutionSet = VK_NULL_HANDLE;
1153
1154 if (params.useExecutionSet)
1155 {
1156 if (params.useShaderObjects)
1157 {
1158 const std::vector<VkDescriptorSetLayout> vertSetLayouts{*vertSetLayout};
1159 const std::vector<VkDescriptorSetLayout> fragSetLayouts{VK_NULL_HANDLE, *fragSetLayout};
1160
1161 const std::vector<IESStageInfo> stagesInfo = {
1162 IESStageInfo{vertShadersDGC.at(0u)->get(), vertSetLayouts},
1163 IESStageInfo{fragShadersDGC.at(0u)->get(), fragSetLayouts},
1164 };
1165
1166 executionSetManager = makeExecutionSetManagerShader(ctx.vkd, ctx.device, stagesInfo, pcRanges,
1167 shaderSetCount * kPipelineShaders);
1168
1169 // Note we start at 1 and rely on the initial entry set above.
1170 for (uint32_t i = 1u; i < shaderSetCount; ++i)
1171 {
1172 executionSetManager->addShader(i * kPipelineShaders + 0u, vertShadersDGC.at(i)->get());
1173 executionSetManager->addShader(i * kPipelineShaders + 1u, fragShadersDGC.at(i)->get());
1174 }
1175 executionSetManager->update();
1176 indirectExecutionSet = executionSetManager->get();
1177 }
1178 else
1179 {
1180 executionSetManager =
1181 makeExecutionSetManagerPipeline(ctx.vkd, ctx.device, *pipelines.at(0u), kSequenceCount);
1182 for (uint32_t i = 0u; i < shaderSetCount; ++i)
1183 executionSetManager->addPipeline(i, *pipelines.at(i));
1184 executionSetManager->update();
1185 indirectExecutionSet = executionSetManager->get();
1186 }
1187 }
1188
1189 // DGC buffer contents.
1190 std::vector<uint32_t> dgcData;
1191 dgcData.reserve((kSequenceCount * cmdsLayoutBuilder.getStreamStride()) / sizeof(uint32_t));
1192
1193 for (uint32_t i = 0u; i < kSequenceCount; ++i)
1194 {
1195 if (params.useExecutionSet)
1196 {
1197 if (params.useShaderObjects)
1198 {
1199 pushBackElement(dgcData, i * kPipelineShaders + 0u);
1200 pushBackElement(dgcData, i * kPipelineShaders + 1u);
1201 }
1202 else
1203 pushBackElement(dgcData, i);
1204 }
1205 if (params.indexBufferToken())
1206 pushBackElement(dgcData, indexBufferTokenData.at(i));
1207 pushBackElement(dgcData, drawTokenData.at(i));
1208 }
1209
1210 // DGC buffer with those contents.
1211 const auto dgcBufferSize = static_cast<VkDeviceSize>(de::dataSize(dgcData));
1212 DGCBuffer dgcBuffer(ctx.vkd, ctx.device, ctx.allocator, dgcBufferSize);
1213 auto &dgcBufferAlloc = dgcBuffer.getAllocation();
1214 void *dgcBufferData = dgcBufferAlloc.getHostPtr();
1215
1216 deMemcpy(dgcBufferData, de::dataOrNull(dgcData), de::dataSize(dgcData));
1217
1218 // Preprocess buffer.
1219 const auto prepPipeline =
1220 (indirectExecutionSet == VK_NULL_HANDLE && !params.useShaderObjects ? *pipelines.at(0u) : VK_NULL_HANDLE);
1221
1222 std::vector<VkShaderEXT> prepShaders;
1223 if (indirectExecutionSet == VK_NULL_HANDLE && params.useShaderObjects)
1224 {
1225 prepShaders.push_back(*vertShaders.at(0));
1226 prepShaders.push_back(*fragShaders.at(0));
1227 }
1228 const std::vector<VkShaderEXT> *shadersVecPtr = (prepShaders.empty() ? nullptr : &prepShaders);
1229 PreprocessBufferExt preprocessBuffer(ctx.vkd, ctx.device, ctx.allocator, indirectExecutionSet, *cmdsLayout,
1230 kSequenceCount, maxDrawCount, prepPipeline, shadersVecPtr);
1231
1232 // Command pool and buffer.
1233 CommandPoolWithBuffer cmd(ctx.vkd, ctx.device, ctx.qfIndex);
1234 const auto cmdBuffer = *cmd.cmdBuffer;
1235
1236 const tcu::Vec4 fbClearColor(0.0f, 0.0f, 0.0f, 1.0f);
1237
1238 // Generated commands info.
1239 const DGCGenCmdsInfo cmdsInfo(
1240 stageFlags, // VkShaderStageFlags shaderStages;
1241 indirectExecutionSet, // VkIndirectExecutionSetEXT indirectExecutionSet;
1242 *cmdsLayout, // VkIndirectCommandsLayoutEXT indirectCommandsLayout;
1243 dgcBuffer.getDeviceAddress(), // VkDeviceAddress indirectAddress;
1244 dgcBuffer.getSize(), // VkDeviceSize indirectAddressSize;
1245 preprocessBuffer.getDeviceAddress(), // VkDeviceAddress preprocessAddress;
1246 preprocessBuffer.getSize(), // VkDeviceSize preprocessSize;
1247 kSequenceCount, // uint32_t maxSequenceCount;
1248 0ull, // VkDeviceAddress sequenceCountAddress;
1249 pixelCountU, // uint32_t maxDrawCount;
1250 prepPipeline, shadersVecPtr);
1251
1252 // When preprocessing, we need to use a command buffer to record state.
1253 // The preprocessing step needs to happen outside the render pass.
1254 Move<VkCommandBuffer> separateStateCmdBuffer;
1255
1256 // A command buffer we want to record state into.
1257 // .first is the command buffer itself.
1258 // .second, if not NULL, means we'll record a preprocess command with it as the state command buffer.
1259 using StateCmdBuffer = std::pair<VkCommandBuffer, VkCommandBuffer>;
1260 const VkCommandBuffer kNullCmdBuffer = VK_NULL_HANDLE; // Workaround for types and emplace_back below.
1261 std::vector<StateCmdBuffer> stateCmdBuffers;
1262
1263 // Sequences and iterations for the different cases:
1264 // - PreprocessType::NONE
1265 // - Only one loop iteration.
1266 // - Iteration 0: .first = main cmd buffer, .second = NULL
1267 // - No preprocess, bind state
1268 // - Execute.
1269 // - PreprocessType::OTHER_STATE_CMD_BUFFER
1270 // - Iteration 0: .first = state cmd buffer, .second = NULL
1271 // - No preprocess, bind state
1272 // - Iteration 1: .first = main cmd buffer, .second = state cmd buffer
1273 // - Preprocess with state cmd buffer, bind state on main
1274 // - Execute.
1275 // - PreprocessType::SAME_STATE_CMD_BUFFER
1276 // - Iteration 0: .first = main cmd buffer, .second = NULL
1277 // - No preprocess, bind state
1278 // - Iteration 1: .first = main cmd buffer, .second = main cmd buffer
1279 // - Preprocess with main cmd buffer, break
1280 // - Execute.
1281 switch (params.preprocessType)
1282 {
1283 case PreprocessType::NONE:
1284 stateCmdBuffers.emplace_back(cmdBuffer, kNullCmdBuffer);
1285 break;
1286 case PreprocessType::SAME_STATE_CMD_BUFFER:
1287 stateCmdBuffers.emplace_back(cmdBuffer, kNullCmdBuffer);
1288 stateCmdBuffers.emplace_back(cmdBuffer, cmdBuffer);
1289 break;
1290 case PreprocessType::OTHER_STATE_CMD_BUFFER:
1291 separateStateCmdBuffer =
1292 allocateCommandBuffer(ctx.vkd, ctx.device, *cmd.cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1293 stateCmdBuffers.emplace_back(*separateStateCmdBuffer, kNullCmdBuffer);
1294 stateCmdBuffers.emplace_back(cmdBuffer, *separateStateCmdBuffer);
1295 break;
1296 default:
1297 DE_ASSERT(false);
1298 }
1299
1300 // Record pre-execution state to all needed command buffers.
1301 VkCommandBuffer prevCmdBuffer = VK_NULL_HANDLE;
1302 for (const auto &stateCmdBufferPair : stateCmdBuffers)
1303 {
1304 const auto &recCmdBuffer = stateCmdBufferPair.first;
1305
1306 // Only begin each command buffer once.
1307 if (recCmdBuffer != prevCmdBuffer)
1308 {
1309 beginCommandBuffer(ctx.vkd, recCmdBuffer);
1310 prevCmdBuffer = recCmdBuffer;
1311 }
1312
1313 if (stateCmdBufferPair.second != VK_NULL_HANDLE)
1314 {
1315 ctx.vkd.cmdPreprocessGeneratedCommandsEXT(recCmdBuffer, &cmdsInfo.get(), stateCmdBufferPair.second);
1316 separateStateCmdBuffer = Move<VkCommandBuffer>(); // Delete separate state command buffer right away.
1317
1318 preprocessToExecuteBarrierExt(ctx.vkd, recCmdBuffer);
1319
1320 // Break for iteration 1 of PreprocessType::SAME_STATE_CMD_BUFFER. See above.
1321 if (stateCmdBufferPair.first == stateCmdBufferPair.second)
1322 break;
1323 }
1324
1325 if (params.useExecutionSet || params.checkDrawParams)
1326 {
1327 const std::vector<VkDescriptorSet> descriptorSets{*vertDescSet, *fragDescSet};
1328 ctx.vkd.cmdBindDescriptorSets(recCmdBuffer, bindPoint, *pipelineLayout, 0u, de::sizeU32(descriptorSets),
1329 de::dataOrNull(descriptorSets), 0u, nullptr);
1330 ctx.vkd.cmdPushConstants(recCmdBuffer, *pipelineLayout, pcStages, 0u, pcSize, &pcData);
1331 }
1332
1333 ctx.vkd.cmdBindVertexBuffers(recCmdBuffer, 0u, 1u, &vertexBuffer.get(), &vertexBufferOffset);
1334 if (params.testType == TestType::DRAW_INDEXED_COUNT)
1335 ctx.vkd.cmdBindIndexBuffer(recCmdBuffer, indexBuffers.at(0u)->get(), 0ull, VK_INDEX_TYPE_UINT32);
1336
1337 if (!params.useShaderObjects)
1338 ctx.vkd.cmdBindPipeline(recCmdBuffer, bindPoint, *pipelines.at(0u)); // Execution set or not.
1339 else
1340 {
1341 std::map<VkShaderStageFlagBits, VkShaderEXT> boundShaders;
1342 if (meshFeatures.meshShader)
1343 boundShaders[VK_SHADER_STAGE_MESH_BIT_EXT] = VK_NULL_HANDLE;
1344 if (meshFeatures.taskShader)
1345 boundShaders[VK_SHADER_STAGE_TASK_BIT_EXT] = VK_NULL_HANDLE;
1346 if (features.tessellationShader)
1347 {
1348 boundShaders[VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT] = VK_NULL_HANDLE;
1349 boundShaders[VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = VK_NULL_HANDLE;
1350 }
1351 if (features.geometryShader)
1352 boundShaders[VK_SHADER_STAGE_GEOMETRY_BIT] = VK_NULL_HANDLE;
1353
1354 if (params.useExecutionSet)
1355 {
1356 boundShaders[VK_SHADER_STAGE_VERTEX_BIT] = vertShadersDGC.at(0u)->get();
1357 boundShaders[VK_SHADER_STAGE_FRAGMENT_BIT] = fragShadersDGC.at(0u)->get();
1358 }
1359 else
1360 {
1361 boundShaders[VK_SHADER_STAGE_VERTEX_BIT] = *vertShaders.at(0u);
1362 boundShaders[VK_SHADER_STAGE_FRAGMENT_BIT] = *fragShaders.at(0u);
1363 }
1364
1365 {
1366 std::vector<VkShaderStageFlagBits> stages;
1367 std::vector<VkShaderEXT> shaders;
1368
1369 stages.reserve(boundShaders.size());
1370 shaders.reserve(boundShaders.size());
1371
1372 for (const auto &stageShader : boundShaders)
1373 {
1374 stages.push_back(stageShader.first);
1375 shaders.push_back(stageShader.second);
1376 }
1377
1378 DE_ASSERT(shaders.size() == stages.size());
1379 ctx.vkd.cmdBindShadersEXT(recCmdBuffer, de::sizeU32(shaders), de::dataOrNull(stages),
1380 de::dataOrNull(shaders));
1381 }
1382 }
1383
1384 if (params.useShaderObjects)
1385 bindShaderObjectState(ctx.vkd, getDeviceCreationExtensions(context), recCmdBuffer, viewports, scissors,
1386 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0u, &vertexInputStateCreateInfo, nullptr,
1387 nullptr, nullptr, nullptr);
1388 }
1389
1390 if (params.useShaderObjects)
1391 {
1392 const auto clearColor = makeClearValueColor(fbClearColor);
1393 const auto preClearBarrier =
1394 makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
1395 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, colorBuffer.getImage(), colorSRR);
1396 const auto postClearBarrier = makeImageMemoryBarrier(
1397 VK_ACCESS_TRANSFER_WRITE_BIT, (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),
1398 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, colorBuffer.getImage(),
1399 colorSRR);
1400
1401 cmdPipelineImageMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1402 VK_PIPELINE_STAGE_TRANSFER_BIT, &preClearBarrier);
1403 ctx.vkd.cmdClearColorImage(cmdBuffer, colorBuffer.getImage(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1404 &clearColor.color, 1u, &colorSRR);
1405 cmdPipelineImageMemoryBarrier(ctx.vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
1406 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &postClearBarrier);
1407 beginRendering(ctx.vkd, cmdBuffer, colorBuffer.getImageView(), scissors.at(0u), clearColor /*not used*/,
1408 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
1409 }
1410 else
1411 beginRenderPass(ctx.vkd, cmdBuffer, *renderPass, *framebuffer, scissors.at(0u), fbClearColor);
1412
1413 {
1414 const VkBool32 isPreprocessed = makeVkBool(params.doPreprocess());
1415 ctx.vkd.cmdExecuteGeneratedCommandsEXT(cmdBuffer, isPreprocessed, &cmdsInfo.get());
1416 }
1417
1418 if (params.useShaderObjects)
1419 endRendering(ctx.vkd, cmdBuffer);
1420 else
1421 endRenderPass(ctx.vkd, cmdBuffer);
1422
1423 copyImageToBuffer(ctx.vkd, cmdBuffer, colorBuffer.getImage(), colorBuffer.getBuffer(), fbExtent.swizzle(0, 1));
1424 endCommandBuffer(ctx.vkd, cmdBuffer);
1425 submitCommandsAndWait(ctx.vkd, ctx.device, ctx.queue, cmdBuffer);
1426
1427 // Generate reference image.
1428 const auto tcuFormat = mapVkFormat(colorFormat);
1429 tcu::TextureLevel refLevel(tcuFormat, fbExtent.x(), fbExtent.y(), fbExtent.z());
1430 auto refAccess = refLevel.getAccess();
1431
1432 const auto maxInstanceIndex = static_cast<float>(kMaxInstanceIndex);
1433 const bool indexed = (params.indexedDraws());
1434 const auto totalDraws = (indexed ? chunkIndexedDraws.size() : chunkDraws.size());
1435 uint32_t prevPixels = 0u;
1436
1437 for (size_t drawIdx = 0u; drawIdx < totalDraws; ++drawIdx)
1438 {
1439 const auto vertexCount =
1440 (indexed ? chunkIndexedDraws.at(drawIdx).indexCount : chunkDraws.at(drawIdx).vertexCount);
1441 const auto firstInstance =
1442 (indexed ? chunkIndexedDraws.at(drawIdx).firstInstance : chunkDraws.at(drawIdx).firstInstance);
1443 const auto instanceCount =
1444 (indexed ? chunkIndexedDraws.at(drawIdx).instanceCount : chunkDraws.at(drawIdx).instanceCount);
1445
1446 DE_ASSERT(vertexCount % kPerTriangleVertices == 0u);
1447 const auto chunkPixels = vertexCount / kPerTriangleVertices;
1448
1449 for (uint32_t i = 0u; i < chunkPixels; ++i)
1450 {
1451 const auto curPixel = prevPixels + i;
1452 const auto pixelIdx = (indexed ? (pixelCountU - 1u - curPixel) : curPixel); // Reversed for indexed draws.
1453 const auto row = static_cast<int>(pixelIdx / vkExtent.width);
1454 const auto col = static_cast<int>(pixelIdx % vkExtent.width);
1455 const auto redValue = static_cast<float>(firstInstance + (instanceCount - 1u)) / maxInstanceIndex;
1456 const auto &extraData = vertices.at(pixelIdx * kPerTriangleVertices).extraData;
1457 const bool blank = (extraData.x() < 0.0f || extraData.y() < 0.0f); // Filtered by clip or cull distance.
1458
1459 const tcu::Vec4 color(redValue, 0.0f, 1.0f, 1.0f);
1460 refAccess.setPixel((blank ? fbClearColor : color), col, row);
1461 }
1462 prevPixels += chunkPixels;
1463 }
1464
1465 // Reference access.
1466 auto &colorAlloc = colorBuffer.getBufferAllocation();
1467 invalidateAlloc(ctx.vkd, ctx.device, colorAlloc);
1468
1469 const tcu::ConstPixelBufferAccess resAccess(tcuFormat, fbExtent, colorAlloc.getHostPtr());
1470
1471 const float colorThreshold = 0.005f; // 1/255 < 0.005f < 2/255.
1472 const tcu::Vec4 threshold(colorThreshold, colorThreshold, colorThreshold, colorThreshold);
1473 auto &log = context.getTestContext().getLog();
1474 if (!tcu::floatThresholdCompare(log, "Result", "", refAccess, resAccess, threshold, tcu::COMPARE_LOG_ON_ERROR))
1475 TCU_FAIL("Unexpected result found in color buffer; check log for details");
1476
1477 return tcu::TestStatus::pass("Pass");
1478 }
1479
1480 } // anonymous namespace
1481
createDGCGraphicsDrawCountTestsExt(tcu::TestContext & testCtx)1482 tcu::TestCaseGroup *createDGCGraphicsDrawCountTestsExt(tcu::TestContext &testCtx)
1483 {
1484 GroupPtr mainGroup(new tcu::TestCaseGroup(testCtx, "draw_count"));
1485
1486 GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, "token_draw_count"));
1487 GroupPtr drawIndexedCountGroup(new tcu::TestCaseGroup(testCtx, "token_draw_indexed_count"));
1488
1489 const struct
1490 {
1491 PreprocessType preprocessType;
1492 const char *suffix;
1493 } preprocessCases[] = {
1494 {PreprocessType::NONE, ""},
1495 {PreprocessType::SAME_STATE_CMD_BUFFER, "_preprocess_same_state_cmd_buffer"},
1496 {PreprocessType::OTHER_STATE_CMD_BUFFER, "_preprocess_separate_state_cmd_buffer"},
1497 };
1498
1499 const struct
1500 {
1501 TestType testType;
1502 const char *suffix;
1503 } testTypeCases[] = {
1504 {TestType::DRAW_COUNT, ""},
1505 {TestType::DRAW_INDEXED_COUNT, ""}, // Also no suffix but will go into a different test group.
1506 {TestType::DRAW_INDEXED_COUNT_INDEX_TOKEN, "_with_index_buffer_token"},
1507 };
1508
1509 for (const auto &testTypeCase : testTypeCases)
1510 for (const bool executionSets : {false, true})
1511 for (const bool shaderObjects : {false, true})
1512 for (const auto &preProcessCase : preprocessCases)
1513 for (const bool unordered : {false, true})
1514 for (const bool checkDrawParams : {false, true})
1515 {
1516 const TestParams params{testTypeCase.testType, preProcessCase.preprocessType,
1517 checkDrawParams, executionSets,
1518 shaderObjects, unordered};
1519
1520 const std::string testName =
1521 std::string() + (shaderObjects ? "shader_objects" : "pipelines") +
1522 (executionSets ? "_execution_set" : "") + preProcessCase.suffix +
1523 (unordered ? "_unordered" : "") + (checkDrawParams ? "_check_draw_params" : "") +
1524 testTypeCase.suffix;
1525
1526 const auto group =
1527 (params.indexedDraws() ? drawIndexedCountGroup.get() : drawCountGroup.get());
1528 addFunctionCaseWithPrograms(group, testName, checkDrawCountSupport, initDrawCountPrograms,
1529 testDrawCountRun, params);
1530 }
1531
1532 mainGroup->addChild(drawCountGroup.release());
1533 mainGroup->addChild(drawIndexedCountGroup.release());
1534
1535 return mainGroup.release();
1536 }
1537
1538 } // namespace DGC
1539 } // namespace vkt
1540