1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Indirect compute dispatch tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es31fIndirectComputeDispatchTests.hpp"
25 #include "gluObjectWrapper.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "glwFunctions.hpp"
29 #include "glwEnums.hpp"
30 #include "tcuVector.hpp"
31 #include "tcuStringTemplate.hpp"
32 #include "tcuTestLog.hpp"
33 #include "deStringUtil.hpp"
34
35 #include <vector>
36 #include <string>
37 #include <map>
38
39 namespace deqp
40 {
41 namespace gles31
42 {
43 namespace Functional
44 {
45
46 using tcu::UVec3;
47 using tcu::TestLog;
48 using std::vector;
49 using std::string;
50 using std::map;
51
52 // \todo [2014-02-17 pyry] Should be extended with following:
53
54 // Negative:
55 // - no active shader program
56 // - indirect negative or not aligned
57 // - indirect + size outside buffer bounds
58 // - no buffer bound to DRAW_INDIRECT_BUFFER
// - (implicit) buffer mapped
60
61 // Robustness:
62 // - lot of small work group launches
63 // - very large work group size
64 // - no synchronization, touched by gpu
// - compute program overwriting buffer
66
67 namespace
68 {
69
70 enum
71 {
72 RESULT_BLOCK_BASE_SIZE = (3+1)*(int)sizeof(deUint32), // uvec3 + uint
73 RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0,
74 RESULT_BLOCK_NUM_PASSED_OFFSET = 3*(int)sizeof(deUint32),
75
76 INDIRECT_COMMAND_SIZE = 3*(int)sizeof(deUint32)
77 };
78
//! Selects how the contents of the indirect command buffer are produced.
enum GenBuffer
{
	GEN_BUFFER_UPLOAD	= 0,	//!< Commands are written on the CPU and uploaded with glBufferData().
	GEN_BUFFER_COMPUTE	= 1,	//!< Commands are written into the buffer by a compute shader.

	GEN_BUFFER_LAST		= 2		//!< One past the last valid value.
};
86
genVerifySources(const UVec3 & workGroupSize)87 glu::ProgramSources genVerifySources (const UVec3& workGroupSize)
88 {
89 static const char* s_verifyDispatchTmpl =
90 "#version 310 es\n"
91 "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
92 "layout(binding = 0, std430) buffer Result\n"
93 "{\n"
94 " uvec3 expectedGroupCount;\n"
95 " coherent uint numPassed;\n"
96 "} result;\n"
97 "void main (void)\n"
98 "{\n"
99 " if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
100 " atomicAdd(result.numPassed, 1u);\n"
101 "}\n";
102
103 map<string, string> args;
104
105 args["LOCAL_SIZE_X"] = de::toString(workGroupSize.x());
106 args["LOCAL_SIZE_Y"] = de::toString(workGroupSize.y());
107 args["LOCAL_SIZE_Z"] = de::toString(workGroupSize.z());
108
109 return glu::ProgramSources() << glu::ComputeSource(tcu::StringTemplate(s_verifyDispatchTmpl).specialize(args));
110 }
111
112 class IndirectDispatchCase : public TestCase
113 {
114 public:
115 IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer);
116 ~IndirectDispatchCase (void);
117
118 IterateResult iterate (void);
119
120 protected:
121 struct DispatchCommand
122 {
123 deIntptr offset;
124 UVec3 numWorkGroups;
125
DispatchCommanddeqp::gles31::Functional::__anon9f66ec160111::IndirectDispatchCase::DispatchCommand126 DispatchCommand (void) : offset(0) {}
DispatchCommanddeqp::gles31::Functional::__anon9f66ec160111::IndirectDispatchCase::DispatchCommand127 DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {}
128 };
129
130 GenBuffer m_genBuffer;
131 deUintptr m_bufferSize;
132 UVec3 m_workGroupSize;
133 vector<DispatchCommand> m_commands;
134
135 void createCommandBuffer (deUint32 buffer) const;
136 void createResultBuffer (deUint32 buffer) const;
137
138 bool verifyResultBuffer (deUint32 buffer);
139
140 void createCmdBufferUpload (deUint32 buffer) const;
141 void createCmdBufferCompute (deUint32 buffer) const;
142
143 private:
144 IndirectDispatchCase (const IndirectDispatchCase&);
145 IndirectDispatchCase& operator= (const IndirectDispatchCase&);
146 };
147
IndirectDispatchCase(Context & context,const char * name,const char * description,GenBuffer genBuffer)148 IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer)
149 : TestCase (context, name, description)
150 , m_genBuffer (genBuffer)
151 , m_bufferSize (0)
152 {
153 }
154
~IndirectDispatchCase(void)155 IndirectDispatchCase::~IndirectDispatchCase (void)
156 {
157 }
158
getResultBlockAlignedSize(const glw::Functions & gl)159 static int getResultBlockAlignedSize (const glw::Functions& gl)
160 {
161 const int baseSize = RESULT_BLOCK_BASE_SIZE;
162 int alignment = 0;
163 gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &alignment);
164
165 if (alignment == 0 || (baseSize % alignment == 0))
166 return baseSize;
167 else
168 return (baseSize/alignment + 1)*alignment;
169 }
170
createCommandBuffer(deUint32 buffer) const171 void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const
172 {
173 switch (m_genBuffer)
174 {
175 case GEN_BUFFER_UPLOAD: createCmdBufferUpload (buffer); break;
176 case GEN_BUFFER_COMPUTE: createCmdBufferCompute (buffer); break;
177 default:
178 DE_ASSERT(false);
179 }
180 }
181
createCmdBufferUpload(deUint32 buffer) const182 void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const
183 {
184 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
185 vector<deUint8> data (m_bufferSize);
186
187 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
188 {
189 DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3);
190 DE_ASSERT(cmdIter->offset >= 0);
191 DE_ASSERT(cmdIter->offset%sizeof(deUint32) == 0);
192 DE_ASSERT(cmdIter->offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize);
193
194 deUint32* const dstPtr = (deUint32*)&data[cmdIter->offset];
195
196 dstPtr[0] = cmdIter->numWorkGroups[0];
197 dstPtr[1] = cmdIter->numWorkGroups[1];
198 dstPtr[2] = cmdIter->numWorkGroups[2];
199 }
200
201 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
202 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)data.size(), &data[0], GL_STATIC_DRAW);
203 }
204
createCmdBufferCompute(deUint32 buffer) const205 void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const
206 {
207 std::ostringstream src;
208
209 // Header
210 src <<
211 "#version 310 es\n"
212 "layout(local_size_x = 1) in;\n"
213 "layout(std430, binding = 1) buffer Out\n"
214 "{\n"
215 " highp uint data[];\n"
216 "};\n"
217 "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
218 "{\n"
219 " data[offset+0u] = numWorkGroups.x;\n"
220 " data[offset+1u] = numWorkGroups.y;\n"
221 " data[offset+2u] = numWorkGroups.z;\n"
222 "}\n"
223 "void main (void)\n"
224 "{\n";
225
226 // Commands
227 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
228 {
229 const deUint32 offs = (deUint32)(cmdIter->offset/4);
230 DE_ASSERT((deIntptr)offs*4 == cmdIter->offset);
231
232 src << "\twriteCmd(" << offs << "u, uvec3("
233 << cmdIter->numWorkGroups.x() << "u, "
234 << cmdIter->numWorkGroups.y() << "u, "
235 << cmdIter->numWorkGroups.z() << "u));\n";
236 }
237
238 src << "}\n";
239
240 {
241 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
242 glu::ShaderProgram program (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(src.str()));
243
244 m_testCtx.getLog() << program;
245 if (!program.isOk())
246 TCU_FAIL("Compile failed");
247
248 gl.useProgram(program.getProgram());
249
250 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
251 gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW);
252 gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer);
253 GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
254
255 gl.dispatchCompute(1,1,1);
256 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed");
257
258 gl.memoryBarrier(GL_COMMAND_BARRIER_BIT);
259 GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed");
260 }
261 }
262
createResultBuffer(deUint32 buffer) const263 void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const
264 {
265 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
266 const int resultBlockSize = getResultBlockAlignedSize(gl);
267 const int resultBufferSize = resultBlockSize*(int)m_commands.size();
268 vector<deUint8> data (resultBufferSize);
269
270 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
271 {
272 deUint8* const dstPtr = &data[resultBlockSize*cmdNdx];
273
274 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0];
275 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1];
276 *(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2];
277 *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
278 }
279
280 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
281 gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizei)data.size(), &data[0], GL_STATIC_READ);
282 }
283
computeInvocationCount(const UVec3 & workGroupSize,const UVec3 & numWorkGroups)284 deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups)
285 {
286 const int numInvocationsPerGroup = workGroupSize[0]*workGroupSize[1]*workGroupSize[2];
287 const int numGroups = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2];
288
289 return numInvocationsPerGroup*numGroups;
290 }
291
verifyResultBuffer(deUint32 buffer)292 bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer)
293 {
294 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
295
296 const int resultBlockSize = getResultBlockAlignedSize(gl);
297 const int resultBufferSize = resultBlockSize*(int)m_commands.size();
298
299 void* mapPtr = DE_NULL;
300 bool allOk = true;
301
302 try
303 {
304 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
305 mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT);
306
307 GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed");
308 TCU_CHECK(mapPtr);
309
310 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
311 {
312 const DispatchCommand& cmd = m_commands[cmdNdx];
313 const deUint8* const srcPtr = (const deUint8*)mapPtr + cmdNdx*resultBlockSize;
314 const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
315 const deUint32 expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups);
316
317 // Verify numPassed.
318 if (numPassed != expectedCount)
319 {
320 m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
321 << ": got numPassed = " << numPassed << ", expected " << expectedCount
322 << TestLog::EndMessage;
323 allOk = false;
324 }
325 }
326 }
327 catch (...)
328 {
329 if (mapPtr)
330 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
331 }
332
333 gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
334 GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed");
335
336 return allOk;
337 }
338
iterate(void)339 IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void)
340 {
341 const glu::RenderContext& renderCtx = m_context.getRenderContext();
342 const glw::Functions& gl = renderCtx.getFunctions();
343
344 const glu::ShaderProgram program (renderCtx, genVerifySources(m_workGroupSize));
345
346 glu::Buffer cmdBuffer (renderCtx);
347 glu::Buffer resultBuffer (renderCtx);
348
349 m_testCtx.getLog() << program;
350 TCU_CHECK_MSG(program.isOk(), "Compile failed");
351
352 m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage;
353 {
354 tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)");
355
356 for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
357 m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset
358 << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups
359 << TestLog::EndMessage;
360 }
361
362 createResultBuffer(*resultBuffer);
363 createCommandBuffer(*cmdBuffer);
364
365 gl.useProgram(program.getProgram());
366 gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer);
367 GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed");
368
369 {
370 const int resultBlockAlignedSize = getResultBlockAlignedSize(gl);
371 deIntptr curOffset = 0;
372
373 for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
374 {
375 gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize);
376 gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset);
377
378 curOffset += resultBlockAlignedSize;
379 }
380 }
381
382 GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed");
383
384 if (verifyResultBuffer(*resultBuffer))
385 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
386 else
387 m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
388
389 return STOP;
390 }
391
392 class SingleDispatchCase : public IndirectDispatchCase
393 {
394 public:
SingleDispatchCase(Context & context,const char * name,const char * description,GenBuffer genBuffer,deUintptr bufferSize,deUintptr offset,const UVec3 & workGroupSize,const UVec3 & numWorkGroups)395 SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups)
396 : IndirectDispatchCase(context, name, description, genBuffer)
397 {
398 m_bufferSize = bufferSize;
399 m_workGroupSize = workGroupSize;
400 m_commands.push_back(DispatchCommand(offset, numWorkGroups));
401 }
402 };
403
404 class MultiDispatchCase : public IndirectDispatchCase
405 {
406 public:
MultiDispatchCase(Context & context,GenBuffer genBuffer)407 MultiDispatchCase (Context& context, GenBuffer genBuffer)
408 : IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer)
409 {
410 m_bufferSize = 1<<10;
411 m_workGroupSize = UVec3(3,1,2);
412
413 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1)));
414 m_commands.push_back(DispatchCommand(INDIRECT_COMMAND_SIZE, UVec3(2,1,1)));
415 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1)));
416 m_commands.push_back(DispatchCommand(40, UVec3(1,1,7)));
417 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4)));
418 }
419 };
420
421 class MultiDispatchReuseCommandCase : public IndirectDispatchCase
422 {
423 public:
MultiDispatchReuseCommandCase(Context & context,GenBuffer genBuffer)424 MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer)
425 : IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer)
426 {
427 m_bufferSize = 1<<10;
428 m_workGroupSize = UVec3(3,1,2);
429
430 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1)));
431 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1)));
432 m_commands.push_back(DispatchCommand(0, UVec3(1,1,1)));
433 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1)));
434 m_commands.push_back(DispatchCommand(104, UVec3(1,3,1)));
435 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4)));
436 m_commands.push_back(DispatchCommand(52, UVec3(1,1,4)));
437 }
438 };
439
440 } // anonymous
441
IndirectComputeDispatchTests(Context & context)442 IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context)
443 : TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests")
444 {
445 }
446
~IndirectComputeDispatchTests(void)447 IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void)
448 {
449 }
450
init(void)451 void IndirectComputeDispatchTests::init (void)
452 {
453 static const struct
454 {
455 const char* name;
456 GenBuffer gen;
457 } s_genBuffer[] =
458 {
459 { "upload_buffer", GEN_BUFFER_UPLOAD },
460 { "gen_in_compute", GEN_BUFFER_COMPUTE }
461 };
462
463 static const struct
464 {
465 const char* name;
466 const char* description;
467 deUintptr bufferSize;
468 deUintptr offset;
469 UVec3 workGroupSize;
470 UVec3 numWorkGroups;
471 } s_singleDispatchCases[] =
472 {
473 // Name Desc BufferSize Offs WorkGroupSize NumWorkGroups
474 { "single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(1,1,1) },
475 { "multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(2,3,5) },
476 { "multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(2,3,1), UVec3(1,2,3) },
477 { "small_offset", "Small offset", 16+INDIRECT_COMMAND_SIZE, 16, UVec3(1,1,1), UVec3(1,1,1) },
478 { "large_offset", "Large offset", (2<<20), (1<<20) + 12, UVec3(1,1,1), UVec3(1,1,1) },
479 { "large_offset_multiple_invocations", "Large offset, multiple invocations", (2<<20), (1<<20) + 12, UVec3(2,3,1), UVec3(1,2,3) },
480 { "empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(0,0,0) },
481 };
482
483 for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++)
484 {
485 const GenBuffer genBuf = s_genBuffer[genNdx].gen;
486 tcu::TestCaseGroup* const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, "");
487 addChild(genGroup);
488
489 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++)
490 genGroup->addChild(new SingleDispatchCase(m_context,
491 s_singleDispatchCases[ndx].name,
492 s_singleDispatchCases[ndx].description,
493 genBuf,
494 s_singleDispatchCases[ndx].bufferSize,
495 s_singleDispatchCases[ndx].offset,
496 s_singleDispatchCases[ndx].workGroupSize,
497 s_singleDispatchCases[ndx].numWorkGroups));
498
499 genGroup->addChild(new MultiDispatchCase (m_context, genBuf));
500 genGroup->addChild(new MultiDispatchReuseCommandCase (m_context, genBuf));
501 }
502 }
503
504 } // Functional
505 } // gles31
506 } // deqp
507