1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Synchronization Tests
22 *//*--------------------------------------------------------------------*/
23
#include "es31fSynchronizationTests.hpp"
#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuSurface.hpp"
#include "tcuRenderTarget.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "gluObjectWrapper.hpp"
#include "gluPixelTransfer.hpp"
#include "gluContextInfo.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deMemory.h"
#include "deRandom.hpp"

#include <algorithm>
#include <cstring>
#include <map>
#include <sstream>
#include <string>
#include <vector>
42
43 namespace deqp
44 {
45 namespace gles31
46 {
47 namespace Functional
48 {
49 namespace
50 {
51
checkSupport(Context & ctx)52 static bool checkSupport(Context& ctx)
53 {
54 auto ctxType = ctx.getRenderContext().getType();
55 return contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
56 contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
57 ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
58 }
59
validateSortedAtomicRampAdditionValueChain(const std::vector<deUint32> & valueChain,deUint32 sumValue,int & invalidOperationNdx,deUint32 & errorDelta,deUint32 & errorExpected)60 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
61 {
62 std::vector<deUint32> chainDelta(valueChain.size());
63
64 for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
65 chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
66
67 // chainDelta contains now the actual additions applied to the value
68 // check there exists an addition ramp form 1 to ...
69 std::sort(chainDelta.begin(), chainDelta.end());
70
71 for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
72 {
73 if ((int)chainDelta[callNdx] != callNdx+1)
74 {
75 invalidOperationNdx = callNdx;
76 errorDelta = chainDelta[callNdx];
77 errorExpected = callNdx+1;
78
79 return false;
80 }
81 }
82
83 return true;
84 }
85
readBuffer(const glw::Functions & gl,deUint32 target,int numElements,std::vector<deUint32> & result)86 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
87 {
88 const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
89 GLU_EXPECT_NO_ERROR(gl.getError(), "map");
90
91 if (!ptr)
92 throw tcu::TestError("mapBufferRange returned NULL");
93
94 result.resize(numElements);
95 memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
96
97 if (gl.unmapBuffer(target) == GL_FALSE)
98 throw tcu::TestError("unmapBuffer returned false");
99 }
100
readBufferUint32(const glw::Functions & gl,deUint32 target)101 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
102 {
103 std::vector<deUint32> vec;
104
105 readBuffer(gl, target, 1, vec);
106
107 return vec[0];
108 }
109
110 //! Generate a ramp of values from 1 to numElements, and shuffle it
generateShuffledRamp(int numElements,std::vector<int> & ramp)111 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
112 {
113 de::Random rng(0xabcd);
114
115 // some positive (non-zero) unique values
116 ramp.resize(numElements);
117 for (int callNdx = 0; callNdx < numElements; ++callNdx)
118 ramp[callNdx] = callNdx + 1;
119
120 rng.shuffle(ramp.begin(), ramp.end());
121 }
122
specializeShader(Context & context,const char * code)123 static std::string specializeShader(Context& context, const char* code)
124 {
125 auto ctxType = context.getRenderContext().getType();
126 const bool isES32orGL45 = glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
127 glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
128 const glu::GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(ctxType);
129
130 std::map<std::string, std::string> specializationMap;
131 specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
132 specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] = isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";
133
134 return tcu::StringTemplate(code).specialize(specializationMap);
135 }
136
// Base class for inter-invocation synchronization cases. Each case runs a
// single compute dispatch in which every invocation accesses a shared
// "storage" (an SSBO or an r32i image), synchronizes as described by
// genBarrierSource(), and then validates data written either by itself or by
// another invocation in the same work group. Each invocation writes 1 to its
// slot in the result buffer iff all of its checks passed; verifyResults()
// then requires every slot to be 1.
class InterInvocationTestCase : public TestCase
{
public:
	// Backing store used for the inter-invocation communication.
	enum StorageType
	{
		STORAGE_BUFFER = 0,	//!< accesses go through a shader storage buffer
		STORAGE_IMAGE,		//!< accesses go through an r32i image

		STORAGE_LAST
	};
	// Bitfield modifiers for a case, combined into the 'flags' ctor argument.
	enum CaseFlags
	{
		FLAG_ATOMIC = 0x1,				//!< use atomic operations for storage access
		FLAG_ALIASING_STORAGES = 0x2,	//!< bind the same storage object to two binding points
		FLAG_IN_GROUP = 0x4,			//!< read values written by other invocations of the same work group
	};

	InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
	~InterInvocationTestCase (void);

private:
	void init (void);
	void deinit (void);
	IterateResult iterate (void);

	void runCompute (void);
	bool verifyResults (void);
	virtual std::string genShaderSource (void) const = 0;	//!< complete compute shader source, supplied by subclasses

protected:
	std::string genBarrierSource (void) const;	//!< GLSL statements synchronizing the write and read phases

	const StorageType m_storage;
	const bool m_useAtomic;				//!< use atomic operations for storage access
	const bool m_aliasingStorages;		//!< bind the same storage object to two binding points
	const bool m_syncWithGroup;			//!< verify data written by other invocations in the group
	const int m_workWidth; //!< total work width
	const int m_workHeight; //!< ... height
	const int m_localWidth; //!< group width
	const int m_localHeight; //!< group height
	const int m_elementsPerInvocation; //!< elements accessed by a single invocation

private:
	glw::GLuint m_storageBuf;
	glw::GLuint m_storageTex;
	glw::GLuint m_resultBuf;
	glu::ShaderProgram* m_program;
};

// 'flags' is a combination of CaseFlags bits; 'storage' selects the backing store.
InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: TestCase (context, name, desc)
	, m_storage (storage)
	, m_useAtomic ((flags & FLAG_ATOMIC) != 0)
	, m_aliasingStorages ((flags & FLAG_ALIASING_STORAGES) != 0)
	, m_syncWithGroup ((flags & FLAG_IN_GROUP) != 0)
	, m_workWidth (256)
	, m_workHeight (256)
	, m_localWidth (16)
	, m_localHeight (8)
	, m_elementsPerInvocation (8)
	, m_storageBuf (0)
	, m_storageTex (0)
	, m_resultBuf (0)
	, m_program (DE_NULL)
{
	DE_ASSERT(m_storage < STORAGE_LAST);
	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
}

InterInvocationTestCase::~InterInvocationTestCase (void)
{
	// deinit() only releases objects that are still alive, so this is safe
	// even if deinit() has already been run by the framework.
	deinit();
}
210
// Builds the compute program and allocates the storage (zero-filled) and
// result (filled with -1) objects. Throws NotSupportedError if atomic image
// operations are required but unavailable, TestError on build failure.
void InterInvocationTestCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	// requirements

	// atomic image operations need ES 3.2 / GL 4.5 or the OES extension (see checkSupport)
	if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

	// program

	// shader source is supplied by the concrete subclass
	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
	m_testCtx.getLog() << *m_program;
	if (!m_program->isOk())
		throw tcu::TestError("could not build program");

	// source

	if (m_storage == STORAGE_BUFFER)
	{
		// one element per (invocation, element index) pair, zero-initialized
		const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int bufferSize = bufferElements * (int)sizeof(deUint32);
		std::vector<deUint32> zeroBuffer (bufferElements, 0);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_storageBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// the per-invocation elements are stacked vertically: image height is
		// workHeight * elementsPerInvocation
		const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int bufferSize = bufferElements * (int)sizeof(deUint32);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genTextures(1, &m_storageTex);
		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");

		// Zero-fill
		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;

		{
			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	// destination

	{
		// one result slot per invocation; -1 so untouched slots fail verification
		const int bufferElements = m_workWidth * m_workHeight;
		const int bufferSize = bufferElements * (int)sizeof(deUint32);
		std::vector<deInt32> negativeBuffer (bufferElements, -1);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_resultBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
}
283
deinit(void)284 void InterInvocationTestCase::deinit (void)
285 {
286 if (m_storageBuf)
287 {
288 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
289 m_storageBuf = DE_NULL;
290 }
291
292 if (m_storageTex)
293 {
294 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
295 m_storageTex = DE_NULL;
296 }
297
298 if (m_resultBuf)
299 {
300 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
301 m_resultBuf = DE_NULL;
302 }
303
304 delete m_program;
305 m_program = DE_NULL;
306 }
307
// Runs the compute dispatch once and sets the test result from the contents
// of the result buffer. Always completes in a single iteration.
InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
{
	// Dispatch
	runCompute();

	// Verify buffer contents
	if (verifyResults())
		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	else
		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());

	return STOP;
}
321
// Binds the storage (binding point 1, and also 2 when aliasing) and the
// result buffer (binding point 0), then dispatches the compute grid covering
// the whole work area.
void InterInvocationTestCase::runCompute (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const int groupsX = m_workWidth / m_localWidth;
	const int groupsY = m_workHeight / m_localHeight;

	// work size must be an exact multiple of the work group size
	DE_ASSERT((m_workWidth % m_localWidth) == 0);
	DE_ASSERT((m_workHeight % m_localHeight) == 0);

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Dispatching compute.\n"
		<< " group size: " << m_localWidth << "x" << m_localHeight << "\n"
		<< " dispatch size: " << groupsX << "x" << groupsY << "\n"
		<< " total work size: " << m_workWidth << "x" << m_workHeight << "\n"
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_program->getProgram());

	// source
	// With aliasing storages the very same object is bound to binding points
	// 1 and 2, so writes through one interface alias the other.
	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
	{
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
	}
	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
	{
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");

		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
	}
	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
	{
		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
	}
	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
	{
		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);

		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
	}
	else
		DE_ASSERT(DE_FALSE);

	// destination
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

	// dispatch
	gl.dispatchCompute(groupsX, groupsY, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
}
380
verifyResults(void)381 bool InterInvocationTestCase::verifyResults (void)
382 {
383 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
384 const int errorFloodThreshold = 5;
385 int numErrorsLogged = 0;
386 const void* mapped = DE_NULL;
387 std::vector<deInt32> results (m_workWidth * m_workHeight);
388 bool error = false;
389
390 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
391 gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
392 mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
393 GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
394
395 // copy to properly aligned array
396 deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
397
398 if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
399 throw tcu::TestError("memory map store corrupted");
400
401 // check the results
402 for (int ndx = 0; ndx < (int)results.size(); ++ndx)
403 {
404 if (results[ndx] != 1)
405 {
406 error = true;
407
408 if (numErrorsLogged == 0)
409 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
410 if (numErrorsLogged++ < errorFloodThreshold)
411 m_testCtx.getLog() << tcu::TestLog::Message << " Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
412 else
413 {
414 // after N errors, no point continuing verification
415 m_testCtx.getLog() << tcu::TestLog::Message << " -- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
416 break;
417 }
418 }
419 }
420
421 if (!error)
422 m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
423 return !error;
424 }
425
genBarrierSource(void) const426 std::string InterInvocationTestCase::genBarrierSource (void) const
427 {
428 std::ostringstream buf;
429
430 if (m_syncWithGroup)
431 {
432 // Wait until all invocations in this work group have their texture/buffer read/write operations complete
433 // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
434 // we only require intra-workgroup synchronization.
435 buf << "\n"
436 << " groupMemoryBarrier();\n"
437 << " barrier();\n"
438 << "\n";
439 }
440 else if (m_storage == STORAGE_BUFFER)
441 {
442 DE_ASSERT(!m_syncWithGroup);
443
444 // Waiting only for data written by this invocation. Since all buffer reads and writes are
445 // processed in order (within a single invocation), we don't have to do anything.
446 buf << "\n";
447 }
448 else if (m_storage == STORAGE_IMAGE)
449 {
450 DE_ASSERT(!m_syncWithGroup);
451
452 // Waiting only for data written by this invocation. But since operations complete in undefined
453 // order, we have to wait for them to complete.
454 buf << "\n"
455 << " memoryBarrierImage();\n"
456 << "\n";
457 }
458 else
459 DE_ASSERT(DE_FALSE);
460
461 return buf.str();
462 }
463
// Intermediate base for cases whose shader is a fixed prologue (result
// buffer, storage declaration, index/coordinate helper) followed by a
// subclass-provided main block.
class InvocationBasicCase : public InterInvocationTestCase
{
public:
	InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string genShaderSource (void) const;
	virtual std::string genShaderMainBlock (void) const = 0;	//!< statements emitted into main(), supplied by subclasses
};

InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InterInvocationTestCase(context, name, desc, storage, flags)
{
}
477
// Assembles the full compute shader: common prologue, the storage declaration
// matching m_storage with an index/coordinate helper, and the main function
// wrapping the subclass-provided main block. allOk accumulates the per-
// invocation verdict that is written to the result buffer.
std::string InvocationBasicCase::genShaderSource (void) const
{
	const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream buf;

	// version, optional atomic-image extension, work group size and the
	// result buffer at binding point 0
	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
		<< "layout(binding=0, std430) buffer Output\n"
		<< "{\n"
		<< " highp int values[];\n"
		<< "} sb_result;\n";

	// storage at binding point 1 plus a helper mapping (local invocation,
	// element index) to a unique buffer index / image coordinate
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) coherent buffer Storage\n"
			<< "{\n"
			<< " highp int values[];\n"
			<< "} sb_store;\n"
			<< "\n"
			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< " highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< " return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
			<< "}\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
			<< "\n"
			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< " return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
			<< "}\n";
	else
		DE_ASSERT(DE_FALSE);

	// main: compute this invocation's result slot and group index, run the
	// subclass main block, and store the verdict
	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< " int resultNdx = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
		<< " int groupNdx = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
		<< " bool allOk = true;\n"
		<< "\n"
		<< genShaderMainBlock()
		<< "\n"
		<< " sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return specializeShader(m_context, buf.str().c_str());
}
526
// Case: write to the storage, synchronize, then read the values back and
// verify them. With FLAG_IN_GROUP the reads target elements written by a
// different invocation of the same work group.
class InvocationWriteReadCase : public InvocationBasicCase
{
public:
	InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string genShaderMainBlock (void) const;
};

InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
539
// Emits: write groupNdx to every element of this invocation, barrier, then
// check each element equals groupNdx. In atomic mode the read uses an
// exchange (storing 0) so each value is also consumed exactly once. The
// storage was zero-initialized, so the atomic adds also produce groupNdx.
std::string InvocationWriteReadCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// with group sync, read an element written by another invocation of the same work group
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
584
// Case: read the (zero-initialized) storage first, synchronize, then write to
// it. Verifies the initial values are observed before any write becomes
// visible. With FLAG_IN_GROUP the reads target another invocation's elements.
class InvocationReadWriteCase : public InvocationBasicCase
{
public:
	InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string genShaderMainBlock (void) const;
};

InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
597
// Emits: check every observed element still holds its initial value 0 (in
// atomic mode the read is an exchange storing 123), barrier, then write
// groupNdx to this invocation's own elements.
std::string InvocationReadWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// with group sync, read an element belonging to another invocation of the same work group
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
642
// Case: write a dummy value, synchronize, overwrite with the real value,
// synchronize again, then read back and verify. Checks that the newer write
// wins over the older one.
class InvocationOverWriteCase : public InvocationBasicCase
{
public:
	InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	std::string genShaderMainBlock (void) const;
};

InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}
655
// Emits: write the dummy value 456 to this invocation's elements, barrier,
// overwrite with groupNdx (possibly another group member's elements when
// syncing with the group), barrier, then verify the observed value is
// groupNdx — i.e. the overwrite, not the dummy write, is visible.
std::string InvocationOverWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

	// write

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// write over

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// write another invocation's value or our own value depending on test type
		// \note the (+4, +3*ndx) offsets differ from the read offsets below,
		// so with group sync the overwriter and the reader are distinct
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	buf << genBarrierSource();

	// read

	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		// check another invocation's value or our own value depending on test type
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

	return buf.str();
}
724
// Cases where the same storage object is bound to two shader interfaces
// (FLAG_ALIASING_STORAGES is forced in the ctor): writes performed through
// one interface must be visible through the aliasing one after the barrier.
class InvocationAliasWriteCase : public InterInvocationTestCase
{
public:
	enum TestType
	{
		TYPE_WRITE = 0,		//!< write through interface 1, read back through interface 0
		TYPE_OVERWRITE,		//!< write through interface 0 first, then overwrite through interface 1

		TYPE_LAST
	};

	InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
private:
	std::string genShaderSource (void) const;

	const TestType m_type;
};

InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
	: InterInvocationTestCase (context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
	, m_type (type)
{
	DE_ASSERT(type < TYPE_LAST);
}
749
genShaderSource(void) const750 std::string InvocationAliasWriteCase::genShaderSource (void) const
751 {
752 const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
753 std::ostringstream buf;
754
755 buf << "${GLSL_VERSION_DECL}\n"
756 << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
757 << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
758 << "layout(binding=0, std430) buffer Output\n"
759 << "{\n"
760 << " highp int values[];\n"
761 << "} sb_result;\n";
762
763 if (m_storage == STORAGE_BUFFER)
764 buf << "layout(binding=1, std430) coherent buffer Storage0\n"
765 << "{\n"
766 << " highp int values[];\n"
767 << "} sb_store0;\n"
768 << "layout(binding=2, std430) coherent buffer Storage1\n"
769 << "{\n"
770 << " highp int values[];\n"
771 << "} sb_store1;\n"
772 << "\n"
773 << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
774 << "{\n"
775 << " highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
776 << " return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
777 << "}\n";
778 else if (m_storage == STORAGE_IMAGE)
779 buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
780 << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
781 << "\n"
782 << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
783 << "{\n"
784 << " return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
785 << "}\n";
786 else
787 DE_ASSERT(DE_FALSE);
788
789 buf << "\n"
790 << "void main (void)\n"
791 << "{\n"
792 << " int resultNdx = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
793 << " int groupNdx = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
794 << " bool allOk = true;\n"
795 << "\n";
796
797 if (m_type == TYPE_OVERWRITE)
798 {
799 // write
800
801 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802 {
803 if (m_storage == STORAGE_BUFFER && m_useAtomic)
804 buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
805 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
806 buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
807 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
808 buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
809 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
810 buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
811 else
812 DE_ASSERT(DE_FALSE);
813 }
814
815 // barrier
816
817 buf << genBarrierSource();
818 }
819 else
820 DE_ASSERT(m_type == TYPE_WRITE);
821
822 // write (again)
823
824 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
825 {
826 const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
827
828 if (m_storage == STORAGE_BUFFER && m_useAtomic)
829 buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
830 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
831 buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
832 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
833 buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
834 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
835 buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
836 else
837 DE_ASSERT(DE_FALSE);
838 }
839
840 // barrier
841
842 buf << genBarrierSource();
843
844 // read
845
846 for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
847 {
848 if (m_storage == STORAGE_BUFFER && m_useAtomic)
849 buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
850 else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
851 buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
852 else if (m_storage == STORAGE_IMAGE && m_useAtomic)
853 buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
854 else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
855 buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
856 else
857 DE_ASSERT(DE_FALSE);
858 }
859
860 // return result
861
862 buf << "\n"
863 << " sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
864 << "}\n";
865
866 return specializeShader(m_context, buf.str().c_str());
867 }
868
869 namespace op
870 {
871
// Command: run a compute program that fills the given storage with
// pseudorandom data derived from seed.
struct WriteData
{
	int targetHandle;
	int seed;

	static WriteData Generate (int targetHandle, int seed)
	{
		const WriteData data = { targetHandle, seed };
		return data;
	}
};
887
// Command: run a verifier program that checks the given storage against
// the pseudorandom pattern derived from seed.
struct ReadData
{
	int targetHandle;
	int seed;

	static ReadData Generate (int targetHandle, int seed)
	{
		const ReadData data = { targetHandle, seed };
		return data;
	}
};
903
// Command: issue a glMemoryBarrier() between the surrounding operations.
// Carries no data; its presence in the command list is the whole payload.
struct Barrier
{
};
907
// Command: write pseudorandom data to every other element/column of the
// target storage; evenOdd selects which half is written.
struct WriteDataInterleaved
{
	int targetHandle;
	int seed;
	bool evenOdd;

	static WriteDataInterleaved Generate (int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved data = { targetHandle, seed, evenOdd };
		return data;
	}
};
925
// Command: verify a storage whose two interleaved halves were written by
// separate dispatches, seeded with seed0 and seed1 respectively.
struct ReadDataInterleaved
{
	int targetHandle;
	int seed0;
	int seed1;

	static ReadDataInterleaved Generate (int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved data = { targetHandle, seed0, seed1 };
		return data;
	}
};
943
// Command: verify two storages in a single dispatch, each against its own
// seeded pseudorandom pattern.
struct ReadMultipleData
{
	int targetHandle0;
	int seed0;
	int targetHandle1;
	int seed1;

	static ReadMultipleData Generate (int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData data = { targetHandle0, seed0, targetHandle1, seed1 };
		return data;
	}
};
963
// Command: verify that the target storage still holds its initial
// zero-fill (i.e. nothing has been written to it).
struct ReadZeroData
{
	int targetHandle;

	static ReadZeroData Generate (int targetHandle)
	{
		const ReadZeroData data = { targetHandle };
		return data;
	}
};
977
978 } // namespace op
979
980 class InterCallTestCase;
981
// Fluent builder for a sequence of inter-call operations. Commands are
// accumulated with operator<< and later consumed by InterCallTestCase
// (which is a friend and reads m_cmds directly).
class InterCallOperations
{
public:
	InterCallOperations& operator<< (const op::WriteData&);
	InterCallOperations& operator<< (const op::ReadData&);
	InterCallOperations& operator<< (const op::Barrier&);
	InterCallOperations& operator<< (const op::ReadMultipleData&);
	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadZeroData&);

private:
	// One recorded command: a type tag plus a union holding the payload
	// of the op:: struct that matches the tag.
	struct Command
	{
		enum CommandType
		{
			TYPE_WRITE = 0,
			TYPE_READ,
			TYPE_BARRIER,
			TYPE_READ_MULTIPLE,
			TYPE_WRITE_INTERLEAVE,
			TYPE_READ_INTERLEAVE,
			TYPE_READ_ZERO,

			TYPE_LAST
		};

		CommandType type;	// selects the active member of u_cmd

		union CommandUnion
		{
			op::WriteData				write;
			op::ReadData				read;
			op::Barrier					barrier;
			op::ReadMultipleData		readMulti;
			op::WriteDataInterleaved	writeInterleave;
			op::ReadDataInterleaved		readInterleave;
			op::ReadZeroData			readZero;
		} u_cmd;
	};

	friend class InterCallTestCase;

	std::vector<Command> m_cmds;	// recorded commands, in submission order
};
1027
operator <<(const op::WriteData & cmd)1028 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1029 {
1030 m_cmds.push_back(Command());
1031 m_cmds.back().type = Command::TYPE_WRITE;
1032 m_cmds.back().u_cmd.write = cmd;
1033
1034 return *this;
1035 }
1036
operator <<(const op::ReadData & cmd)1037 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1038 {
1039 m_cmds.push_back(Command());
1040 m_cmds.back().type = Command::TYPE_READ;
1041 m_cmds.back().u_cmd.read = cmd;
1042
1043 return *this;
1044 }
1045
operator <<(const op::Barrier & cmd)1046 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1047 {
1048 m_cmds.push_back(Command());
1049 m_cmds.back().type = Command::TYPE_BARRIER;
1050 m_cmds.back().u_cmd.barrier = cmd;
1051
1052 return *this;
1053 }
1054
operator <<(const op::ReadMultipleData & cmd)1055 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1056 {
1057 m_cmds.push_back(Command());
1058 m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1059 m_cmds.back().u_cmd.readMulti = cmd;
1060
1061 return *this;
1062 }
1063
operator <<(const op::WriteDataInterleaved & cmd)1064 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1065 {
1066 m_cmds.push_back(Command());
1067 m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1068 m_cmds.back().u_cmd.writeInterleave = cmd;
1069
1070 return *this;
1071 }
1072
operator <<(const op::ReadDataInterleaved & cmd)1073 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1074 {
1075 m_cmds.push_back(Command());
1076 m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1077 m_cmds.back().u_cmd.readInterleave = cmd;
1078
1079 return *this;
1080 }
1081
operator <<(const op::ReadZeroData & cmd)1082 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1083 {
1084 m_cmds.push_back(Command());
1085 m_cmds.back().type = Command::TYPE_READ_ZERO;
1086 m_cmds.back().u_cmd.readZero = cmd;
1087
1088 return *this;
1089 }
1090
// Executes a recorded sequence of compute dispatches (writes, reads,
// barriers) against shared buffer or image storage and verifies that
// inter-call data dependencies are respected.
class InterCallTestCase : public TestCase
{
public:
	enum StorageType
	{
		STORAGE_BUFFER = 0,	// backing storage is a SSBO
		STORAGE_IMAGE,		// backing storage is a 2D texture (r32i/r32f)

		STORAGE_LAST
	};
	enum Flags
	{
		FLAG_USE_ATOMIC	= 1,	// access storage with atomic operations
		FLAG_USE_INT	= 2,	// use integer format (r32i) instead of float (r32f)
	};
	InterCallTestCase	(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
	~InterCallTestCase	(void);

private:
	void			init			(void);
	void			deinit			(void);
	IterateResult	iterate			(void);
	bool			verifyResults	(void);	// maps result SSBOs and checks every invocation reported 1

	// One runCommand overload per command type; step indexes the
	// pre-built program/result-storage arrays, the friendly-name
	// counters only number objects in the log output.
	void			runCommand		(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
	void			runCommand		(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void			runCommand		(const op::Barrier&);
	void			runCommand		(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void			runCommand		(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
	void			runCommand		(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void			runCommand		(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void			runSingleRead	(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);

	glw::GLuint			genStorage					(int friendlyName);			// creates + zero-fills a storage object
	glw::GLuint			genResultStorage			(void);						// creates a result SSBO for a verifier program
	glu::ShaderProgram*	genWriteProgram				(int seed);
	glu::ShaderProgram*	genReadProgram				(int seed);
	glu::ShaderProgram*	genReadMultipleProgram		(int seed0, int seed1);
	glu::ShaderProgram*	genWriteInterleavedProgram	(int seed, bool evenOdd);
	glu::ShaderProgram*	genReadInterleavedProgram	(int seed0, int seed1);
	glu::ShaderProgram*	genReadZeroProgram			(void);

	const StorageType								m_storage;
	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
	const std::vector<InterCallOperations::Command>	m_cmds;					// !< command list, copied from the builder
	const bool										m_useAtomic;
	const bool										m_formatInteger;

	std::vector<glu::ShaderProgram*>	m_operationPrograms;		// !< one program per command (DE_NULL for barriers)
	std::vector<glw::GLuint>			m_operationResultStorages;	// !< one result SSBO per read-type command (0 otherwise)
	std::map<int, glw::GLuint>			m_storageIDs;				// !< friendly handle -> GL object name
};
1144
// Copies the recorded command list from the builder; flags select atomic
// access (FLAG_USE_ATOMIC) and integer storage format (FLAG_USE_INT).
InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
	: TestCase				(context, name, desc)
	, m_storage				(storage)
	, m_invocationGridSize	(512)
	, m_perInvocationSize	(2)
	, m_cmds				(ops.m_cmds)
	, m_useAtomic			((flags & FLAG_USE_ATOMIC) != 0)
	, m_formatInteger		((flags & FLAG_USE_INT) != 0)
{
}
1155
InterCallTestCase::~InterCallTestCase (void)
{
	// deinit() clears its containers, so a second call (if the framework
	// already invoked deinit()) is a harmless no-op.
	deinit();
}
1160
init(void)1161 void InterCallTestCase::init (void)
1162 {
1163 int programFriendlyName = 0;
1164
1165 // requirements
1166
1167 if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
1168 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1169
1170 // generate resources and validate command list
1171
1172 m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1173 m_operationResultStorages.resize(m_cmds.size(), 0);
1174
1175 for (int step = 0; step < (int)m_cmds.size(); ++step)
1176 {
1177 switch (m_cmds[step].type)
1178 {
1179 case InterCallOperations::Command::TYPE_WRITE:
1180 {
1181 const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1182
1183 // new storage handle?
1184 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1185 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1186
1187 // program
1188 {
1189 glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1190
1191 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1192 m_testCtx.getLog() << *program;
1193
1194 if (!program->isOk())
1195 throw tcu::TestError("could not build program");
1196
1197 m_operationPrograms[step] = program;
1198 }
1199 break;
1200 }
1201
1202 case InterCallOperations::Command::TYPE_READ:
1203 {
1204 const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1205 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1206
1207 // program and result storage
1208 {
1209 glu::ShaderProgram* program = genReadProgram(cmd.seed);
1210
1211 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1212 m_testCtx.getLog() << *program;
1213
1214 if (!program->isOk())
1215 throw tcu::TestError("could not build program");
1216
1217 m_operationPrograms[step] = program;
1218 m_operationResultStorages[step] = genResultStorage();
1219 }
1220 break;
1221 }
1222
1223 case InterCallOperations::Command::TYPE_BARRIER:
1224 {
1225 break;
1226 }
1227
1228 case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1229 {
1230 const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1231 DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1232 DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1233
1234 // program
1235 {
1236 glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1237
1238 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1239 m_testCtx.getLog() << *program;
1240
1241 if (!program->isOk())
1242 throw tcu::TestError("could not build program");
1243
1244 m_operationPrograms[step] = program;
1245 m_operationResultStorages[step] = genResultStorage();
1246 }
1247 break;
1248 }
1249
1250 case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1251 {
1252 const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1253
1254 // new storage handle?
1255 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1256 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1257
1258 // program
1259 {
1260 glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1261
1262 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1263 m_testCtx.getLog() << *program;
1264
1265 if (!program->isOk())
1266 throw tcu::TestError("could not build program");
1267
1268 m_operationPrograms[step] = program;
1269 }
1270 break;
1271 }
1272
1273 case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1274 {
1275 const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1276 DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1277
1278 // program
1279 {
1280 glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1281
1282 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1283 m_testCtx.getLog() << *program;
1284
1285 if (!program->isOk())
1286 throw tcu::TestError("could not build program");
1287
1288 m_operationPrograms[step] = program;
1289 m_operationResultStorages[step] = genResultStorage();
1290 }
1291 break;
1292 }
1293
1294 case InterCallOperations::Command::TYPE_READ_ZERO:
1295 {
1296 const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1297
1298 // new storage handle?
1299 if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1300 m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1301
1302 // program
1303 {
1304 glu::ShaderProgram* program = genReadZeroProgram();
1305
1306 m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1307 m_testCtx.getLog() << *program;
1308
1309 if (!program->isOk())
1310 throw tcu::TestError("could not build program");
1311
1312 m_operationPrograms[step] = program;
1313 m_operationResultStorages[step] = genResultStorage();
1314 }
1315 break;
1316 }
1317
1318 default:
1319 DE_ASSERT(DE_FALSE);
1320 }
1321 }
1322 }
1323
deinit(void)1324 void InterCallTestCase::deinit (void)
1325 {
1326 // programs
1327 for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1328 delete m_operationPrograms[ndx];
1329 m_operationPrograms.clear();
1330
1331 // result storages
1332 for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1333 {
1334 if (m_operationResultStorages[ndx])
1335 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1336 }
1337 m_operationResultStorages.clear();
1338
1339 // storage
1340 for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1341 {
1342 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1343
1344 if (m_storage == STORAGE_BUFFER)
1345 gl.deleteBuffers(1, &it->second);
1346 else if (m_storage == STORAGE_IMAGE)
1347 gl.deleteTextures(1, &it->second);
1348 else
1349 DE_ASSERT(DE_FALSE);
1350 }
1351 m_storageIDs.clear();
1352 }
1353
// Runs the recorded command list in order, then verifies every result
// storage. Always completes in a single iteration (returns STOP).
InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
{
	int programFriendlyName			= 0;
	int resultStorageFriendlyName	= 0;

	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;

	// run steps

	for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		// dispatch to the runCommand overload matching the active union member
		switch (m_cmds[step].type)
		{
			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write, step, programFriendlyName);										break;
			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read, step, programFriendlyName, resultStorageFriendlyName);				break;
			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																	break;
			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti, step, programFriendlyName, resultStorageFriendlyName);			break;
			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave, step, programFriendlyName);								break;
			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave, step, programFriendlyName, resultStorageFriendlyName);	break;
			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero, step, programFriendlyName, resultStorageFriendlyName);			break;
			default:
				DE_ASSERT(DE_FALSE);
		}
	}

	// read results from result buffers
	if (verifyResults())
		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	else
		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());

	return STOP;
}
1387
// Maps each verifier program's result SSBO and checks that every
// invocation wrote 1 (success). Logs at most errorFloodThreshold
// individual failures per storage. Returns true iff all storages passed.
bool InterCallTestCase::verifyResults (void)
{
	int		resultBufferFriendlyName	= 0;
	bool	allResultsOk				= true;
	bool	anyResult					= false;

	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;

	for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		const int	errorFloodThreshold	= 5;
		int			numErrorsLogged		= 0;

		// only read-type commands allocated a result storage (0 == none)
		if (m_operationResultStorages[step])
		{
			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
			const void*				mapped	= DE_NULL;
			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
			bool					error	= false;

			anyResult = true;

			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");

			// copy to properly aligned array
			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));

			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
				throw tcu::TestError("memory map store corrupted");

			// check the results; each invocation must have reported success (1)
			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
			{
				if (results[ndx] != 1)
				{
					error = true;

					if (numErrorsLogged == 0)
						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
					if (numErrorsLogged++ < errorFloodThreshold)
						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
					else
					{
						// after N errors, no point continuing verification
						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
						break;
					}
				}
			}

			if (error)
			{
				allResultsOk = false;
			}
			else
				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
		}
	}

	// a command list with no read-type command would verify nothing
	DE_ASSERT(anyResult);
	DE_UNREF(anyResult);

	return allResultsOk;
}
1454
// Dispatches the pre-built write program for this step with the target
// storage bound at binding point 0.
void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set destination
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		// atomics need read-write access; plain stores only write
		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
	}
	else
		DE_ASSERT(DE_FALSE);

	// calc
	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}
1489
// Single-source verify: delegate to the shared read path.
void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1494
runCommand(const op::Barrier & cmd)1495 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1496 {
1497 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1498
1499 DE_UNREF(cmd);
1500
1501 if (m_storage == STORAGE_BUFFER)
1502 {
1503 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504 gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1505 }
1506 else if (m_storage == STORAGE_IMAGE)
1507 {
1508 m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1509 gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1510 }
1511 else
1512 DE_ASSERT(DE_FALSE);
1513 }
1514
// Dispatches the dual-source verifier: both storages bound as sources
// (binding points 1 and 2), result SSBO at binding point 0.
void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set sources
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

		// atomics need read-write access; plain loads only read
		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
	}
	else
		DE_ASSERT(DE_FALSE);

	// set destination
	DE_ASSERT(m_operationResultStorages[stepNdx]);
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");

	// calc
	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
}
1559
// Dispatches the interleaved writer: half-width dispatch that touches
// every other element (buffer) or column (image) of the target.
void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set destination
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[cmd.targetHandle]);

		// atomics need read-write access; plain stores only write
		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
	}
	else
		DE_ASSERT(DE_FALSE);

	// calc; half-width grid since each invocation covers an interleaved stride
	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}
1595
// Interleaved verify: delegate to the shared read path (the per-step
// program already encodes the two seeds).
void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1600
// Zero-content verify: delegate to the shared read path (the per-step
// program checks for the initial zero fill).
void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}
1605
// Common body for all single-source verify commands: binds the target
// storage as source (binding point 1) and the step's result SSBO as
// destination (binding point 0), then dispatches the verifier program.
void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
		<< tcu::TestLog::EndMessage;

	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

	// set source
	if (m_storage == STORAGE_BUFFER)
	{
		DE_ASSERT(m_storageIDs[targetHandle]);

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		DE_ASSERT(m_storageIDs[targetHandle]);

		// atomics need read-write access; plain loads only read
		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
	}
	else
		DE_ASSERT(DE_FALSE);

	// set destination
	DE_ASSERT(m_operationResultStorages[stepNdx]);
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");

	// calc
	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
}
1646
// Creates and zero-initializes the storage object for the given friendly
// name: a SSBO for STORAGE_BUFFER, a single-level r32i/r32f 2D texture for
// STORAGE_IMAGE. Returns the GL object name (0 never returned on success).
glw::GLuint InterCallTestCase::genStorage (int friendlyName)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	if (m_storage == STORAGE_BUFFER)
	{
		// one 32-bit element per (invocation, per-invocation index) pair
		const int	numElements	= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
		const int	bufferSize	= numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
		glw::GLuint	retVal		= 0;

		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &retVal);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);

		// upload zeros so read-zero commands have a defined baseline
		if (m_formatInteger)
		{
			const std::vector<deUint32> zeroBuffer(numElements, 0);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		else
		{
			const std::vector<float> zeroBuffer(numElements, 0.0f);
			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		}
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");

		return retVal;
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// height scaled by m_perInvocationSize -- presumably one grid-high
		// slice per element accessed by an invocation (see gen*Program)
		const int	imageWidth	= m_invocationGridSize;
		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
		glw::GLuint	retVal		= 0;

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
			<< tcu::TestLog::EndMessage;

		gl.genTextures(1, &retVal);
		gl.bindTexture(GL_TEXTURE_2D, retVal);

		// immutable single-level storage, as required for image binding
		if (m_formatInteger)
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
		else
			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);

		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Filling image with 0"
			<< tcu::TestLog::EndMessage;

		// upload zeros so read-zero commands have a defined baseline
		if (m_formatInteger)
		{
			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
		}
		else
		{
			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");

		return retVal;
	}
	else
	{
		DE_ASSERT(DE_FALSE);
		return 0;
	}
}
1727
genResultStorage(void)1728 glw::GLuint InterCallTestCase::genResultStorage (void)
1729 {
1730 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1731 glw::GLuint retVal = 0;
1732
1733 gl.genBuffers(1, &retVal);
1734 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1735 gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1736 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1737
1738 return retVal;
1739 }
1740
// Builds a compute shader (1x1 local size) in which every invocation writes
// m_perInvocationSize values — each equal to the invocation's linearized
// group index — into the buffer/image bound at binding point 0. The seed
// offsets the target locations so that successive write passes touch the
// storage in a different order. Writes use atomicExchange/imageAtomicExchange
// when m_useAtomic is set, plain stores otherwise.
glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
{
	// imageAtomicExchange needs the shader image atomic feature/extension.
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// Declare the write target: SSBO at binding 0, or image unit 0.
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		// Buffer index: group index rotated by the seed, one grid-sized stride per written value.
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Image coordinate: x rotated by the seed within the row, y selects the band for this value.
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1809
// Builds the matching verification compute shader for genWriteProgram: each
// invocation re-derives the locations written with the same seed (source at
// binding 1), checks that every one holds the writer's group index, and
// stores a 0/1 verdict into the result SSBO at binding 0. In atomic mode the
// values are consumed with (image)atomicExchange, replacing them with zero.
glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// Declare the read source: SSBO at binding 1, or image unit 1.
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	// Per-invocation result flags go to binding 0.
	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// Index arithmetic mirrors genWriteProgram's buffer path exactly.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			if (!m_useAtomic)
				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Coordinate arithmetic mirrors genWriteProgram's image path exactly.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
		{
			if (!m_useAtomic)
				buf << "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1880
// Builds a verification compute shader that reads TWO sources in one pass
// (bindings 1 and 2), each written by a separate write pass with its own
// seed (seed0/seed1). The per-element checks are the same as in
// genReadProgram; both sources must hold the writer's group index for the
// invocation's verdict (binding 0) to be 1.
glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	// Two read sources: SSBOs at bindings 1 and 2, or image units 1 and 2.
	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in0;\n"
			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in1;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// Check both buffers with their respective seeds; atomic mode consumes values with atomicExchange.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Check both images with their respective seeds; coordinates mirror the write pass.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
1940
// Builds a write compute shader like genWriteProgram, but targeting only the
// even (evenOdd == true) or odd (evenOdd == false) elements/columns of the
// shared storage at binding 0. Two such programs with opposite parity can
// write the same storage object without overlapping, interleaving their
// outputs element-by-element.
glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_out;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "\n";

	// Write to buffer/image m_perInvocationSize elements
	if (m_storage == STORAGE_BUFFER)
	{
		// "index * 2 + parity" maps the half-range target slot onto even or odd buffer elements.
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	atomicExchange(";
			else
				buf << "	";

			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";

			if (m_useAtomic)
				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
		}
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Same even/odd scheme applied to the image's x coordinate.
		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
		{
			if (m_useAtomic)
				buf << "	imageAtomicExchange";
			else
				buf << "	imageStore";

			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";

			if (m_useAtomic)
				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
			else
				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2009
// Builds the verification shader for a pair of interleaved write passes
// (genWriteInterleavedProgram with seed0/even and seed1/odd). Even-indexed
// invocations check the even elements/columns against seed0, odd invocations
// the odd ones against seed1; the expected value is interleavedGroupNdx, the
// group index as computed by the half-width write dispatch. Verdicts go to
// the result SSBO at binding 0.
glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		// interleavedGroupNdx reproduces the group index the half-width write dispatch computed (x halved).
		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// Even invocations verify even elements (seed0) ...
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		// ... odd invocations verify odd elements (seed1).
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Same split applied to even/odd image columns.
		buf << "	if (groupNdx % 2 == 0)\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n"
			<< "	else\n"
			<< "	{\n";
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "		allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
		buf << "	}\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2089
// Builds a verification shader that expects the source storage (binding 1)
// to still contain all zeros — used for storages that were initialized but
// never written. In atomic mode the check exchanges a nonzero sentinel
// ("anything" = 5) into each element, which must return the previous value 0.
// Per-invocation verdicts go to the result SSBO at binding 0.
glu::ShaderProgram* InterCallTestCase::genReadZeroProgram (void)
{
	const bool			useImageAtomics	= m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";

	if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
			<< "{\n"
			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
			<< "} sb_in;\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
	else
		DE_ASSERT(DE_FALSE);

	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
		<< "{\n"
		<< "	highp int resultOk[];\n"
		<< "} sb_result;\n"
		<< "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
		<< "	bool allOk = true;\n"
		<< "\n";

	// Verify data

	if (m_storage == STORAGE_BUFFER)
	{
		// Each invocation owns a contiguous m_perInvocationSize-element slice of the buffer.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		// Each invocation checks one texel per grid-height band of the image.
		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
			buf << "	allOk = allOk && ("
				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
				<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
	}
	else
		DE_ASSERT(DE_FALSE);

	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
}
2146
// Tests concurrent atomicAdd operations on a shared SSBO: the same compute
// program is dispatched m_numCalls times without intervening synchronization,
// each call adding a unique delta and recording the pre-add value into its
// own intermediate buffer. iterate() then verifies both the final sum and
// that the recorded intermediate values form a consistent addition chain.
class SSBOConcurrentAtomicCase : public TestCase
{
public:

									SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
									~SSBOConcurrentAtomicCase	(void);

	void							init						(void);
	void							deinit						(void);
	IterateResult					iterate						(void);

private:
	// Generates the compute shader that does one atomicAdd per invocation.
	std::string						genComputeSource			(void) const;

	const int						m_numCalls;					// number of dispatches sharing the work buffer
	const int						m_workSize;					// elements in the work buffer / work groups per dispatch
	glu::ShaderProgram*				m_program;
	deUint32						m_bufferID;					// shared work buffer (binding 2)
	std::vector<deUint32>			m_intermediateResultBuffers;// one result buffer per dispatch (binding 1)
};
2167
// Stores the test parameters; GL resources are created later in init().
SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase		(context, name, description)
	, m_numCalls	(numCalls)
	, m_workSize	(workSize)
	, m_program		(DE_NULL)
	, m_bufferID	(DE_NULL)
{
}
2176
// deinit() is idempotent, so an explicit call here is safe even if the
// framework already tore the case down.
SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
{
	deinit();
}
2181
init(void)2182 void SSBOConcurrentAtomicCase::init (void)
2183 {
2184 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2185 std::vector<deUint32> zeroData (m_workSize, 0);
2186
2187 // gen buffers
2188
2189 gl.genBuffers(1, &m_bufferID);
2190 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2191 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2192
2193 for (int ndx = 0; ndx < m_numCalls; ++ndx)
2194 {
2195 deUint32 buffer = 0;
2196
2197 gl.genBuffers(1, &buffer);
2198 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2199 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2200
2201 m_intermediateResultBuffers.push_back(buffer);
2202 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2203 }
2204
2205 // gen program
2206
2207 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2208 m_testCtx.getLog() << *m_program;
2209 if (!m_program->isOk())
2210 throw tcu::TestError("could not build program");
2211 }
2212
deinit(void)2213 void SSBOConcurrentAtomicCase::deinit (void)
2214 {
2215 if (m_bufferID)
2216 {
2217 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2218 m_bufferID = 0;
2219 }
2220
2221 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2222 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2223 m_intermediateResultBuffers.clear();
2224
2225 delete m_program;
2226 m_program = DE_NULL;
2227 }
2228
// Dispatches the atomicAdd program m_numCalls times — each call with a
// unique delta from a shuffled 1..N ramp and its own intermediate result
// buffer — with no synchronization between dispatches. Then verifies:
// 1) every element of the shared work buffer equals the ramp sum N*(N+1)/2,
// 2) for every element, the per-call pre-add snapshots (sorted) form a valid
//    chain of the additions, i.e. the atomics were observed consistently.
TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
{
	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
	const deUint32			sumValue	= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);	// sum of the ramp 1..m_numCalls
	std::vector<int>		deltas;

	// generate unique deltas
	generateShuffledRamp(m_numCalls, deltas);

	// invoke program N times, each with a different delta
	{
		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< "Setting u_atomicDelta to a unique value for each call.\n"
			<< tcu::TestLog::EndMessage;

		if (deltaLocation == -1)
			throw tcu::TestError("u_atomicDelta location was -1");

		gl.useProgram(m_program->getProgram());
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);		// shared work buffer stays bound for all calls

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
				<< tcu::TestLog::EndMessage;

			gl.uniform1ui(deltaLocation, deltas[callNdx]);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);	// fresh snapshot buffer per call
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		std::vector<deUint32> result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);

		// Every element must equal the full ramp sum since all deltas were applied exactly once.
		for (int ndx = 0; ndx < m_workSize; ++ndx)
		{
			if (result[ndx] != sumValue)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
					<< "Work buffer contains invalid values."
					<< tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
		std::vector<deUint32>				valueChain			(m_numCalls);

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
		}

		// verify values

		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
		{
			int			invalidOperationNdx;
			deUint32	errorDelta;
			deUint32	errorExpected;

			// collect result chain for each element
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

			// check there exists a path from 0 to sumValue using each addition once
			// decompose cumulative results to addition operations (all additions positive => this works)

			std::sort(valueChain.begin(), valueChain.end());

			// validate chain
			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
					<< tcu::TestLog::EndMessage;

				// Dump the whole chain for this element to help diagnose the failure.
				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2353
// Generates the compute shader: one invocation per work-buffer element,
// atomically adding u_atomicDelta to the volatile work buffer (binding 2)
// and storing the value returned by atomicAdd — the pre-add snapshot — into
// the intermediate result buffer (binding 1).
std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
{
	std::ostringstream buf;

	buf	<< "${GLSL_VERSION_DECL}\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize << "];\n"
		<< "} sb_ires;\n"
		<< "\n"
		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize << "];\n"
		<< "} sb_work;\n"
		<< "uniform highp uint u_atomicDelta;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
2379
// Tests concurrent use of an atomic counter shared by two alternately
// dispatched compute programs ("even" and "odd" variants, selected by
// genComputeSource's evenOdd flag), recording intermediate results into a
// shared buffer sized for all m_numCalls * m_workSize invocations.
class ConcurrentAtomicCounterCase : public TestCase
{
public:

									ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
									~ConcurrentAtomicCounterCase	(void);

	void							init							(void);
	void							deinit							(void);
	IterateResult					iterate							(void);

private:
	// Generates the compute shader; evenOdd selects between the two program variants.
	std::string						genComputeSource				(bool evenOdd) const;

	const int						m_numCalls;						// number of dispatches
	const int						m_workSize;						// work groups per dispatch
	glu::ShaderProgram*				m_evenProgram;
	glu::ShaderProgram*				m_oddProgram;
	deUint32						m_counterBuffer;				// buffer backing the shared atomic counter
	deUint32						m_intermediateResultBuffer;		// shared snapshot buffer, m_numCalls * m_workSize elements
};
2401
// Stores test dimensions; GL objects and programs are created in init().
ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase (context, name, description)
	, m_numCalls (numCalls)
	, m_workSize (workSize)
	, m_evenProgram (DE_NULL)
	, m_oddProgram (DE_NULL)
	, m_counterBuffer (DE_NULL)
	, m_intermediateResultBuffer(DE_NULL)
{
}
2412
// Releases all GL resources; deinit() is idempotent so an earlier explicit
// deinit() is harmless.
ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
{
	deinit();
}
2417
// Creates the zero-initialized counter buffer (a single uint) and the
// intermediate result buffer (numCalls * workSize uints), then compiles the
// even and odd compute programs. Throws tcu::TestError if a program fails
// to build.
void ConcurrentAtomicCounterCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const std::vector<deUint32> zeroData (m_numCalls * m_workSize, 0);

	// gen buffer

	gl.genBuffers(1, &m_counterBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
	// counter is a single uint; only the first element of zeroData is consumed
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);

	gl.genBuffers(1, &m_intermediateResultBuffer);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");

		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
		m_testCtx.getLog() << *m_evenProgram;
		if (!m_evenProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");

		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
		m_testCtx.getLog() << *m_oddProgram;
		if (!m_oddProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
2454
deinit(void)2455 void ConcurrentAtomicCounterCase::deinit (void)
2456 {
2457 if (m_counterBuffer)
2458 {
2459 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2460 m_counterBuffer = 0;
2461 }
2462 if (m_intermediateResultBuffer)
2463 {
2464 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2465 m_intermediateResultBuffer = 0;
2466 }
2467
2468 delete m_evenProgram;
2469 m_evenProgram = DE_NULL;
2470
2471 delete m_oddProgram;
2472 m_oddProgram = DE_NULL;
2473 }
2474
// Dispatches the even and odd programs m_numCalls times each, interleaved and
// without intervening barriers, then verifies (a) the shared atomic counter
// reached exactly numCalls*workSize and (b) the recorded pre-increment values,
// when sorted, form the exact sequence 0..numCalls*workSize-1 (each increment
// must have returned a unique value).
TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	// invoke program N times, each with a different delta
	{
		const int evenCallNdxLocation = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
		const int oddCallNdxLocation = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		if (evenCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");
		if (oddCallNdxLocation == -1)
			throw tcu::TestError("u_callNdx location was -1");

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);

		// No memory barriers between dispatches: the atomic counter operations
		// themselves must be coherent across concurrent dispatches.
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_evenProgram->getProgram());
			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_oddProgram->getProgram());
			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<deUint32> intermediateResults;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);

		// verify values

		// Every atomicCounterIncrement returns a unique pre-increment value, so
		// after sorting the results must be exactly 0, 1, ..., N-1.
		std::sort(intermediateResults.begin(), intermediateResults.end());

		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
		{
			if ((int)intermediateResults[valueNdx] != valueNdx)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2574
// Builds one half of the even/odd shader pair. Each invocation derives a
// global data index from u_callNdx and its invocation id; only indices whose
// parity matches evenOdd (true = even) call atomicCounterIncrement, storing
// the returned pre-increment value into the intermediate result buffer.
std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
		<< "} sb_ires;\n"
		<< "\n"
		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
		<< "uniform highp uint u_callNdx;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
2598
// Exercises concurrent imageAtomicAdd() on an r32ui work image: the same
// program is dispatched numCalls times with unique delta values and no
// synchronization in between; per-call pre-addition values are captured in
// one SSBO per call so the chain of additions can be validated afterwards.
class ConcurrentImageAtomicCase : public TestCase
{
public:

	ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
	~ConcurrentImageAtomicCase (void);

	void init (void);
	void deinit (void);
	IterateResult iterate (void);

private:
	// Reads the work image back into result via the image-read program.
	void readWorkImage (std::vector<deUint32>& result);

	std::string genComputeSource (void) const;		// imageAtomicAdd test shader
	std::string genImageReadSource (void) const;	// copies image texels into an SSBO
	std::string genImageClearSource (void) const;	// zeroes every texel

	const int m_numCalls;							// number of atomic-add dispatches
	const int m_workSize;							// image is m_workSize x m_workSize
	glu::ShaderProgram* m_program;
	glu::ShaderProgram* m_imageReadProgram;
	glu::ShaderProgram* m_imageClearProgram;
	deUint32 m_imageID;
	std::vector<deUint32> m_intermediateResultBuffers;	// one SSBO per call
};
2625
// Stores test dimensions; GL objects and programs are created in init().
ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase (context, name, description)
	, m_numCalls (numCalls)
	, m_workSize (workSize)
	, m_program (DE_NULL)
	, m_imageReadProgram (DE_NULL)
	, m_imageClearProgram (DE_NULL)
	, m_imageID (DE_NULL)
{
}
2636
// Releases all GL resources via the idempotent deinit().
ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
{
	deinit();
}
2641
init(void)2642 void ConcurrentImageAtomicCase::init (void)
2643 {
2644 const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2645 std::vector<deUint32> zeroData (m_workSize * m_workSize, 0);
2646
2647 if (!checkSupport(m_context))
2648 throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2649
2650 // gen image
2651
2652 gl.genTextures(1, &m_imageID);
2653 gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2654 gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2655 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2656 gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2657 GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2658
2659 // gen buffers
2660
2661 for (int ndx = 0; ndx < m_numCalls; ++ndx)
2662 {
2663 deUint32 buffer = 0;
2664
2665 gl.genBuffers(1, &buffer);
2666 gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2667 gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2668
2669 m_intermediateResultBuffers.push_back(buffer);
2670 GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2671 }
2672
2673 // gen programs
2674
2675 m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2676 m_testCtx.getLog() << *m_program;
2677 if (!m_program->isOk())
2678 throw tcu::TestError("could not build program");
2679
2680 m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2681 if (!m_imageReadProgram->isOk())
2682 {
2683 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2684
2685 m_testCtx.getLog() << *m_imageReadProgram;
2686 throw tcu::TestError("could not build program");
2687 }
2688
2689 m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2690 if (!m_imageClearProgram->isOk())
2691 {
2692 const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2693
2694 m_testCtx.getLog() << *m_imageClearProgram;
2695 throw tcu::TestError("could not build program");
2696 }
2697 }
2698
deinit(void)2699 void ConcurrentImageAtomicCase::deinit (void)
2700 {
2701 if (m_imageID)
2702 {
2703 m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2704 m_imageID = 0;
2705 }
2706
2707 for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2708 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2709 m_intermediateResultBuffers.clear();
2710
2711 delete m_program;
2712 m_program = DE_NULL;
2713
2714 delete m_imageReadProgram;
2715 m_imageReadProgram = DE_NULL;
2716
2717 delete m_imageClearProgram;
2718 m_imageClearProgram = DE_NULL;
2719 }
2720
// Clears the work image, then dispatches the imageAtomicAdd program
// m_numCalls times with unique deltas (a shuffled ramp summing to
// numCalls*(numCalls+1)/2) and no barriers between dispatches. Verifies that
// every texel reaches the expected sum and that the per-call pre-addition
// values recorded for each element form a consistent chain of the applied
// additions.
TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const deUint32 sumValue = (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
	std::vector<int> deltas;

	// generate unique deltas
	generateShuffledRamp(m_numCalls, deltas);

	// clear image
	{
		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;

		gl.useProgram(m_imageClearProgram->getProgram());
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
		gl.dispatchCompute(m_workSize, m_workSize, 1);
		// make the clear visible to the subsequent atomic passes
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
	}

	// invoke program N times, each with a different delta
	{
		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running shader " << m_numCalls << " times.\n"
			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
			<< "Setting u_atomicDelta to a unique value for each call.\n"
			<< tcu::TestLog::EndMessage;

		if (deltaLocation == -1)
			throw tcu::TestError("u_atomicDelta location was -1");

		gl.useProgram(m_program->getProgram());
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);

		// no barriers between calls: the image atomics alone must be coherent
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
				<< tcu::TestLog::EndMessage;

			gl.uniform1ui(deltaLocation, deltas[callNdx]);
			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
			gl.dispatchCompute(m_workSize, m_workSize, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		std::vector<deUint32> result;

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;

		readWorkImage(result);

		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
		{
			if (result[ndx] != sumValue)
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
					<< "Work image contains invalid values."
					<< tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
	}

	// verify steps
	{
		std::vector<std::vector<deUint32> > intermediateResults (m_numCalls);
		std::vector<deUint32> valueChain (m_numCalls);
		std::vector<deUint32> chainDelta (m_numCalls);

		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

		// collect results

		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
		}

		// verify values

		// NOTE(review): this bound walks only the first m_workSize elements,
		// yet the failure log below decodes valueNdx as a 2D index into the
		// (m_workSize x m_workSize) image -- confirm whether the intended
		// bound is m_workSize * m_workSize.
		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
		{
			int invalidOperationNdx;
			deUint32 errorDelta;
			deUint32 errorExpected;

			// collect result chain for each element
			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

			// check there exists a path from 0 to sumValue using each addition once
			// decompose cumulative results to addition operations (all additions positive => this works)

			std::sort(valueChain.begin(), valueChain.end());

			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];

			// chainDelta contains now the actual additions applied to the value
			std::sort(chainDelta.begin(), chainDelta.end());

			// validate chain
			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
			{
				m_testCtx.getLog()
					<< tcu::TestLog::Message
					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
					<< tcu::TestLog::EndMessage;

				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
				return STOP;
			}
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
2864
// Reads the work image back into 'result' (m_workSize*m_workSize values,
// row-major) by dispatching the image-read program into a temporary SSBO and
// then mapping that buffer. Throws tcu::TestError on map/unmap failure.
void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	glu::Buffer resultBuffer (m_context.getRenderContext());

	// Read image to an ssbo

	{
		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);

		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);

		// make prior image writes visible to the read shader's imageLoads
		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
		gl.useProgram(m_imageReadProgram->getProgram());

		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
		gl.dispatchCompute(m_workSize, m_workSize, 1);

		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
	}

	// Read ssbo
	{
		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
		GLU_EXPECT_NO_ERROR(gl.getError(), "map");

		if (!ptr)
			throw tcu::TestError("mapBufferRange returned NULL");

		result.resize(m_workSize * m_workSize);
		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);

		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
			throw tcu::TestError("unmapBuffer returned false");
	}
}
2903
// Builds the imageAtomicAdd test shader. Each invocation atomically adds
// u_atomicDelta to its own texel of the work image (binding 2) and records
// the pre-addition value into the intermediate result buffer (binding 1),
// indexed row-major.
std::string ConcurrentImageAtomicCase::genComputeSource (void) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
		<< "\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
		<< "} sb_ires;\n"
		<< "\n"
		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
		<< "uniform highp uint u_atomicDelta;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
2928
// Builds the read-back shader: each invocation copies one texel of the work
// image (binding 2) into the result SSBO (binding 1), indexed row-major.
std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
		<< "{\n"
		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
		<< "} sb_res;\n"
		<< "\n"
		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
2951
genImageClearSource(void) const2952 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2953 {
2954 std::ostringstream buf;
2955
2956 buf << "${GLSL_VERSION_DECL}\n"
2957 << "\n"
2958 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2959 << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2960 << "\n"
2961 << "void main ()\n"
2962 << "{\n"
2963 << " imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2964 << "}";
2965
2966 return specializeShader(m_context, buf.str().c_str());
2967 }
2968
// Exercises atomic counter increments and SSBO atomicXor operations that are
// both backed by the very same buffer object: the counter adds target the low
// bits while the XOR masks flip high bits and cancel out over the run.
class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
	ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize);
	~ConcurrentSSBOAtomicCounterMixedCase (void);

	void init (void);
	void deinit (void);
	IterateResult iterate (void);

private:
	std::string genSSBOComputeSource (void) const;			// atomicXor program
	std::string genAtomicCounterComputeSource (void) const;	// counter-increment program

	const int m_numCalls;					// number of dispatch pairs
	const int m_workSize;					// work group count per dispatch
	deUint32 m_bufferID;					// single buffer shared by both programs
	glu::ShaderProgram* m_ssboAtomicProgram;
	glu::ShaderProgram* m_atomicCounterProgram;
};
2989
// Stores test dimensions; GL objects and programs are created in init().
ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
	: TestCase (context, name, description)
	, m_numCalls (numCalls)
	, m_workSize (workSize)
	, m_bufferID (DE_NULL)
	, m_ssboAtomicProgram (DE_NULL)
	, m_atomicCounterProgram (DE_NULL)
{
	// SSBO atomic XORs cancel out
	// Each mask bit must be flipped an even number of times over the whole run
	// for the XORs to cancel, leaving only the counter increments in the low
	// bits. (NOTE(review): the shader picks masks with (id % 8u), i.e. 8
	// distinct masks; the 16*2 divisor is stricter than strictly required --
	// confirm intent.)
	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
}
3001
// Releases all GL resources via the idempotent deinit().
ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
{
	deinit();
}
3006
// Creates the shared two-uint buffer (targetValue + dummy, zero-initialized)
// and compiles both compute programs. Throws tcu::TestError if a program
// fails to build.
void ConcurrentSSBOAtomicCounterMixedCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const deUint32 zeroBuf[2] = { 0, 0 };

	// gen buffer

	gl.genBuffers(1, &m_bufferID);
	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);

	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

	// gen programs

	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");

		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
		m_testCtx.getLog() << *m_ssboAtomicProgram;
		if (!m_ssboAtomicProgram->isOk())
			throw tcu::TestError("could not build program");
	}
	{
		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");

		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
		m_testCtx.getLog() << *m_atomicCounterProgram;
		if (!m_atomicCounterProgram->isOk())
			throw tcu::TestError("could not build program");
	}
}
3039
deinit(void)3040 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3041 {
3042 if (m_bufferID)
3043 {
3044 m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3045 m_bufferID = 0;
3046 }
3047
3048 delete m_ssboAtomicProgram;
3049 m_ssboAtomicProgram = DE_NULL;
3050
3051 delete m_atomicCounterProgram;
3052 m_atomicCounterProgram = DE_NULL;
3053 }
3054
// Interleaves m_numCalls dispatches of the counter-increment and SSBO-XOR
// programs, both bound to the same buffer (as ATOMIC_COUNTER_BUFFER and
// SHADER_STORAGE_BUFFER respectively), then checks the first word: the XOR
// masks must have cancelled out, leaving exactly numCalls*workSize from the
// counter increments.
TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;

	// invoke programs N times
	{
		m_testCtx.getLog()
			<< tcu::TestLog::Message
			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
			<< tcu::TestLog::EndMessage;

		// same buffer bound through two different targets at once
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);

		// no barriers between dispatches; the atomics must remain coherent
		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
		{
			gl.useProgram(m_atomicCounterProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);

			gl.useProgram(m_ssboAtomicProgram->getProgram());
			gl.dispatchCompute(m_workSize, 1, 1);
		}

		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
	}

	// Verify result
	{
		deUint32 result;

		// XORs cancel out, only addition is left
		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;

		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

		if ((int)result != m_numCalls*m_workSize)
		{
			m_testCtx.getLog()
				<< tcu::TestLog::Message
				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
				<< tcu::TestLog::EndMessage;

			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
			return STOP;
		}

		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
	}

	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	return STOP;
}
3111
// Builds the SSBO atomic program. Each invocation atomically XORs one of 8
// single-bit masks (bits 24..31, chosen by invocation id % 8) into
// targetValue; the pre-XOR value is written to 'dummy' (presumably so the
// atomic's return value is consumed -- confirm).
std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
		<< "{\n"
		<< "	highp uint targetValue;\n"
		<< "	highp uint dummy;\n"
		<< "} sb_work;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	// flip high bits\n"
		<< "	highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
3133
// Builds the atomic counter program: each invocation simply increments the
// counter at binding 0, offset 0 (the same word the SSBO program XORs).
std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
{
	std::ostringstream buf;

	buf << "${GLSL_VERSION_DECL}\n"
		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
		<< "\n"
		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "	atomicCounterIncrement(u_counter);\n"
		<< "}";

	return specializeShader(m_context, buf.str().c_str());
}
3150
3151 } // anonymous
3152
// Root group for the synchronization tests; children are added in init().
SynchronizationTests::SynchronizationTests (Context& context)
	: TestCaseGroup(context, "synchronization", "Synchronization tests")
{
}
3157
// Child cases are owned and destroyed by the TestCaseGroup base class.
SynchronizationTests::~SynchronizationTests (void)
{
}
3161
init(void)3162 void SynchronizationTests::init (void)
3163 {
3164 tcu::TestCaseGroup* const inInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "in_invocation", "Test intra-invocation synchronization");
3165 tcu::TestCaseGroup* const interInvocationGroup = new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3166 tcu::TestCaseGroup* const interCallGroup = new tcu::TestCaseGroup(m_testCtx, "inter_call", "Test inter-call synchronization");
3167
3168 addChild(inInvocationGroup);
3169 addChild(interInvocationGroup);
3170 addChild(interCallGroup);
3171
3172 // .in_invocation & .inter_invocation
3173 {
3174 static const struct CaseConfig
3175 {
3176 const char* namePrefix;
3177 const InterInvocationTestCase::StorageType storage;
3178 const int flags;
3179 } configs[] =
3180 {
3181 { "image", InterInvocationTestCase::STORAGE_IMAGE, 0 },
3182 { "image_atomic", InterInvocationTestCase::STORAGE_IMAGE, InterInvocationTestCase::FLAG_ATOMIC },
3183 { "ssbo", InterInvocationTestCase::STORAGE_BUFFER, 0 },
3184 { "ssbo_atomic", InterInvocationTestCase::STORAGE_BUFFER, InterInvocationTestCase::FLAG_ATOMIC },
3185 };
3186
3187 for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3188 {
3189 tcu::TestCaseGroup* const targetGroup = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3190 const int extraFlags = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3191
3192 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3193 {
3194 const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3195
3196 targetGroup->addChild(new InvocationWriteReadCase(m_context,
3197 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3198 (std::string("Write to ") + target + " and read it").c_str(),
3199 configs[configNdx].storage,
3200 configs[configNdx].flags | extraFlags));
3201
3202 targetGroup->addChild(new InvocationReadWriteCase(m_context,
3203 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3204 (std::string("Read form ") + target + " and then write to it").c_str(),
3205 configs[configNdx].storage,
3206 configs[configNdx].flags | extraFlags));
3207
3208 targetGroup->addChild(new InvocationOverWriteCase(m_context,
3209 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3210 (std::string("Write to ") + target + " twice and read it").c_str(),
3211 configs[configNdx].storage,
3212 configs[configNdx].flags | extraFlags));
3213
3214 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3215 (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3216 (std::string("Write to aliasing ") + target + " and read it").c_str(),
3217 InvocationAliasWriteCase::TYPE_WRITE,
3218 configs[configNdx].storage,
3219 configs[configNdx].flags | extraFlags));
3220
3221 targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3222 (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3223 (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3224 InvocationAliasWriteCase::TYPE_OVERWRITE,
3225 configs[configNdx].storage,
3226 configs[configNdx].flags | extraFlags));
3227 }
3228 }
3229 }
3230
3231 // .inter_call
3232 {
3233 tcu::TestCaseGroup* const withBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3234 tcu::TestCaseGroup* const withoutBarrierGroup = new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3235
3236 interCallGroup->addChild(withBarrierGroup);
3237 interCallGroup->addChild(withoutBarrierGroup);
3238
3239 // .with_memory_barrier
3240 {
3241 static const struct CaseConfig
3242 {
3243 const char* namePrefix;
3244 const InterCallTestCase::StorageType storage;
3245 const int flags;
3246 } configs[] =
3247 {
3248 { "image", InterCallTestCase::STORAGE_IMAGE, 0 },
3249 { "image_atomic", InterCallTestCase::STORAGE_IMAGE, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
3250 { "ssbo", InterCallTestCase::STORAGE_BUFFER, 0 },
3251 { "ssbo_atomic", InterCallTestCase::STORAGE_BUFFER, InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT },
3252 };
3253
3254 const int seed0 = 123;
3255 const int seed1 = 457;
3256
3257 for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3258 {
3259 const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3260
3261 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3262 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3263 (std::string("Write to ") + target + " and read it").c_str(),
3264 configs[configNdx].storage,
3265 configs[configNdx].flags,
3266 InterCallOperations()
3267 << op::WriteData::Generate(1, seed0)
3268 << op::Barrier()
3269 << op::ReadData::Generate(1, seed0)));
3270
3271 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3272 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3273 (std::string("Read from ") + target + " and then write to it").c_str(),
3274 configs[configNdx].storage,
3275 configs[configNdx].flags,
3276 InterCallOperations()
3277 << op::ReadZeroData::Generate(1)
3278 << op::Barrier()
3279 << op::WriteData::Generate(1, seed0)));
3280
3281 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3283 (std::string("Write to ") + target + " twice and read it").c_str(),
3284 configs[configNdx].storage,
3285 configs[configNdx].flags,
3286 InterCallOperations()
3287 << op::WriteData::Generate(1, seed0)
3288 << op::Barrier()
3289 << op::WriteData::Generate(1, seed1)
3290 << op::Barrier()
3291 << op::ReadData::Generate(1, seed1)));
3292
3293 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3294 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3295 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3296 configs[configNdx].storage,
3297 configs[configNdx].flags,
3298 InterCallOperations()
3299 << op::WriteData::Generate(1, seed0)
3300 << op::WriteData::Generate(2, seed1)
3301 << op::Barrier()
3302 << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3303
3304 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3306 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3307 configs[configNdx].storage,
3308 configs[configNdx].flags,
3309 InterCallOperations()
3310 << op::WriteDataInterleaved::Generate(1, seed0, true)
3311 << op::WriteDataInterleaved::Generate(1, seed1, false)
3312 << op::Barrier()
3313 << op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3314
3315 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3316 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3317 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3318 configs[configNdx].storage,
3319 configs[configNdx].flags,
3320 InterCallOperations()
3321 << op::WriteData::Generate(1, seed0)
3322 << op::WriteData::Generate(2, seed1)
3323 << op::Barrier()
3324 << op::ReadData::Generate(1, seed0)
3325 << op::ReadData::Generate(2, seed1)));
3326
3327 withBarrierGroup->addChild(new InterCallTestCase(m_context,
3328 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3329 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3330 configs[configNdx].storage,
3331 configs[configNdx].flags,
3332 InterCallOperations()
3333 << op::WriteData::Generate(1, seed0)
3334 << op::WriteData::Generate(2, seed1)
3335 << op::Barrier()
3336 << op::ReadData::Generate(2, seed1)
3337 << op::ReadData::Generate(1, seed0)));
3338 }
3339
3340 // .without_memory_barrier
3341 {
3342 struct InvocationConfig
3343 {
3344 const char* name;
3345 int count;
3346 };
3347
3348 static const InvocationConfig ssboInvocations[] =
3349 {
3350 { "1k", 1024 },
3351 { "4k", 4096 },
3352 { "32k", 32768 },
3353 };
3354 static const InvocationConfig imageInvocations[] =
3355 {
3356 { "8x8", 8 },
3357 { "32x32", 32 },
3358 { "128x128", 128 },
3359 };
3360 static const InvocationConfig counterInvocations[] =
3361 {
3362 { "32", 32 },
3363 { "128", 128 },
3364 { "1k", 1024 },
3365 };
3366 static const int callCounts[] = { 2, 5, 100 };
3367
3368 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3369 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370 withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3371
3372 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3373 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374 withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3375
3376 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378 withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379
3380 for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3381 for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3382 withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(), "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3383 }
3384 }
3385 }
3386 }
3387
3388 } // Functional
3389 } // gles31
3390 } // deqp
3391