1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.1 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Synchronization Tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es31fSynchronizationTests.hpp"
25 #include "tcuTestLog.hpp"
26 #include "tcuStringTemplate.hpp"
27 #include "tcuSurface.hpp"
28 #include "tcuRenderTarget.hpp"
29 #include "gluRenderContext.hpp"
30 #include "gluShaderProgram.hpp"
31 #include "gluObjectWrapper.hpp"
32 #include "gluPixelTransfer.hpp"
33 #include "gluContextInfo.hpp"
34 #include "glwFunctions.hpp"
35 #include "glwEnums.hpp"
36 #include "deStringUtil.hpp"
37 #include "deSharedPtr.hpp"
38 #include "deMemory.h"
39 #include "deRandom.hpp"
40 
41 #include <map>
42 
43 namespace deqp
44 {
45 namespace gles31
46 {
47 namespace Functional
48 {
49 namespace
50 {
51 
52 static bool checkSupport(Context& ctx)
53 {
54 	auto ctxType = ctx.getRenderContext().getType();
55 	return contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
56 		   contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
57 		   ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
58 }
59 
60 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
61 {
62 	std::vector<deUint32> chainDelta(valueChain.size());
63 
64 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
65 		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
66 
67 	// chainDelta now contains the actual additions applied to the value
68 	// check that there exists an addition ramp from 1 to ...
69 	std::sort(chainDelta.begin(), chainDelta.end());
70 
71 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
72 	{
73 		if ((int)chainDelta[callNdx] != callNdx+1)
74 		{
75 			invalidOperationNdx = callNdx;
76 			errorDelta = chainDelta[callNdx];
77 			errorExpected = callNdx+1;
78 
79 			return false;
80 		}
81 	}
82 
83 	return true;
84 }
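// Example (illustrative values): for valueChain = {0, 2, 3} and sumValue = 6 the deltas are
// {2, 1, 3}; sorted they form the ramp {1, 2, 3}, so the chain is accepted. For
// valueChain = {0, 1, 2} and sumValue = 6 the sorted deltas are {1, 1, 4}, and the check
// fails at index 1 with errorDelta = 1 and errorExpected = 2.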
85 
86 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
87 {
88 	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
89 	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
90 
91 	if (!ptr)
92 		throw tcu::TestError("mapBufferRange returned NULL");
93 
94 	result.resize(numElements);
95 	memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
96 
97 	if (gl.unmapBuffer(target) == GL_FALSE)
98 		throw tcu::TestError("unmapBuffer returned false");
99 }
100 
101 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
102 {
103 	std::vector<deUint32> vec;
104 
105 	readBuffer(gl, target, 1, vec);
106 
107 	return vec[0];
108 }
109 
110 //! Generate a ramp of values from 1 to numElements, and shuffle it
111 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
112 {
113 	de::Random rng(0xabcd);
114 
115 	// some positive (non-zero) unique values
116 	ramp.resize(numElements);
117 	for (int callNdx = 0; callNdx < numElements; ++callNdx)
118 		ramp[callNdx] = callNdx + 1;
119 
120 	rng.shuffle(ramp.begin(), ramp.end());
121 }
122 
123 static std::string specializeShader(Context& context, const char* code)
124 {
125 	auto					ctxType			= context.getRenderContext().getType();
126 	const bool				isES32orGL45	= glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
127 											  glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
128 	const glu::GLSLVersion	glslVersion		= glu::getContextTypeGLSLVersion(ctxType);
129 
130 	std::map<std::string, std::string> specializationMap;
131 	specializationMap["GLSL_VERSION_DECL"]				= glu::getGLSLVersionDeclaration(glslVersion);
132 	specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"]	= isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";
133 
134 	return tcu::StringTemplate(code).specialize(specializationMap);
135 }
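// For example, on an ES 3.1 context "${GLSL_VERSION_DECL}" expands to "#version 310 es" and
// "${SHADER_IMAGE_ATOMIC_REQUIRE}" to "#extension GL_OES_shader_image_atomic : require"; on
// ES 3.2 or GL 4.5 core contexts the extension directive is replaced by an empty string,
// since image atomics are core functionality there.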
136 
137 class InterInvocationTestCase : public TestCase
138 {
139 public:
140 	enum StorageType
141 	{
142 		STORAGE_BUFFER = 0,
143 		STORAGE_IMAGE,
144 
145 		STORAGE_LAST
146 	};
147 	enum CaseFlags
148 	{
149 		FLAG_ATOMIC				= 0x1,
150 		FLAG_ALIASING_STORAGES	= 0x2,
151 		FLAG_IN_GROUP			= 0x4,
152 	};
153 
154 						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
155 						~InterInvocationTestCase	(void);
156 
157 private:
158 	void				init						(void);
159 	void				deinit						(void);
160 	IterateResult		iterate						(void);
161 
162 	void				runCompute					(void);
163 	bool				verifyResults				(void);
164 	virtual std::string	genShaderSource				(void) const = 0;
165 
166 protected:
167 	std::string			genBarrierSource			(void) const;
168 
169 	const StorageType	m_storage;
170 	const bool			m_useAtomic;
171 	const bool			m_aliasingStorages;
172 	const bool			m_syncWithGroup;
173 	const int			m_workWidth;				// !< total work width
174 	const int			m_workHeight;				// !<     ...    height
175 	const int			m_localWidth;				// !< group width
176 	const int			m_localHeight;				// !< group height
177 	const int			m_elementsPerInvocation;	// !< elements accessed by a single invocation
178 
179 private:
180 	glw::GLuint			m_storageBuf;
181 	glw::GLuint			m_storageTex;
182 	glw::GLuint			m_resultBuf;
183 	glu::ShaderProgram*	m_program;
184 };
185 
186 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
187 	: TestCase					(context, name, desc)
188 	, m_storage					(storage)
189 	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
190 	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
191 	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
192 	, m_workWidth				(256)
193 	, m_workHeight				(256)
194 	, m_localWidth				(16)
195 	, m_localHeight				(8)
196 	, m_elementsPerInvocation	(8)
197 	, m_storageBuf				(0)
198 	, m_storageTex				(0)
199 	, m_resultBuf				(0)
200 	, m_program					(DE_NULL)
201 {
202 	DE_ASSERT(m_storage < STORAGE_LAST);
203 	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
204 }
205 
206 InterInvocationTestCase::~InterInvocationTestCase (void)
207 {
208 	deinit();
209 }
210 
211 void InterInvocationTestCase::init (void)
212 {
213 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
214 
215 	// requirements
216 
217 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
218 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
219 
220 	// program
221 
222 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
223 	m_testCtx.getLog() << *m_program;
224 	if (!m_program->isOk())
225 		throw tcu::TestError("could not build program");
226 
227 	// source
228 
229 	if (m_storage == STORAGE_BUFFER)
230 	{
231 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
232 		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
233 		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);
234 
235 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
236 
237 		gl.genBuffers(1, &m_storageBuf);
238 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
239 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
240 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
241 	}
242 	else if (m_storage == STORAGE_IMAGE)
243 	{
244 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
245 		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
246 
247 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
248 
249 		gl.genTextures(1, &m_storageTex);
250 		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
251 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
252 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
253 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
254 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
255 
256 		// Zero-fill
257 		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
258 
259 		{
260 			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
261 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
262 			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
263 		}
264 	}
265 	else
266 		DE_ASSERT(DE_FALSE);
267 
268 	// destination
269 
270 	{
271 		const int				bufferElements	= m_workWidth * m_workHeight;
272 		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
273 		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);
274 
275 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
276 
277 		gl.genBuffers(1, &m_resultBuf);
278 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
279 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
280 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
281 	}
282 }
283 
284 void InterInvocationTestCase::deinit (void)
285 {
286 	if (m_storageBuf)
287 	{
288 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
289 		m_storageBuf = DE_NULL;
290 	}
291 
292 	if (m_storageTex)
293 	{
294 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
295 		m_storageTex = DE_NULL;
296 	}
297 
298 	if (m_resultBuf)
299 	{
300 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
301 		m_resultBuf = DE_NULL;
302 	}
303 
304 	delete m_program;
305 	m_program = DE_NULL;
306 }
307 
308 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
309 {
310 	// Dispatch
311 	runCompute();
312 
313 	// Verify buffer contents
314 	if (verifyResults())
315 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
316 	else
317 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
318 
319 	return STOP;
320 }
321 
322 void InterInvocationTestCase::runCompute (void)
323 {
324 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
325 	const int				groupsX	= m_workWidth / m_localWidth;
326 	const int				groupsY	= m_workHeight / m_localHeight;
327 
328 	DE_ASSERT((m_workWidth % m_localWidth) == 0);
329 	DE_ASSERT((m_workHeight % m_localHeight) == 0);
330 
331 	m_testCtx.getLog()
332 		<< tcu::TestLog::Message
333 		<< "Dispatching compute.\n"
334 		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
335 		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
336 		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
337 		<< tcu::TestLog::EndMessage;
338 
339 	gl.useProgram(m_program->getProgram());
340 
341 	// source
342 	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
343 	{
344 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
345 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
346 	}
347 	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
348 	{
349 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
350 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
351 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
352 
353 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
354 	}
355 	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
356 	{
357 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
358 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
359 	}
360 	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
361 	{
362 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
363 		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
364 
365 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
366 
367 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
368 	}
369 	else
370 		DE_ASSERT(DE_FALSE);
371 
372 	// destination
373 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
374 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
375 
376 	// dispatch
377 	gl.dispatchCompute(groupsX, groupsY, 1);
378 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
379 }
380 
381 bool InterInvocationTestCase::verifyResults (void)
382 {
383 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
384 	const int				errorFloodThreshold	= 5;
385 	int						numErrorsLogged		= 0;
386 	const void*				mapped				= DE_NULL;
387 	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
388 	bool					error				= false;
389 
390 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
391 	gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
392 	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
393 	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
394 
395 	// copy to properly aligned array
396 	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
397 
398 	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
399 		throw tcu::TestError("memory map store corrupted");
400 
401 	// check the results
402 	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
403 	{
404 		if (results[ndx] != 1)
405 		{
406 			error = true;
407 
408 			if (numErrorsLogged == 0)
409 				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
410 			if (numErrorsLogged++ < errorFloodThreshold)
411 				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
412 			else
413 			{
414 				// after N errors, no point continuing verification
415 				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
416 				break;
417 			}
418 		}
419 	}
420 
421 	if (!error)
422 		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
423 	return !error;
424 }
425 
426 std::string InterInvocationTestCase::genBarrierSource (void) const
427 {
428 	std::ostringstream buf;
429 
430 	if (m_syncWithGroup)
431 	{
432 		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
433 		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
434 		//       we only require intra-workgroup synchronization.
435 		buf << "\n"
436 			<< "	groupMemoryBarrier();\n"
437 			<< "	barrier();\n"
438 			<< "\n";
439 	}
440 	else if (m_storage == STORAGE_BUFFER)
441 	{
442 		DE_ASSERT(!m_syncWithGroup);
443 
444 		// Waiting only for data written by this invocation. Since all buffer reads and writes are
445 		// processed in order (within a single invocation), we don't have to do anything.
446 		buf << "\n";
447 	}
448 	else if (m_storage == STORAGE_IMAGE)
449 	{
450 		DE_ASSERT(!m_syncWithGroup);
451 
452 		// Waiting only for data written by this invocation. But since operations complete in undefined
453 		// order, we have to wait for them to complete.
454 		buf << "\n"
455 			<< "	memoryBarrierImage();\n"
456 			<< "\n";
457 	}
458 	else
459 		DE_ASSERT(DE_FALSE);
460 
461 	return buf.str();
462 }
463 
464 class InvocationBasicCase : public InterInvocationTestCase
465 {
466 public:
467 							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
468 private:
469 	std::string				genShaderSource			(void) const;
470 	virtual std::string		genShaderMainBlock		(void) const = 0;
471 };
472 
473 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
474 	: InterInvocationTestCase(context, name, desc, storage, flags)
475 {
476 }
477 
478 std::string InvocationBasicCase::genShaderSource (void) const
479 {
480 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
481 	std::ostringstream	buf;
482 
483 	buf << "${GLSL_VERSION_DECL}\n"
484 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
485 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
486 		<< "layout(binding=0, std430) buffer Output\n"
487 		<< "{\n"
488 		<< "	highp int values[];\n"
489 		<< "} sb_result;\n";
490 
491 	if (m_storage == STORAGE_BUFFER)
492 		buf << "layout(binding=1, std430) coherent buffer Storage\n"
493 			<< "{\n"
494 			<< "	highp int values[];\n"
495 			<< "} sb_store;\n"
496 			<< "\n"
497 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
498 			<< "{\n"
499 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
500 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
501 			<< "}\n";
502 	else if (m_storage == STORAGE_IMAGE)
503 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
504 			<< "\n"
505 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
506 			<< "{\n"
507 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
508 			<< "}\n";
509 	else
510 		DE_ASSERT(DE_FALSE);
511 
512 	buf << "\n"
513 		<< "void main (void)\n"
514 		<< "{\n"
515 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
516 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
517 		<< "	bool allOk      = true;\n"
518 		<< "\n"
519 		<< genShaderMainBlock()
520 		<< "\n"
521 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
522 		<< "}\n";
523 
524 	return specializeShader(m_context, buf.str().c_str());
525 }
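// Rough shape of the source assembled above for the STORAGE_BUFFER case on an ES 3.1 context
// (a sketch; the marked block is supplied by the subclass via genShaderMainBlock()):
//
//     #version 310 es
//     layout (local_size_x=16, local_size_y=8) in;
//     layout(binding=0, std430) buffer Output { highp int values[]; } sb_result;
//     layout(binding=1, std430) coherent buffer Storage { highp int values[]; } sb_store;
//
//     highp int getIndex (in highp uvec2 localID, in highp int element) { ... }
//
//     void main (void)
//     {
//         int resultNdx = ...; int groupNdx = ...; bool allOk = true;
//         // <writes/barrier/reads from genShaderMainBlock()>
//         sb_result.values[resultNdx] = (allOk) ? (1) : (0);
//     }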
526 
527 class InvocationWriteReadCase : public InvocationBasicCase
528 {
529 public:
530 					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
531 private:
532 	std::string		genShaderMainBlock			(void) const;
533 };
534 
535 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
536 	: InvocationBasicCase(context, name, desc, storage, flags)
537 {
538 }
539 
540 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
541 {
542 	std::ostringstream buf;
543 
544 	// write
545 
546 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
547 	{
548 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
549 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
550 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
551 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
552 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
553 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
554 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
555 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
556 		else
557 			DE_ASSERT(DE_FALSE);
558 	}
559 
560 	// barrier
561 
562 	buf << genBarrierSource();
563 
564 	// read
565 
566 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
567 	{
568 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
569 
570 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
571 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
572 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
573 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
574 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
575 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
576 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
577 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
578 		else
579 			DE_ASSERT(DE_FALSE);
580 	}
581 
582 	return buf.str();
583 }
584 
585 class InvocationReadWriteCase : public InvocationBasicCase
586 {
587 public:
588 					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
589 private:
590 	std::string		genShaderMainBlock			(void) const;
591 };
592 
593 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
594 	: InvocationBasicCase(context, name, desc, storage, flags)
595 {
596 }
597 
598 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
599 {
600 	std::ostringstream buf;
601 
602 	// read
603 
604 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
605 	{
606 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
607 
608 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
609 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
610 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
611 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
612 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
613 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
614 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
615 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
616 		else
617 			DE_ASSERT(DE_FALSE);
618 	}
619 
620 	// barrier
621 
622 	buf << genBarrierSource();
623 
624 	// write
625 
626 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
627 	{
628 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
629 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
630 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
631 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
632 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
633 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
634 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
635 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
636 		else
637 			DE_ASSERT(DE_FALSE);
638 	}
639 
640 	return buf.str();
641 }
642 
643 class InvocationOverWriteCase : public InvocationBasicCase
644 {
645 public:
646 					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
647 private:
648 	std::string		genShaderMainBlock			(void) const;
649 };
650 
651 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
652 	: InvocationBasicCase(context, name, desc, storage, flags)
653 {
654 }
655 
656 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
657 {
658 	std::ostringstream buf;
659 
660 	// write
661 
662 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
663 	{
664 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
665 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
666 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
668 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
670 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
672 		else
673 			DE_ASSERT(DE_FALSE);
674 	}
675 
676 	// barrier
677 
678 	buf << genBarrierSource();
679 
680 	// write over
681 
682 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683 	{
684 		// write another invocation's value or our own value depending on test type
685 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686 
687 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
688 			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
689 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690 			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
691 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692 			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
693 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694 			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
695 		else
696 			DE_ASSERT(DE_FALSE);
697 	}
698 
699 	// barrier
700 
701 	buf << genBarrierSource();
702 
703 	// read
704 
705 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
706 	{
707 		// check another invocation's value or our own value depending on test type
708 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
709 
710 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
711 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
712 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
713 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
714 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
715 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
716 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
717 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
718 		else
719 			DE_ASSERT(DE_FALSE);
720 	}
721 
722 	return buf.str();
723 }
724 
725 class InvocationAliasWriteCase : public InterInvocationTestCase
726 {
727 public:
728 	enum TestType
729 	{
730 		TYPE_WRITE = 0,
731 		TYPE_OVERWRITE,
732 
733 		TYPE_LAST
734 	};
735 
736 					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
737 private:
738 	std::string		genShaderSource				(void) const;
739 
740 	const TestType	m_type;
741 };
742 
743 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
744 	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
745 	, m_type					(type)
746 {
747 	DE_ASSERT(type < TYPE_LAST);
748 }
749 
750 std::string InvocationAliasWriteCase::genShaderSource (void) const
751 {
752 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
753 	std::ostringstream	buf;
754 
755 	buf << "${GLSL_VERSION_DECL}\n"
756 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
757 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
758 		<< "layout(binding=0, std430) buffer Output\n"
759 		<< "{\n"
760 		<< "	highp int values[];\n"
761 		<< "} sb_result;\n";
762 
763 	if (m_storage == STORAGE_BUFFER)
764 		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
765 			<< "{\n"
766 			<< "	highp int values[];\n"
767 			<< "} sb_store0;\n"
768 			<< "layout(binding=2, std430) coherent buffer Storage1\n"
769 			<< "{\n"
770 			<< "	highp int values[];\n"
771 			<< "} sb_store1;\n"
772 			<< "\n"
773 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
774 			<< "{\n"
775 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
776 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
777 			<< "}\n";
778 	else if (m_storage == STORAGE_IMAGE)
779 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
780 			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
781 			<< "\n"
782 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
783 			<< "{\n"
784 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
785 			<< "}\n";
786 	else
787 		DE_ASSERT(DE_FALSE);
788 
789 	buf << "\n"
790 		<< "void main (void)\n"
791 		<< "{\n"
792 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
793 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
794 		<< "	bool allOk      = true;\n"
795 		<< "\n";
796 
797 	if (m_type == TYPE_OVERWRITE)
798 	{
799 		// write
800 
801 		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802 		{
803 			if (m_storage == STORAGE_BUFFER && m_useAtomic)
804 				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
805 			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
806 				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
807 			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
808 				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
809 			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
810 				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
811 			else
812 				DE_ASSERT(DE_FALSE);
813 		}
814 
815 		// barrier
816 
817 		buf << genBarrierSource();
818 	}
819 	else
820 		DE_ASSERT(m_type == TYPE_WRITE);
821 
822 	// write (again)
823 
824 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
825 	{
826 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
827 
828 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
829 			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
830 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
831 			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
832 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
833 			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
834 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
835 			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
836 		else
837 			DE_ASSERT(DE_FALSE);
838 	}
839 
840 	// barrier
841 
842 	buf << genBarrierSource();
843 
844 	// read
845 
846 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
847 	{
848 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
849 			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
850 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
851 			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
852 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
853 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
854 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
855 			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
856 		else
857 			DE_ASSERT(DE_FALSE);
858 	}
859 
860 	// return result
861 
862 	buf << "\n"
863 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
864 		<< "}\n";
865 
866 	return specializeShader(m_context, buf.str().c_str());
867 }
868 
869 namespace op
870 {
871 
872 struct WriteData
873 {
874 	int targetHandle;
875 	int seed;
876 
877 	static WriteData Generate(int targetHandle, int seed)
878 	{
879 		WriteData retVal;
880 
881 		retVal.targetHandle = targetHandle;
882 		retVal.seed = seed;
883 
884 		return retVal;
885 	}
886 };
887 
888 struct ReadData
889 {
890 	int targetHandle;
891 	int seed;
892 
893 	static ReadData Generate(int targetHandle, int seed)
894 	{
895 		ReadData retVal;
896 
897 		retVal.targetHandle = targetHandle;
898 		retVal.seed = seed;
899 
900 		return retVal;
901 	}
902 };
903 
904 struct Barrier
905 {
906 };
907 
908 struct WriteDataInterleaved
909 {
910 	int		targetHandle;
911 	int		seed;
912 	bool	evenOdd;
913 
914 	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
915 	{
916 		WriteDataInterleaved retVal;
917 
918 		retVal.targetHandle = targetHandle;
919 		retVal.seed = seed;
920 		retVal.evenOdd = evenOdd;
921 
922 		return retVal;
923 	}
924 };
925 
926 struct ReadDataInterleaved
927 {
928 	int targetHandle;
929 	int seed0;
930 	int seed1;
931 
932 	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
933 	{
934 		ReadDataInterleaved retVal;
935 
936 		retVal.targetHandle = targetHandle;
937 		retVal.seed0 = seed0;
938 		retVal.seed1 = seed1;
939 
940 		return retVal;
941 	}
942 };
943 
944 struct ReadMultipleData
945 {
946 	int targetHandle0;
947 	int seed0;
948 	int targetHandle1;
949 	int seed1;
950 
951 	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
952 	{
953 		ReadMultipleData retVal;
954 
955 		retVal.targetHandle0 = targetHandle0;
956 		retVal.seed0 = seed0;
957 		retVal.targetHandle1 = targetHandle1;
958 		retVal.seed1 = seed1;
959 
960 		return retVal;
961 	}
962 };
963 
964 struct ReadZeroData
965 {
966 	int targetHandle;
967 
968 	static ReadZeroData Generate(int targetHandle)
969 	{
970 		ReadZeroData retVal;
971 
972 		retVal.targetHandle = targetHandle;
973 
974 		return retVal;
975 	}
976 };
977 
978 } // namespace op
979 
980 class InterCallTestCase;
981 
982 class InterCallOperations
983 {
984 public:
985 	InterCallOperations& operator<< (const op::WriteData&);
986 	InterCallOperations& operator<< (const op::ReadData&);
987 	InterCallOperations& operator<< (const op::Barrier&);
988 	InterCallOperations& operator<< (const op::ReadMultipleData&);
989 	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
990 	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
991 	InterCallOperations& operator<< (const op::ReadZeroData&);
992 
993 private:
994 	struct Command
995 	{
996 		enum CommandType
997 		{
998 			TYPE_WRITE = 0,
999 			TYPE_READ,
1000 			TYPE_BARRIER,
1001 			TYPE_READ_MULTIPLE,
1002 			TYPE_WRITE_INTERLEAVE,
1003 			TYPE_READ_INTERLEAVE,
1004 			TYPE_READ_ZERO,
1005 
1006 			TYPE_LAST
1007 		};
1008 
1009 		CommandType type;
1010 
1011 		union CommandUnion
1012 		{
1013 			op::WriteData				write;
1014 			op::ReadData				read;
1015 			op::Barrier					barrier;
1016 			op::ReadMultipleData		readMulti;
1017 			op::WriteDataInterleaved	writeInterleave;
1018 			op::ReadDataInterleaved		readInterleave;
1019 			op::ReadZeroData			readZero;
1020 		} u_cmd;
1021 	};
1022 
1023 	friend class InterCallTestCase;
1024 
1025 	std::vector<Command> m_cmds;
1026 };
1027 
1028 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1029 {
1030 	m_cmds.push_back(Command());
1031 	m_cmds.back().type = Command::TYPE_WRITE;
1032 	m_cmds.back().u_cmd.write = cmd;
1033 
1034 	return *this;
1035 }
1036 
1037 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1038 {
1039 	m_cmds.push_back(Command());
1040 	m_cmds.back().type = Command::TYPE_READ;
1041 	m_cmds.back().u_cmd.read = cmd;
1042 
1043 	return *this;
1044 }
1045 
1046 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1047 {
1048 	m_cmds.push_back(Command());
1049 	m_cmds.back().type = Command::TYPE_BARRIER;
1050 	m_cmds.back().u_cmd.barrier = cmd;
1051 
1052 	return *this;
1053 }
1054 
1055 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1056 {
1057 	m_cmds.push_back(Command());
1058 	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1059 	m_cmds.back().u_cmd.readMulti = cmd;
1060 
1061 	return *this;
1062 }
1063 
1064 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1065 {
1066 	m_cmds.push_back(Command());
1067 	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1068 	m_cmds.back().u_cmd.writeInterleave = cmd;
1069 
1070 	return *this;
1071 }
1072 
1073 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1074 {
1075 	m_cmds.push_back(Command());
1076 	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1077 	m_cmds.back().u_cmd.readInterleave = cmd;
1078 
1079 	return *this;
1080 }
1081 
1082 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1083 {
1084 	m_cmds.push_back(Command());
1085 	m_cmds.back().type = Command::TYPE_READ_ZERO;
1086 	m_cmds.back().u_cmd.readZero = cmd;
1087 
1088 	return *this;
1089 }
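// Usage sketch (handles and seeds below are illustrative): command lists are built by chaining
// these stream operators and handing the result to InterCallTestCase, e.g.
//
//     InterCallOperations ops;
//     ops << op::WriteData::Generate(1, 0x1234)   // fill storage #1 with a seeded pattern
//         << op::Barrier()                        // glMemoryBarrier() between the dispatches
//         << op::ReadData::Generate(1, 0x1234);   // verify storage #1 against the same seed
//
// InterCallTestCase::init() then builds a compute program for each write/read command, and
// iterate() replays the list, issuing the requested memory barriers between dispatches.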
1090 
1091 class InterCallTestCase : public TestCase
1092 {
1093 public:
1094 	enum StorageType
1095 	{
1096 		STORAGE_BUFFER = 0,
1097 		STORAGE_IMAGE,
1098 
1099 		STORAGE_LAST
1100 	};
1101 	enum Flags
1102 	{
1103 		FLAG_USE_ATOMIC	= 1,
1104 		FLAG_USE_INT	= 2,
1105 	};
1106 													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1107 													~InterCallTestCase			(void);
1108 
1109 private:
1110 	void											init						(void);
1111 	void											deinit						(void);
1112 	IterateResult									iterate						(void);
1113 	bool											verifyResults				(void);
1114 
1115 	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1116 	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1117 	void											runCommand					(const op::Barrier&);
1118 	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1119 	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1120 	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1121 	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1122 	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1123 
1124 	glw::GLuint										genStorage					(int friendlyName);
1125 	glw::GLuint										genResultStorage			(void);
1126 	glu::ShaderProgram*								genWriteProgram				(int seed);
1127 	glu::ShaderProgram*								genReadProgram				(int seed);
1128 	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
1129 	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
1130 	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
1131 	glu::ShaderProgram*								genReadZeroProgram			(void);
1132 
1133 	const StorageType								m_storage;
1134 	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
1135 	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
1136 	const std::vector<InterCallOperations::Command>	m_cmds;
1137 	const bool										m_useAtomic;
1138 	const bool										m_formatInteger;
1139 
1140 	std::vector<glu::ShaderProgram*>				m_operationPrograms;
1141 	std::vector<glw::GLuint>						m_operationResultStorages;
1142 	std::map<int, glw::GLuint>						m_storageIDs;
1143 };
1144 
1145 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1146 	: TestCase					(context, name, desc)
1147 	, m_storage					(storage)
1148 	, m_invocationGridSize		(512)
1149 	, m_perInvocationSize		(2)
1150 	, m_cmds					(ops.m_cmds)
1151 	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
1152 	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
1153 {
1154 }
1155 
1156 InterCallTestCase::~InterCallTestCase (void)
1157 {
1158 	deinit();
1159 }
1160 
1161 void InterCallTestCase::init (void)
1162 {
1163 	int			programFriendlyName = 0;
1164 
1165 	// requirements
1166 
1167 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
1168 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1169 
1170 	// generate resources and validate command list
1171 
1172 	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1173 	m_operationResultStorages.resize(m_cmds.size(), 0);
1174 
1175 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1176 	{
1177 		switch (m_cmds[step].type)
1178 		{
1179 			case InterCallOperations::Command::TYPE_WRITE:
1180 			{
1181 				const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1182 
1183 				// new storage handle?
1184 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1185 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1186 
1187 				// program
1188 				{
1189 					glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1190 
1191 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1192 					m_testCtx.getLog() << *program;
1193 
1194 					if (!program->isOk())
1195 						throw tcu::TestError("could not build program");
1196 
1197 					m_operationPrograms[step] = program;
1198 				}
1199 				break;
1200 			}
1201 
1202 			case InterCallOperations::Command::TYPE_READ:
1203 			{
1204 				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1205 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1206 
1207 				// program and result storage
1208 				{
1209 					glu::ShaderProgram* program = genReadProgram(cmd.seed);
1210 
1211 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1212 					m_testCtx.getLog() << *program;
1213 
1214 					if (!program->isOk())
1215 						throw tcu::TestError("could not build program");
1216 
1217 					m_operationPrograms[step] = program;
1218 					m_operationResultStorages[step] = genResultStorage();
1219 				}
1220 				break;
1221 			}
1222 
1223 			case InterCallOperations::Command::TYPE_BARRIER:
1224 			{
1225 				break;
1226 			}
1227 
1228 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1229 			{
1230 				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1231 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1232 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1233 
1234 				// program
1235 				{
1236 					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1237 
1238 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1239 					m_testCtx.getLog() << *program;
1240 
1241 					if (!program->isOk())
1242 						throw tcu::TestError("could not build program");
1243 
1244 					m_operationPrograms[step] = program;
1245 					m_operationResultStorages[step] = genResultStorage();
1246 				}
1247 				break;
1248 			}
1249 
1250 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1251 			{
1252 				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1253 
1254 				// new storage handle?
1255 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1256 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1257 
1258 				// program
1259 				{
1260 					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1261 
1262 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1263 					m_testCtx.getLog() << *program;
1264 
1265 					if (!program->isOk())
1266 						throw tcu::TestError("could not build program");
1267 
1268 					m_operationPrograms[step] = program;
1269 				}
1270 				break;
1271 			}
1272 
1273 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1274 			{
1275 				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1276 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1277 
1278 				// program
1279 				{
1280 					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1281 
1282 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1283 					m_testCtx.getLog() << *program;
1284 
1285 					if (!program->isOk())
1286 						throw tcu::TestError("could not build program");
1287 
1288 					m_operationPrograms[step] = program;
1289 					m_operationResultStorages[step] = genResultStorage();
1290 				}
1291 				break;
1292 			}
1293 
1294 			case InterCallOperations::Command::TYPE_READ_ZERO:
1295 			{
1296 				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1297 
1298 				// new storage handle?
1299 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1300 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1301 
1302 				// program
1303 				{
1304 					glu::ShaderProgram* program = genReadZeroProgram();
1305 
1306 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1307 					m_testCtx.getLog() << *program;
1308 
1309 					if (!program->isOk())
1310 						throw tcu::TestError("could not build program");
1311 
1312 					m_operationPrograms[step] = program;
1313 					m_operationResultStorages[step] = genResultStorage();
1314 				}
1315 				break;
1316 			}
1317 
1318 			default:
1319 				DE_ASSERT(DE_FALSE);
1320 		}
1321 	}
1322 }
1323 
1324 void InterCallTestCase::deinit (void)
1325 {
1326 	// programs
1327 	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1328 		delete m_operationPrograms[ndx];
1329 	m_operationPrograms.clear();
1330 
1331 	// result storages
1332 	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1333 	{
1334 		if (m_operationResultStorages[ndx])
1335 			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1336 	}
1337 	m_operationResultStorages.clear();
1338 
1339 	// storage
1340 	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1341 	{
1342 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1343 
1344 		if (m_storage == STORAGE_BUFFER)
1345 			gl.deleteBuffers(1, &it->second);
1346 		else if (m_storage == STORAGE_IMAGE)
1347 			gl.deleteTextures(1, &it->second);
1348 		else
1349 			DE_ASSERT(DE_FALSE);
1350 	}
1351 	m_storageIDs.clear();
1352 }
1353 
1354 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1355 {
1356 	int programFriendlyName			= 0;
1357 	int resultStorageFriendlyName	= 0;
1358 
1359 	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1360 
1361 	// run steps
1362 
1363 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1364 	{
1365 		switch (m_cmds[step].type)
1366 		{
1367 			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
1368 			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
1369 			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
1370 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
1371 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
1372 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
1373 			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
1374 			default:
1375 				DE_ASSERT(DE_FALSE);
1376 		}
1377 	}
1378 
1379 	// read results from result buffers
1380 	if (verifyResults())
1381 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1382 	else
1383 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1384 
1385 	return STOP;
1386 }
1387 
1388 bool InterCallTestCase::verifyResults (void)
1389 {
1390 	int		resultBufferFriendlyName	= 0;
1391 	bool	allResultsOk				= true;
1392 	bool	anyResult					= false;
1393 
1394 	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1395 
1396 	for (int step = 0; step < (int)m_cmds.size(); ++step)
1397 	{
1398 		const int	errorFloodThreshold	= 5;
1399 		int			numErrorsLogged		= 0;
1400 
1401 		if (m_operationResultStorages[step])
1402 		{
1403 			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1404 			const void*				mapped	= DE_NULL;
1405 			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
1406 			bool					error	= false;
1407 
1408 			anyResult = true;
1409 
1410 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1411 			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1412 			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1413 
1414 			// copy to properly aligned array
1415 			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1416 
1417 			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1418 				throw tcu::TestError("memory map store corrupted");
1419 
1420 			// check the results
1421 			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1422 			{
1423 				if (results[ndx] != 1)
1424 				{
1425 					error = true;
1426 
1427 					if (numErrorsLogged == 0)
1428 						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1429 					if (numErrorsLogged++ < errorFloodThreshold)
1430 						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1431 					else
1432 					{
1433 						// after N errors, no point continuing verification
1434 						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1435 						break;
1436 					}
1437 				}
1438 			}
1439 
1440 			if (error)
1441 			{
1442 				allResultsOk = false;
1443 			}
1444 			else
1445 				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1446 		}
1447 	}
1448 
1449 	DE_ASSERT(anyResult);
1450 	DE_UNREF(anyResult);
1451 
1452 	return allResultsOk;
1453 }
1454 
1455 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1456 {
1457 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1458 
1459 	m_testCtx.getLog()
1460 		<< tcu::TestLog::Message
1461 		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1462 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1463 		<< tcu::TestLog::EndMessage;
1464 
1465 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1466 
1467 	// set destination
1468 	if (m_storage == STORAGE_BUFFER)
1469 	{
1470 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1471 
1472 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1473 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1474 	}
1475 	else if (m_storage == STORAGE_IMAGE)
1476 	{
1477 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1478 
1479 		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1480 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1481 	}
1482 	else
1483 		DE_ASSERT(DE_FALSE);
1484 
1485 	// calc
1486 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1487 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1488 }
1489 
1490 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1491 {
1492 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1493 }
1494 
1495 void InterCallTestCase::runCommand (const op::Barrier& cmd)
1496 {
1497 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1498 
1499 	DE_UNREF(cmd);
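	// Ensure writes issued by the previous dispatch are visible to the next one. The barrier bit
	// must match how the storage is accessed: SSBO reads/writes for buffer storage, image
	// load/store (and image atomics) for image storage.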
1500 
1501 	if (m_storage == STORAGE_BUFFER)
1502 	{
1503 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1505 	}
1506 	else if (m_storage == STORAGE_IMAGE)
1507 	{
1508 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1509 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1510 	}
1511 	else
1512 		DE_ASSERT(DE_FALSE);
1513 }
1514 
1515 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1516 {
1517 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1518 
1519 	m_testCtx.getLog()
1520 		<< tcu::TestLog::Message
1521 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1522 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1523 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1524 		<< tcu::TestLog::EndMessage;
1525 
1526 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1527 
1528 	// set sources
1529 	if (m_storage == STORAGE_BUFFER)
1530 	{
1531 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1532 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1533 
1534 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1535 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1536 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1537 	}
1538 	else if (m_storage == STORAGE_IMAGE)
1539 	{
1540 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1541 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1542 
1543 		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1544 		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1545 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1546 	}
1547 	else
1548 		DE_ASSERT(DE_FALSE);
1549 
1550 	// set destination
1551 	DE_ASSERT(m_operationResultStorages[stepNdx]);
1552 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1553 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1554 
1555 	// calc
1556 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1557 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1558 }
1559 
1560 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1561 {
1562 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1563 
1564 	m_testCtx.getLog()
1565 		<< tcu::TestLog::Message
1566 		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1567 		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1568 		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1569 		<< tcu::TestLog::EndMessage;
1570 
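	// Two of these commands with opposite cmd.evenOdd together cover the whole storage: each
	// dispatch is only gridSize/2 wide and writes either the even or the odd elements (buffer
	// case) or columns (image case).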
1571 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1572 
1573 	// set destination
1574 	if (m_storage == STORAGE_BUFFER)
1575 	{
1576 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1577 
1578 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1579 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1580 	}
1581 	else if (m_storage == STORAGE_IMAGE)
1582 	{
1583 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1584 
1585 		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1586 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1587 	}
1588 	else
1589 		DE_ASSERT(DE_FALSE);
1590 
1591 	// calc
1592 	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1593 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1594 }
1595 
1596 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597 {
1598 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1599 }
1600 
1601 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602 {
1603 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1604 }
1605 
1606 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1607 {
1608 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1609 
1610 	m_testCtx.getLog()
1611 		<< tcu::TestLog::Message
1612 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1613 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1614 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1615 		<< tcu::TestLog::EndMessage;
1616 
1617 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1618 
1619 	// set source
1620 	if (m_storage == STORAGE_BUFFER)
1621 	{
1622 		DE_ASSERT(m_storageIDs[targetHandle]);
1623 
1624 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1625 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1626 	}
1627 	else if (m_storage == STORAGE_IMAGE)
1628 	{
1629 		DE_ASSERT(m_storageIDs[targetHandle]);
1630 
1631 		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1632 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1633 	}
1634 	else
1635 		DE_ASSERT(DE_FALSE);
1636 
1637 	// set destination
1638 	DE_ASSERT(m_operationResultStorages[stepNdx]);
1639 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1640 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1641 
1642 	// calc
1643 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1644 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1645 }
1646 
1647 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1648 {
1649 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1650 
1651 	if (m_storage == STORAGE_BUFFER)
1652 	{
1653 		const int		numElements		= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1654 		const int		bufferSize		= numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1655 		glw::GLuint		retVal			= 0;
1656 
1657 		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1658 
1659 		gl.genBuffers(1, &retVal);
1660 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1661 
1662 		if (m_formatInteger)
1663 		{
1664 			const std::vector<deUint32> zeroBuffer(numElements, 0);
1665 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1666 		}
1667 		else
1668 		{
1669 			const std::vector<float> zeroBuffer(numElements, 0.0f);
1670 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1671 		}
1672 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1673 
1674 		return retVal;
1675 	}
1676 	else if (m_storage == STORAGE_IMAGE)
1677 	{
1678 		const int	imageWidth	= m_invocationGridSize;
1679 		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
1680 		glw::GLuint	retVal		= 0;
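		// the image stacks m_perInvocationSize grid-sized slices vertically, so each invocation of
		// the gridSize x gridSize dispatch owns m_perInvocationSize texels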
1681 
1682 		m_testCtx.getLog()
1683 			<< tcu::TestLog::Message
1684 			<< "Creating image #" << friendlyName << ", dimensions " << imageWidth << "x" << imageHeight
1685 			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1686 			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1687 			<< tcu::TestLog::EndMessage;
1688 
1689 		gl.genTextures(1, &retVal);
1690 		gl.bindTexture(GL_TEXTURE_2D, retVal);
1691 
1692 		if (m_formatInteger)
1693 			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1694 		else
1695 			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1696 
1697 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1698 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1699 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1700 
1701 		m_testCtx.getLog()
1702 			<< tcu::TestLog::Message
1703 			<< "Filling image with 0"
1704 			<< tcu::TestLog::EndMessage;
1705 
1706 		if (m_formatInteger)
1707 		{
1708 			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1709 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1710 		}
1711 		else
1712 		{
1713 			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1714 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1715 		}
1716 
1717 		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1718 
1719 		return retVal;
1720 	}
1721 	else
1722 	{
1723 		DE_ASSERT(DE_FALSE);
1724 		return 0;
1725 	}
1726 }
1727 
1728 glw::GLuint InterCallTestCase::genResultStorage (void)
1729 {
1730 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1731 	glw::GLuint				retVal	= 0;
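	// one deUint32 result slot per invocation; the contents need no initialization since the
	// verifier programs write every slot unconditionally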
1732 
1733 	gl.genBuffers(1, &retVal);
1734 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1735 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1736 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1737 
1738 	return retVal;
1739 }
1740 
1741 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1742 {
1743 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1744 	std::ostringstream	buf;
1745 
1746 	buf << "${GLSL_VERSION_DECL}\n"
1747 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1748 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1749 
1750 	if (m_storage == STORAGE_BUFFER)
1751 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1752 			<< "{\n"
1753 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1754 			<< "} sb_out;\n";
1755 	else if (m_storage == STORAGE_IMAGE)
1756 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1757 	else
1758 		DE_ASSERT(DE_FALSE);
1759 
1760 	buf << "\n"
1761 		<< "void main (void)\n"
1762 		<< "{\n"
1763 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1764 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1765 		<< "\n";
1766 
1767 	// Write to buffer/image m_perInvocationSize elements
1768 	if (m_storage == STORAGE_BUFFER)
1769 	{
1770 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1771 		{
1772 			if (m_useAtomic)
1773 				buf << "	atomicExchange(";
1774 			else
1775 				buf << "	";
1776 
1777 			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1778 
1779 			if (m_useAtomic)
1780 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1781 			else
1782 				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1783 		}
1784 	}
1785 	else if (m_storage == STORAGE_IMAGE)
1786 	{
1787 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1788 		{
1789 			if (m_useAtomic)
1790 				buf << "	imageAtomicExchange";
1791 			else
1792 				buf << "	imageStore";
1793 
1794 			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1795 
1796 			if (m_useAtomic)
1797 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1798 			else
1799 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1800 		}
1801 	}
1802 	else
1803 		DE_ASSERT(DE_FALSE);
1804 
1805 	buf << "}\n";
1806 
1807 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1808 }
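// For reference, a sketch of the kind of source genWriteProgram() produces for the non-atomic,
// integer, buffer-storage case. The parameters below (grid size 8, 2 writes per invocation,
// seed 1) are hypothetical; the actual values come from the test constructor.
//
//     ${GLSL_VERSION_DECL}
//     layout (local_size_x = 1, local_size_y = 1) in;
//     layout(binding=0, std430) buffer Buffer
//     {
//         highp int values[];
//     } sb_out;
//
//     void main (void)
//     {
//         uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;
//         int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);
//
//         sb_out.values[(groupNdx + 1) % 128] = int(groupNdx);
//         sb_out.values[(groupNdx + 65) % 128] = int(groupNdx);
//     }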
1809 
1810 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1811 {
1812 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1813 	std::ostringstream	buf;
1814 
1815 	buf << "${GLSL_VERSION_DECL}\n"
1816 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1817 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1818 
1819 	if (m_storage == STORAGE_BUFFER)
1820 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1821 			<< "{\n"
1822 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1823 			<< "} sb_in;\n";
1824 	else if (m_storage == STORAGE_IMAGE)
1825 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1826 	else
1827 		DE_ASSERT(DE_FALSE);
1828 
1829 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1830 		<< "{\n"
1831 		<< "	highp int resultOk[];\n"
1832 		<< "} sb_result;\n"
1833 		<< "\n"
1834 		<< "void main (void)\n"
1835 		<< "{\n"
1836 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1837 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1838 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1839 		<< "	bool allOk = true;\n"
1840 		<< "\n";
1841 
1842 	// Verify data
1843 
1844 	if (m_storage == STORAGE_BUFFER)
1845 	{
1846 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1847 		{
1848 			if (!m_useAtomic)
1849 				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
1850 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1851 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1852 			else
1853 				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1854 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1855 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1856 		}
1857 	}
1858 	else if (m_storage == STORAGE_IMAGE)
1859 	{
1860 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1861 		{
1862 			if (!m_useAtomic)
1863 				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1864 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1865 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1866 			else
1867 				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1868 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1869 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1870 		}
1871 	}
1872 	else
1873 		DE_ASSERT(DE_FALSE);
1874 
1875 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1876 		<< "}\n";
1877 
1878 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1879 }
1880 
1881 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1882 {
1883 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1884 	std::ostringstream	buf;
1885 
1886 	buf << "${GLSL_VERSION_DECL}\n"
1887 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1888 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1889 
1890 	if (m_storage == STORAGE_BUFFER)
1891 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1892 			<< "{\n"
1893 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1894 			<< "} sb_in0;\n"
1895 			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1896 			<< "{\n"
1897 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1898 			<< "} sb_in1;\n";
1899 	else if (m_storage == STORAGE_IMAGE)
1900 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1901 			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1902 	else
1903 		DE_ASSERT(DE_FALSE);
1904 
1905 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1906 		<< "{\n"
1907 		<< "	highp int resultOk[];\n"
1908 		<< "} sb_result;\n"
1909 		<< "\n"
1910 		<< "void main (void)\n"
1911 		<< "{\n"
1912 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1913 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1914 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1915 		<< "	bool allOk = true;\n"
1916 		<< "\n";
1917 
1918 	// Verify data
1919 
1920 	if (m_storage == STORAGE_BUFFER)
1921 	{
1922 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1925 	}
1926 	else if (m_storage == STORAGE_IMAGE)
1927 	{
1928 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1929 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1930 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1931 	}
1932 	else
1933 		DE_ASSERT(DE_FALSE);
1934 
1935 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1936 		<< "}\n";
1937 
1938 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1939 }
1940 
1941 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1942 {
1943 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1944 	std::ostringstream	buf;
1945 
1946 	buf << "${GLSL_VERSION_DECL}\n"
1947 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1948 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1949 
1950 	if (m_storage == STORAGE_BUFFER)
1951 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1952 			<< "{\n"
1953 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1954 			<< "} sb_out;\n";
1955 	else if (m_storage == STORAGE_IMAGE)
1956 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1957 	else
1958 		DE_ASSERT(DE_FALSE);
1959 
1960 	buf << "\n"
1961 		<< "void main (void)\n"
1962 		<< "{\n"
1963 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1964 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1965 		<< "\n";
1966 
1967 	// Write to buffer/image m_perInvocationSize elements
1968 	if (m_storage == STORAGE_BUFFER)
1969 	{
1970 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1971 		{
1972 			if (m_useAtomic)
1973 				buf << "	atomicExchange(";
1974 			else
1975 				buf << "	";
1976 
1977 			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1978 
1979 			if (m_useAtomic)
1980 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1981 			else
1982 				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1983 		}
1984 	}
1985 	else if (m_storage == STORAGE_IMAGE)
1986 	{
1987 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1988 		{
1989 			if (m_useAtomic)
1990 				buf << "	imageAtomicExchange";
1991 			else
1992 				buf << "	imageStore";
1993 
1994 			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1995 
1996 			if (m_useAtomic)
1997 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1998 			else
1999 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
2000 		}
2001 	}
2002 	else
2003 		DE_ASSERT(DE_FALSE);
2004 
2005 	buf << "}\n";
2006 
2007 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2008 }
2009 
2010 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
2011 {
2012 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2013 	std::ostringstream	buf;
2014 
2015 	buf << "${GLSL_VERSION_DECL}\n"
2016 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2017 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2018 
2019 	if (m_storage == STORAGE_BUFFER)
2020 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2021 			<< "{\n"
2022 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2023 			<< "} sb_in;\n";
2024 	else if (m_storage == STORAGE_IMAGE)
2025 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2026 	else
2027 		DE_ASSERT(DE_FALSE);
2028 
2029 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2030 		<< "{\n"
2031 		<< "	highp int resultOk[];\n"
2032 		<< "} sb_result;\n"
2033 		<< "\n"
2034 		<< "void main (void)\n"
2035 		<< "{\n"
2036 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2037 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2038 		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2039 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2040 		<< "	bool allOk = true;\n"
2041 		<< "\n";
2042 
2043 	// Verify data
2044 
2045 	if (m_storage == STORAGE_BUFFER)
2046 	{
2047 		buf << "	if (groupNdx % 2 == 0)\n"
2048 			<< "	{\n";
2049 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2050 			buf << "		allOk = allOk && ("
2051 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2052 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2053 		buf << "	}\n"
2054 			<< "	else\n"
2055 			<< "	{\n";
2056 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2057 			buf << "		allOk = allOk && ("
2058 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2059 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2060 		buf << "	}\n";
2061 	}
2062 	else if (m_storage == STORAGE_IMAGE)
2063 	{
2064 		buf << "	if (groupNdx % 2 == 0)\n"
2065 			<< "	{\n";
2066 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2067 			buf << "		allOk = allOk && ("
2068 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2069 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2070 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2071 		buf << "	}\n"
2072 			<< "	else\n"
2073 			<< "	{\n";
2074 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2075 			buf << "		allOk = allOk && ("
2076 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2077 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2078 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2079 		buf << "	}\n";
2080 	}
2081 	else
2082 		DE_ASSERT(DE_FALSE);
2083 
2084 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2085 		<< "}\n";
2086 
2087 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2088 }
2089 
2090 glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
2091 {
2092 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2093 	std::ostringstream	buf;
2094 
2095 	buf << "${GLSL_VERSION_DECL}\n"
2096 		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2097 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2098 
2099 	if (m_storage == STORAGE_BUFFER)
2100 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2101 			<< "{\n"
2102 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2103 			<< "} sb_in;\n";
2104 	else if (m_storage == STORAGE_IMAGE)
2105 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2106 	else
2107 		DE_ASSERT(DE_FALSE);
2108 
2109 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2110 		<< "{\n"
2111 		<< "	highp int resultOk[];\n"
2112 		<< "} sb_result;\n"
2113 		<< "\n"
2114 		<< "void main (void)\n"
2115 		<< "{\n"
2116 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2117 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2118 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2119 		<< "	bool allOk = true;\n"
2120 		<< "\n";
2121 
2122 	// Verify data
2123 
2124 	if (m_storage == STORAGE_BUFFER)
2125 	{
2126 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2127 			buf << "	allOk = allOk && ("
2128 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2129 				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2130 	}
2131 	else if (m_storage == STORAGE_IMAGE)
2132 	{
2133 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2134 			buf << "	allOk = allOk && ("
2135 			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2136 			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2137 	}
2138 	else
2139 		DE_ASSERT(DE_FALSE);
2140 
2141 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2142 		<< "}\n";
2143 
2144 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2145 }
2146 
2147 class SSBOConcurrentAtomicCase : public TestCase
2148 {
2149 public:
2150 
2151 							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2152 							~SSBOConcurrentAtomicCase	(void);
2153 
2154 	void					init						(void);
2155 	void					deinit						(void);
2156 	IterateResult			iterate						(void);
2157 
2158 private:
2159 	std::string				genComputeSource			(void) const;
2160 
2161 	const int				m_numCalls;
2162 	const int				m_workSize;
2163 	glu::ShaderProgram*		m_program;
2164 	deUint32				m_bufferID;
2165 	std::vector<deUint32>	m_intermediateResultBuffers;
2166 };
2167 
2168 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2169 	: TestCase		(context, name, description)
2170 	, m_numCalls	(numCalls)
2171 	, m_workSize	(workSize)
2172 	, m_program		(DE_NULL)
2173 	, m_bufferID	(DE_NULL)
2174 {
2175 }
2176 
2177 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2178 {
2179 	deinit();
2180 }
2181 
2182 void SSBOConcurrentAtomicCase::init (void)
2183 {
2184 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2185 	std::vector<deUint32>	zeroData			(m_workSize, 0);
2186 
2187 	// gen buffers
2188 
2189 	gl.genBuffers(1, &m_bufferID);
2190 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2191 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2192 
2193 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2194 	{
2195 		deUint32 buffer = 0;
2196 
2197 		gl.genBuffers(1, &buffer);
2198 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2199 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2200 
2201 		m_intermediateResultBuffers.push_back(buffer);
2202 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2203 	}
2204 
2205 	// gen program
2206 
2207 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2208 	m_testCtx.getLog() << *m_program;
2209 	if (!m_program->isOk())
2210 		throw tcu::TestError("could not build program");
2211 }
2212 
2213 void SSBOConcurrentAtomicCase::deinit (void)
2214 {
2215 	if (m_bufferID)
2216 	{
2217 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2218 		m_bufferID = 0;
2219 	}
2220 
2221 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2222 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2223 	m_intermediateResultBuffers.clear();
2224 
2225 	delete m_program;
2226 	m_program = DE_NULL;
2227 }
2228 
2229 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2230 {
2231 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2232 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2233 	std::vector<int>		deltas;
2234 
2235 	// generate unique deltas
2236 	generateShuffledRamp(m_numCalls, deltas);
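	// deltas is a shuffled ramp 1..m_numCalls (see generateShuffledRamp), so after all calls every
	// element of the work buffer should equal 1 + 2 + ... + m_numCalls == sumValue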
2237 
2238 	// invoke program N times, each with a different delta
2239 	{
2240 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2241 
2242 		m_testCtx.getLog()
2243 			<< tcu::TestLog::Message
2244 			<< "Running shader " << m_numCalls << " times.\n"
2245 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2246 			<< "Setting u_atomicDelta to a unique value for each call.\n"
2247 			<< tcu::TestLog::EndMessage;
2248 
2249 		if (deltaLocation == -1)
2250 			throw tcu::TestError("u_atomicDelta location was -1");
2251 
2252 		gl.useProgram(m_program->getProgram());
2253 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2254 
2255 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2256 		{
2257 			m_testCtx.getLog()
2258 				<< tcu::TestLog::Message
2259 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2260 				<< tcu::TestLog::EndMessage;
2261 
2262 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2263 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2264 			gl.dispatchCompute(m_workSize, 1, 1);
2265 		}
2266 
2267 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2268 	}
2269 
2270 	// Verify result
2271 	{
2272 		std::vector<deUint32> result;
2273 
2274 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2275 
2276 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2277 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2278 
2279 		for (int ndx = 0; ndx < m_workSize; ++ndx)
2280 		{
2281 			if (result[ndx] != sumValue)
2282 			{
2283 				m_testCtx.getLog()
2284 					<< tcu::TestLog::Message
2285 					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2286 					<< "Work buffer contains invalid values."
2287 					<< tcu::TestLog::EndMessage;
2288 
2289 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2290 				return STOP;
2291 			}
2292 		}
2293 
2294 		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2295 	}
2296 
2297 	// verify steps
2298 	{
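		// Each intermediate result buffer holds, per element, the value returned by atomicAdd in
		// that call, i.e. the work buffer value observed just before that call's delta was applied.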
2299 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2300 		std::vector<deUint32>				valueChain			(m_numCalls);
2301 
2302 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2303 
2304 		// collect results
2305 
2306 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2307 		{
2308 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2309 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2310 		}
2311 
2312 		// verify values
2313 
2314 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2315 		{
2316 			int			invalidOperationNdx;
2317 			deUint32	errorDelta;
2318 			deUint32	errorExpected;
2319 
2320 			// collect result chain for each element
2321 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2322 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2323 
2324 			// check there exists a path from 0 to sumValue using each addition exactly once:
2325 			// decompose the cumulative results into individual additions (all additions are positive, so sorting the observed values recovers the order in which they were applied)
2326 
2327 			std::sort(valueChain.begin(), valueChain.end());
2328 
2329 			// validate chain
2330 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2331 			{
2332 				m_testCtx.getLog()
2333 					<< tcu::TestLog::Message
2334 					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2335 					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << ":\n"
2336 					<< tcu::TestLog::EndMessage;
2337 
2338 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2339 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2340 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2341 
2342 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2343 				return STOP;
2344 			}
2345 		}
2346 
2347 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2348 	}
2349 
2350 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2351 	return STOP;
2352 }
2353 
2354 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2355 {
2356 	std::ostringstream buf;
2357 
2358 	buf	<< "${GLSL_VERSION_DECL}\n"
2359 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2360 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2361 		<< "{\n"
2362 		<< "	highp uint values[" << m_workSize << "];\n"
2363 		<< "} sb_ires;\n"
2364 		<< "\n"
2365 		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2366 		<< "{\n"
2367 		<< "	highp uint values[" << m_workSize << "];\n"
2368 		<< "} sb_work;\n"
2369 		<< "uniform highp uint u_atomicDelta;\n"
2370 		<< "\n"
2371 		<< "void main ()\n"
2372 		<< "{\n"
2373 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2374 		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2375 		<< "}";
2376 
2377 	return specializeShader(m_context, buf.str().c_str());
2378 }
2379 
2380 class ConcurrentAtomicCounterCase : public TestCase
2381 {
2382 public:
2383 
2384 							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
2385 							~ConcurrentAtomicCounterCase	(void);
2386 
2387 	void					init							(void);
2388 	void					deinit							(void);
2389 	IterateResult			iterate							(void);
2390 
2391 private:
2392 	std::string				genComputeSource				(bool evenOdd) const;
2393 
2394 	const int				m_numCalls;
2395 	const int				m_workSize;
2396 	glu::ShaderProgram*		m_evenProgram;
2397 	glu::ShaderProgram*		m_oddProgram;
2398 	deUint32				m_counterBuffer;
2399 	deUint32				m_intermediateResultBuffer;
2400 };
2401 
2402 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2403 	: TestCase					(context, name, description)
2404 	, m_numCalls				(numCalls)
2405 	, m_workSize				(workSize)
2406 	, m_evenProgram				(DE_NULL)
2407 	, m_oddProgram				(DE_NULL)
2408 	, m_counterBuffer			(DE_NULL)
2409 	, m_intermediateResultBuffer(DE_NULL)
2410 {
2411 }
2412 
2413 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2414 {
2415 	deinit();
2416 }
2417 
2418 void ConcurrentAtomicCounterCase::init (void)
2419 {
2420 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2421 	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);
2422 
2423 	// gen buffer
2424 
2425 	gl.genBuffers(1, &m_counterBuffer);
2426 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2427 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
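	// the atomic counter buffer holds a single zero-initialized uint; zeroData is larger than
	// needed, only its first element is uploaded here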
2428 
2429 	gl.genBuffers(1, &m_intermediateResultBuffer);
2430 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2431 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2432 
2433 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2434 
2435 	// gen programs
2436 
2437 	{
2438 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2439 
2440 		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2441 		m_testCtx.getLog() << *m_evenProgram;
2442 		if (!m_evenProgram->isOk())
2443 			throw tcu::TestError("could not build program");
2444 	}
2445 	{
2446 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2447 
2448 		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2449 		m_testCtx.getLog() << *m_oddProgram;
2450 		if (!m_oddProgram->isOk())
2451 			throw tcu::TestError("could not build program");
2452 	}
2453 }
2454 
2455 void ConcurrentAtomicCounterCase::deinit (void)
2456 {
2457 	if (m_counterBuffer)
2458 	{
2459 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2460 		m_counterBuffer = 0;
2461 	}
2462 	if (m_intermediateResultBuffer)
2463 	{
2464 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2465 		m_intermediateResultBuffer = 0;
2466 	}
2467 
2468 	delete m_evenProgram;
2469 	m_evenProgram = DE_NULL;
2470 
2471 	delete m_oddProgram;
2472 	m_oddProgram = DE_NULL;
2473 }
2474 
2475 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2476 {
2477 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2478 
2479 	// invoke the even/odd program pair N times, each pair with a different call index
2480 	{
2481 		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2482 		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2483 
2484 		m_testCtx.getLog()
2485 			<< tcu::TestLog::Message
2486 			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2487 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2488 			<< tcu::TestLog::EndMessage;
2489 
2490 		if (evenCallNdxLocation == -1)
2491 			throw tcu::TestError("u_callNdx location was -1");
2492 		if (oddCallNdxLocation == -1)
2493 			throw tcu::TestError("u_callNdx location was -1");
2494 
2495 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2496 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);
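		// the "even" program increments the counter only for even dataNdx values and the "odd"
		// program only for odd ones, so each even+odd dispatch pair increments the counter exactly
		// m_workSize times in total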
2497 
2498 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2499 		{
2500 			gl.useProgram(m_evenProgram->getProgram());
2501 			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2502 			gl.dispatchCompute(m_workSize, 1, 1);
2503 
2504 			gl.useProgram(m_oddProgram->getProgram());
2505 			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2506 			gl.dispatchCompute(m_workSize, 1, 1);
2507 		}
2508 
2509 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2510 	}
2511 
2512 	// Verify result
2513 	{
2514 		deUint32 result;
2515 
2516 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying counter buffer, it should contain " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2517 
2518 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2519 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2520 
2521 		if ((int)result != m_numCalls*m_workSize)
2522 		{
2523 			m_testCtx.getLog()
2524 				<< tcu::TestLog::Message
2525 				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2526 				<< tcu::TestLog::EndMessage;
2527 
2528 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2529 			return STOP;
2530 		}
2531 
2532 		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2533 	}
2534 
2535 	// verify steps
2536 	{
2537 		std::vector<deUint32> intermediateResults;
2538 
2539 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2540 
2541 		// collect results
2542 
2543 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2544 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2545 
2546 		// verify values
2547 
2548 		std::sort(intermediateResults.begin(), intermediateResults.end());
2549 
2550 		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2551 		{
2552 			if ((int)intermediateResults[valueNdx] != valueNdx)
2553 			{
2554 				m_testCtx.getLog()
2555 					<< tcu::TestLog::Message
2556 					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2557 					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
2558 					<< tcu::TestLog::EndMessage;
2559 
2560 				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2561 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2562 
2563 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2564 				return STOP;
2565 			}
2566 		}
2567 
2568 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2569 	}
2570 
2571 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2572 	return STOP;
2573 }
2574 
2575 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2576 {
2577 	std::ostringstream buf;
2578 
2579 	buf	<< "${GLSL_VERSION_DECL}\n"
2580 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2581 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2582 		<< "{\n"
2583 		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
2584 		<< "} sb_ires;\n"
2585 		<< "\n"
2586 		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
2587 		<< "uniform highp uint u_callNdx;\n"
2588 		<< "\n"
2589 		<< "void main ()\n"
2590 		<< "{\n"
2591 		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2592 		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2593 		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2594 		<< "}";
2595 
2596 	return specializeShader(m_context, buf.str().c_str());
2597 }
2598 
2599 class ConcurrentImageAtomicCase : public TestCase
2600 {
2601 public:
2602 
2603 							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2604 							~ConcurrentImageAtomicCase	(void);
2605 
2606 	void					init						(void);
2607 	void					deinit						(void);
2608 	IterateResult			iterate						(void);
2609 
2610 private:
2611 	void					readWorkImage				(std::vector<deUint32>& result);
2612 
2613 	std::string				genComputeSource			(void) const;
2614 	std::string				genImageReadSource			(void) const;
2615 	std::string				genImageClearSource			(void) const;
2616 
2617 	const int				m_numCalls;
2618 	const int				m_workSize;
2619 	glu::ShaderProgram*		m_program;
2620 	glu::ShaderProgram*		m_imageReadProgram;
2621 	glu::ShaderProgram*		m_imageClearProgram;
2622 	deUint32				m_imageID;
2623 	std::vector<deUint32>	m_intermediateResultBuffers;
2624 };
2625 
2626 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2627 	: TestCase				(context, name, description)
2628 	, m_numCalls			(numCalls)
2629 	, m_workSize			(workSize)
2630 	, m_program				(DE_NULL)
2631 	, m_imageReadProgram	(DE_NULL)
2632 	, m_imageClearProgram	(DE_NULL)
2633 	, m_imageID				(DE_NULL)
2634 {
2635 }
2636 
2637 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2638 {
2639 	deinit();
2640 }
2641 
2642 void ConcurrentImageAtomicCase::init (void)
2643 {
2644 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2645 	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
2646 
2647 	if (!checkSupport(m_context))
2648 		throw tcu::NotSupportedError("Test requires OpenGL ES 3.2, OpenGL 4.5, or GL_OES_shader_image_atomic");
2649 
2650 	// gen image
2651 
2652 	gl.genTextures(1, &m_imageID);
2653 	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2654 	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
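	// r32ui storage: ES image atomics (other than exchange on r32f) are only defined for the
	// single-channel 32-bit integer formats r32i / r32ui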
2655 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2656 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2657 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2658 
2659 	// gen buffers
2660 
2661 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2662 	{
2663 		deUint32 buffer = 0;
2664 
2665 		gl.genBuffers(1, &buffer);
2666 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2667 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2668 
2669 		m_intermediateResultBuffers.push_back(buffer);
2670 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2671 	}
2672 
2673 	// gen programs
2674 
2675 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2676 	m_testCtx.getLog() << *m_program;
2677 	if (!m_program->isOk())
2678 		throw tcu::TestError("could not build program");
2679 
2680 	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2681 	if (!m_imageReadProgram->isOk())
2682 	{
2683 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2684 
2685 		m_testCtx.getLog() << *m_imageReadProgram;
2686 		throw tcu::TestError("could not build program");
2687 	}
2688 
2689 	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2690 	if (!m_imageClearProgram->isOk())
2691 	{
2692 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image clear program");
2693 
2694 		m_testCtx.getLog() << *m_imageClearProgram;
2695 		throw tcu::TestError("could not build program");
2696 	}
2697 }
2698 
2699 void ConcurrentImageAtomicCase::deinit (void)
2700 {
2701 	if (m_imageID)
2702 	{
2703 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2704 		m_imageID = 0;
2705 	}
2706 
2707 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2708 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2709 	m_intermediateResultBuffers.clear();
2710 
2711 	delete m_program;
2712 	m_program = DE_NULL;
2713 
2714 	delete m_imageReadProgram;
2715 	m_imageReadProgram = DE_NULL;
2716 
2717 	delete m_imageClearProgram;
2718 	m_imageClearProgram = DE_NULL;
2719 }
2720 
2721 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2722 {
2723 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2724 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2725 	std::vector<int>		deltas;
2726 
2727 	// generate unique deltas
2728 	generateShuffledRamp(m_numCalls, deltas);
2729 
2730 	// clear image
2731 	{
2732 		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2733 
2734 		gl.useProgram(m_imageClearProgram->getProgram());
2735 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2736 		gl.dispatchCompute(m_workSize, m_workSize, 1);
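		// make the cleared texels visible to the image atomics issued by the following dispatches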
2737 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2738 
2739 		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2740 	}
2741 
2742 	// invoke program N times, each with a different delta
2743 	{
2744 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2745 
2746 		m_testCtx.getLog()
2747 			<< tcu::TestLog::Message
2748 			<< "Running shader " << m_numCalls << " times.\n"
2749 			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2750 			<< "Setting u_atomicDelta to a unique value for each call.\n"
2751 			<< tcu::TestLog::EndMessage;
2752 
2753 		if (deltaLocation == -1)
2754 			throw tcu::TestError("u_atomicDelta location was -1");
2755 
2756 		gl.useProgram(m_program->getProgram());
2757 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2758 
2759 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2760 		{
2761 			m_testCtx.getLog()
2762 				<< tcu::TestLog::Message
2763 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2764 				<< tcu::TestLog::EndMessage;
2765 
2766 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2767 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2768 			gl.dispatchCompute(m_workSize, m_workSize, 1);
2769 		}
2770 
2771 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2772 	}
2773 
2774 	// Verify result
2775 	{
2776 		std::vector<deUint32> result;
2777 
2778 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2779 
2780 		readWorkImage(result);
2781 
2782 		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2783 		{
2784 			if (result[ndx] != sumValue)
2785 			{
2786 				m_testCtx.getLog()
2787 					<< tcu::TestLog::Message
2788 					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2789 					<< "Work image contains invalid values."
2790 					<< tcu::TestLog::EndMessage;
2791 
2792 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2793 				return STOP;
2794 			}
2795 		}
2796 
2797 		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2798 	}
2799 
2800 	// verify steps
2801 	{
2802 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2803 		std::vector<deUint32>				valueChain			(m_numCalls);
2804 		std::vector<deUint32>				chainDelta			(m_numCalls);
2805 
2806 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2807 
2808 		// collect results
2809 
2810 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811 		{
2812 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2813 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2814 		}
2815 
2816 		// verify values
2817 
2818 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2819 		{
2820 			int			invalidOperationNdx;
2821 			deUint32	errorDelta;
2822 			deUint32	errorExpected;
2823 
2824 			// collect result chain for each element
2825 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2826 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2827 
2828 			// check there exists a path from 0 to sumValue using each addition once
2829 			// decompose cumulative results to addition operations (all additions positive => this works)
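			// (since every addition is positive, sorting the recorded pre-add values yields the
			// cumulative sums 0, d1, d1+d2, ...; the successive differences, closed with sumValue,
			// must then be exactly the permutation 1..m_numCalls)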
2830 
2831 			std::sort(valueChain.begin(), valueChain.end());
2832 
2833 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2834 				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2835 
2836 			// chainDelta now contains the actual additions applied to the value
2837 			std::sort(chainDelta.begin(), chainDelta.end());
2838 
2839 			// validate chain
2840 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2841 			{
2842 				m_testCtx.getLog()
2843 					<< tcu::TestLog::Message
2844 					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2845 					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2846 					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2847 					<< tcu::TestLog::EndMessage;
2848 
2849 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2850 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2851 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2852 
2853 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2854 				return STOP;
2855 			}
2856 		}
2857 
2858 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2859 	}
2860 
2861 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2862 	return STOP;
2863 }
2864 
2865 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2866 {
2867 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2868 	glu::Buffer				resultBuffer	(m_context.getRenderContext());
2869 
2870 	// Read image to an ssbo
2871 
2872 	{
2873 		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2874 
2875 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2876 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2877 
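		// make the image stores and atomics from earlier dispatches visible to the imageLoad() pass below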
2878 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2879 		gl.useProgram(m_imageReadProgram->getProgram());
2880 
2881 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2882 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2883 		gl.dispatchCompute(m_workSize, m_workSize, 1);
2884 
2885 		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2886 	}
2887 
2888 	// Read ssbo
2889 	{
2890 		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2891 		GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2892 
2893 		if (!ptr)
2894 			throw tcu::TestError("mapBufferRange returned NULL");
2895 
2896 		result.resize(m_workSize * m_workSize);
2897 		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2898 
2899 		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2900 			throw tcu::TestError("unmapBuffer returned false");
2901 	}
2902 }
2903 
2904 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2905 {
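	// one invocation per texel: imageAtomicAdd() returns the texel's previous value,
	// which is written to this call's intermediate result buffer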
2906 	std::ostringstream buf;
2907 
2908 	buf	<< "${GLSL_VERSION_DECL}\n"
2909 		<< "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2910 		<< "\n"
2911 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2912 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2913 		<< "{\n"
2914 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2915 		<< "} sb_ires;\n"
2916 		<< "\n"
2917 		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2918 		<< "uniform highp uint u_atomicDelta;\n"
2919 		<< "\n"
2920 		<< "void main ()\n"
2921 		<< "{\n"
2922 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923 		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2924 		<< "}";
2925 
2926 	return specializeShader(m_context, buf.str().c_str());
2927 }
2928 
2929 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2930 {
2931 	std::ostringstream buf;
2932 
2933 	buf	<< "${GLSL_VERSION_DECL}\n"
2934 		<< "\n"
2935 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936 		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2937 		<< "{\n"
2938 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2939 		<< "} sb_res;\n"
2940 		<< "\n"
2941 		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2942 		<< "\n"
2943 		<< "void main ()\n"
2944 		<< "{\n"
2945 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2946 		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2947 		<< "}";
2948 
2949 	return specializeShader(m_context, buf.str().c_str());
2950 }
2951 
2952 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2953 {
2954 	std::ostringstream buf;
2955 
2956 	buf	<< "${GLSL_VERSION_DECL}\n"
2957 		<< "\n"
2958 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2959 		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2960 		<< "\n"
2961 		<< "void main ()\n"
2962 		<< "{\n"
2963 		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2964 		<< "}";
2965 
2966 	return specializeShader(m_context, buf.str().c_str());
2967 }
2968 
2969 class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2970 {
2971 public:
2972 							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2973 							~ConcurrentSSBOAtomicCounterMixedCase	(void);
2974 
2975 	void					init									(void);
2976 	void					deinit									(void);
2977 	IterateResult			iterate									(void);
2978 
2979 private:
2980 	std::string				genSSBOComputeSource					(void) const;
2981 	std::string				genAtomicCounterComputeSource			(void) const;
2982 
2983 	const int				m_numCalls;
2984 	const int				m_workSize;
2985 	deUint32				m_bufferID;
2986 	glu::ShaderProgram*		m_ssboAtomicProgram;
2987 	glu::ShaderProgram*		m_atomicCounterProgram;
2988 };
2989 
2990 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2991 	: TestCase					(context, name, description)
2992 	, m_numCalls				(numCalls)
2993 	, m_workSize				(workSize)
2994 	, m_bufferID				(DE_NULL)
2995 	, m_ssboAtomicProgram		(DE_NULL)
2996 	, m_atomicCounterProgram	(DE_NULL)
2997 {
2998 	// SSBO atomic XORs cancel out
2999 	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
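	// (divisibility by 32 guarantees every mask bit ends up flipped an even number of times)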
3000 }
3001 
3002 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
3003 {
3004 	deinit();
3005 }
3006 
3007 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3008 {
3009 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
3010 	const deUint32				zeroBuf[2]	= { 0, 0 };
3011 
3012 	// gen buffer
3013 
3014 	gl.genBuffers(1, &m_bufferID);
3015 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3016 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3017 
3018 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
3019 
3020 	// gen programs
3021 
3022 	{
3023 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3024 
3025 		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3026 		m_testCtx.getLog() << *m_ssboAtomicProgram;
3027 		if (!m_ssboAtomicProgram->isOk())
3028 			throw tcu::TestError("could not build program");
3029 	}
3030 	{
3031 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3032 
3033 		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3034 		m_testCtx.getLog() << *m_atomicCounterProgram;
3035 		if (!m_atomicCounterProgram->isOk())
3036 			throw tcu::TestError("could not build program");
3037 	}
3038 }
3039 
3040 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3041 {
3042 	if (m_bufferID)
3043 	{
3044 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3045 		m_bufferID = 0;
3046 	}
3047 
3048 	delete m_ssboAtomicProgram;
3049 	m_ssboAtomicProgram = DE_NULL;
3050 
3051 	delete m_atomicCounterProgram;
3052 	m_atomicCounterProgram = DE_NULL;
3053 }
3054 
3055 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3056 {
3057 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3058 
3059 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3060 
3061 	// invoke programs N times
3062 	{
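		// the same buffer is bound both at SSBO binding 1 and atomic counter binding 0;
		// the counter program increments its first uint while the SSBO program XOR-flips bits 24..31 of it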
3063 		m_testCtx.getLog()
3064 			<< tcu::TestLog::Message
3065 			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3066 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
3067 			<< tcu::TestLog::EndMessage;
3068 
3069 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3070 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);
3071 
3072 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3073 		{
3074 			gl.useProgram(m_atomicCounterProgram->getProgram());
3075 			gl.dispatchCompute(m_workSize, 1, 1);
3076 
3077 			gl.useProgram(m_ssboAtomicProgram->getProgram());
3078 			gl.dispatchCompute(m_workSize, 1, 1);
3079 		}
3080 
3081 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3082 	}
3083 
3084 	// Verify result
3085 	{
3086 		deUint32 result;
3087 
3088 		// XORs cancel out, only addition is left
3089 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3090 
3091 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3092 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3093 
3094 		if ((int)result != m_numCalls*m_workSize)
3095 		{
3096 			m_testCtx.getLog()
3097 				<< tcu::TestLog::Message
3098 				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3099 				<< tcu::TestLog::EndMessage;
3100 
3101 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3102 			return STOP;
3103 		}
3104 
3105 		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3106 	}
3107 
3108 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3109 	return STOP;
3110 }
3111 
3112 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3113 {
3114 	std::ostringstream buf;
3115 
3116 	buf	<< "${GLSL_VERSION_DECL}\n"
3117 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3118 		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3119 		<< "{\n"
3120 		<< "	highp uint targetValue;\n"
3121 		<< "	highp uint dummy;\n"
3122 		<< "} sb_work;\n"
3123 		<< "\n"
3124 		<< "void main ()\n"
3125 		<< "{\n"
3126 		<< "	// flip high bits\n"
3127 		<< "	highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3128 		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
3129 		<< "}";
3130 
3131 	return specializeShader(m_context, buf.str().c_str());
3132 }
3133 
3134 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3135 {
3136 	std::ostringstream buf;
3137 
3138 	buf	<< "${GLSL_VERSION_DECL}\n"
3139 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3140 		<< "\n"
3141 		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
3142 		<< "\n"
3143 		<< "void main ()\n"
3144 		<< "{\n"
3145 		<< "	atomicCounterIncrement(u_counter);\n"
3146 		<< "}";
3147 
3148 	return specializeShader(m_context, buf.str().c_str());
3149 }
3150 
3151 } // anonymous
3152 
3153 SynchronizationTests::SynchronizationTests (Context& context)
3154 	: TestCaseGroup(context, "synchronization", "Synchronization tests")
3155 {
3156 }
3157 
3158 SynchronizationTests::~SynchronizationTests (void)
3159 {
3160 }
3161 
3162 void SynchronizationTests::init (void)
3163 {
3164 	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",	"Test intra-invocation synchronization");
3165 	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3166 	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3167 
3168 	addChild(inInvocationGroup);
3169 	addChild(interInvocationGroup);
3170 	addChild(interCallGroup);
3171 
3172 	// .in_invocation & .inter_invocation
3173 	{
3174 		static const struct CaseConfig
3175 		{
3176 			const char*									namePrefix;
3177 			const InterInvocationTestCase::StorageType	storage;
3178 			const int									flags;
3179 		} configs[] =
3180 		{
3181 			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
3182 			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
3183 			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
3184 			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
3185 		};
3186 
3187 		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3188 		{
3189 			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3190 			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3191 
3192 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3193 			{
3194 				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3195 
3196 				targetGroup->addChild(new InvocationWriteReadCase(m_context,
3197 																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3198 																  (std::string("Write to ") + target + " and read it").c_str(),
3199 																  configs[configNdx].storage,
3200 																  configs[configNdx].flags | extraFlags));
3201 
3202 				targetGroup->addChild(new InvocationReadWriteCase(m_context,
3203 																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3204 																  (std::string("Read from ") + target + " and then write to it").c_str(),
3205 																  configs[configNdx].storage,
3206 																  configs[configNdx].flags | extraFlags));
3207 
3208 				targetGroup->addChild(new InvocationOverWriteCase(m_context,
3209 																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3210 																  (std::string("Write to ") + target + " twice and read it").c_str(),
3211 																  configs[configNdx].storage,
3212 																  configs[configNdx].flags | extraFlags));
3213 
3214 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3215 																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3216 																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
3217 																   InvocationAliasWriteCase::TYPE_WRITE,
3218 																   configs[configNdx].storage,
3219 																   configs[configNdx].flags | extraFlags));
3220 
3221 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3222 																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3223 																   (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3224 																   InvocationAliasWriteCase::TYPE_OVERWRITE,
3225 																   configs[configNdx].storage,
3226 																   configs[configNdx].flags | extraFlags));
3227 			}
3228 		}
3229 	}
3230 
3231 	// .inter_call
3232 	{
3233 		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3234 		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3235 
3236 		interCallGroup->addChild(withBarrierGroup);
3237 		interCallGroup->addChild(withoutBarrierGroup);
3238 
3239 		// .with_memory_barrier
3240 		{
3241 			static const struct CaseConfig
3242 			{
3243 				const char*								namePrefix;
3244 				const InterCallTestCase::StorageType	storage;
3245 				const int								flags;
3246 			} configs[] =
3247 			{
3248 				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
3249 				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3250 				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
3251 				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3252 			};
3253 
3254 			const int seed0 = 123;
3255 			const int seed1 = 457;
3256 
3257 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3258 			{
3259 				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3260 
3261 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3262 																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3263 																 (std::string("Write to ") + target + " and read it").c_str(),
3264 																 configs[configNdx].storage,
3265 																 configs[configNdx].flags,
3266 																 InterCallOperations()
3267 																	<< op::WriteData::Generate(1, seed0)
3268 																	<< op::Barrier()
3269 																	<< op::ReadData::Generate(1, seed0)));
3270 
3271 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3272 																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3273 																 (std::string("Read from ") + target + " and then write to it").c_str(),
3274 																 configs[configNdx].storage,
3275 																 configs[configNdx].flags,
3276 																 InterCallOperations()
3277 																	<< op::ReadZeroData::Generate(1)
3278 																	<< op::Barrier()
3279 																	<< op::WriteData::Generate(1, seed0)));
3280 
3281 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282 																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3283 																 (std::string("Write to ") + target + " twice and read it").c_str(),
3284 																 configs[configNdx].storage,
3285 																 configs[configNdx].flags,
3286 																 InterCallOperations()
3287 																	<< op::WriteData::Generate(1, seed0)
3288 																	<< op::Barrier()
3289 																	<< op::WriteData::Generate(1, seed1)
3290 																	<< op::Barrier()
3291 																	<< op::ReadData::Generate(1, seed1)));
3292 
3293 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3294 																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3295 																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3296 																 configs[configNdx].storage,
3297 																 configs[configNdx].flags,
3298 																 InterCallOperations()
3299 																	<< op::WriteData::Generate(1, seed0)
3300 																	<< op::WriteData::Generate(2, seed1)
3301 																	<< op::Barrier()
3302 																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3303 
3304 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305 																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3306 																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3307 																 configs[configNdx].storage,
3308 																 configs[configNdx].flags,
3309 																 InterCallOperations()
3310 																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
3311 																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
3312 																	<< op::Barrier()
3313 																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3314 
3315 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3316 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3317 																 (std::string("Two unrelated ") + target + " write-reads, reads in write order").c_str(),
3318 																 configs[configNdx].storage,
3319 																 configs[configNdx].flags,
3320 																 InterCallOperations()
3321 																	<< op::WriteData::Generate(1, seed0)
3322 																	<< op::WriteData::Generate(2, seed1)
3323 																	<< op::Barrier()
3324 																	<< op::ReadData::Generate(1, seed0)
3325 																	<< op::ReadData::Generate(2, seed1)));
3326 
3327 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3328 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3329 																 (std::string("Two unrelated ") + target + " write-reads, reads in reverse order").c_str(),
3330 																 configs[configNdx].storage,
3331 																 configs[configNdx].flags,
3332 																 InterCallOperations()
3333 																	<< op::WriteData::Generate(1, seed0)
3334 																	<< op::WriteData::Generate(2, seed1)
3335 																	<< op::Barrier()
3336 																	<< op::ReadData::Generate(2, seed1)
3337 																	<< op::ReadData::Generate(1, seed0)));
3338 			}
3339 
3340 			// .without_memory_barrier
3341 			{
3342 				struct InvocationConfig
3343 				{
3344 					const char*	name;
3345 					int			count;
3346 				};
3347 
3348 				static const InvocationConfig ssboInvocations[] =
3349 				{
3350 					{ "1k",		1024	},
3351 					{ "4k",		4096	},
3352 					{ "32k",	32768	},
3353 				};
3354 				static const InvocationConfig imageInvocations[] =
3355 				{
3356 					{ "8x8",		8	},
3357 					{ "32x32",		32	},
3358 					{ "128x128",	128	},
3359 				};
3360 				static const InvocationConfig counterInvocations[] =
3361 				{
3362 					{ "32",		32		},
3363 					{ "128",	128		},
3364 					{ "1k",		1024	},
3365 				};
3366 				static const int callCounts[] = { 2, 5, 100 };
3367 
3368 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3369 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370 						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3371 
3372 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3373 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374 						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3375 
3376 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378 						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379 
3380 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3381 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3382 						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3383 			}
3384 		}
3385 	}
3386 }
3387 
3388 } // Functional
3389 } // gles31
3390 } // deqp
3391