/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2015 The Khronos Group Inc.
 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
 * Copyright (c) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief SSBO layout case.
 *//*--------------------------------------------------------------------*/

#include "deFloat16.h"
#include "deInt32.h"
#include "deMath.h"
#include "deMemory.h"
#include "deRandom.hpp"
#include "deSharedPtr.hpp"
#include "deString.h"
#include "deStringUtil.hpp"
#include "gluContextInfo.hpp"
#include "gluShaderProgram.hpp"
#include "gluShaderUtil.hpp"
#include "gluVarType.hpp"
#include "gluVarTypeUtil.hpp"
#include "tcuTestLog.hpp"
#include "vktSSBOLayoutCase.hpp"

#include "vkBuilderUtil.hpp"
#include "vkMemUtil.hpp"
#include "vkPrograms.hpp"
#include "vkQueryUtil.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"

#include "util/vktTypeComparisonUtil.hpp"

namespace vkt
{
namespace ssbo
{

using tcu::TestLog;
using std::string;
using std::vector;
using glu::VarType;
using glu::StructType;
using glu::StructMember;

struct LayoutFlagsFmt
{
	deUint32 flags;
	LayoutFlagsFmt (deUint32 flags_) : flags(flags_) {}
};

std::ostream& operator<< (std::ostream& str, const LayoutFlagsFmt& fmt)
{
	static const struct
	{
		deUint32	bit;
		const char*	token;
	} bitDesc[] =
	{
		{ LAYOUT_STD140,		"std140"		},
		{ LAYOUT_STD430,		"std430"		},
		{ LAYOUT_SCALAR,		"scalar"		},
		{ LAYOUT_ROW_MAJOR,		"row_major"		},
		{ LAYOUT_COLUMN_MAJOR,	"column_major"	}
	};

	deUint32 remBits = fmt.flags;
	for (int descNdx = 0; descNdx < DE_LENGTH_OF_ARRAY(bitDesc); descNdx++)
	{
		if (remBits & bitDesc[descNdx].bit)
		{
			if (remBits != fmt.flags)
				str << ", ";
			str << bitDesc[descNdx].token;
			remBits &= ~bitDesc[descNdx].bit;
		}
	}
	DE_ASSERT(remBits == 0);
	return str;
}
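
// Example: streaming LayoutFlagsFmt(LAYOUT_STD140|LAYOUT_ROW_MAJOR) yields
// "std140, row_major" -- tokens are emitted in bitDesc[] order, separated by ", ".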

// BufferVar implementation.

BufferVar::BufferVar (const char* name, const VarType& type, deUint32 flags)
	: m_name	(name)
	, m_type	(type)
	, m_flags	(flags)
	, m_offset	(~0u)
{
}

// BufferBlock implementation.

BufferBlock::BufferBlock (const char* blockName)
	: m_blockName		(blockName)
	, m_arraySize		(-1)
	, m_flags			(0)
{
	setArraySize(0);
}

void BufferBlock::setArraySize (int arraySize)
{
	DE_ASSERT(arraySize >= 0);
	m_lastUnsizedArraySizes.resize(arraySize == 0 ? 1 : arraySize, 0);
	m_arraySize = arraySize;
}

std::ostream& operator<< (std::ostream& stream, const BlockLayoutEntry& entry)
{
	stream << entry.name << " { name = " << entry.name
		   << ", size = " << entry.size
		   << ", activeVarIndices = [";

	for (vector<int>::const_iterator i = entry.activeVarIndices.begin(); i != entry.activeVarIndices.end(); i++)
	{
		if (i != entry.activeVarIndices.begin())
			stream << ", ";
		stream << *i;
	}

	stream << "] }";
	return stream;
}

static bool isUnsizedArray (const BufferVarLayoutEntry& entry)
{
	DE_ASSERT(entry.arraySize != 0 || entry.topLevelArraySize != 0);
	return entry.arraySize == 0 || entry.topLevelArraySize == 0;
}

std::ostream& operator<< (std::ostream& stream, const BufferVarLayoutEntry& entry)
{
	stream << entry.name << " { type = " << glu::getDataTypeName(entry.type)
		   << ", blockNdx = " << entry.blockNdx
		   << ", offset = " << entry.offset
		   << ", arraySize = " << entry.arraySize
		   << ", arrayStride = " << entry.arrayStride
		   << ", matrixStride = " << entry.matrixStride
		   << ", topLevelArraySize = " << entry.topLevelArraySize
		   << ", topLevelArrayStride = " << entry.topLevelArrayStride
		   << ", isRowMajor = " << (entry.isRowMajor ? "true" : "false")
		   << " }";
	return stream;
}

// \todo [2012-01-24 pyry] Speed up lookups using hash.

int BufferLayout::getVariableIndex (const string& name) const
{
	for (int ndx = 0; ndx < (int)bufferVars.size(); ndx++)
	{
		if (bufferVars[ndx].name == name)
			return ndx;
	}
	return -1;
}

int BufferLayout::getBlockIndex (const string& name) const
{
	for (int ndx = 0; ndx < (int)blocks.size(); ndx++)
	{
		if (blocks[ndx].name == name)
			return ndx;
	}
	return -1;
}
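
// A minimal sketch of the hash-based lookup suggested in the \todo above,
// assuming a hypothetical std::unordered_map member kept in sync with
// bufferVars (not part of the actual BufferLayout):
//
//	std::unordered_map<std::string, int> m_varIndexMap; // name -> index
//
//	int getVariableIndex (const std::string& name) const
//	{
//		const std::unordered_map<std::string, int>::const_iterator pos = m_varIndexMap.find(name);
//		return pos != m_varIndexMap.end() ? pos->second : -1;
//	}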

// ShaderInterface implementation.

ShaderInterface::ShaderInterface (void)
{
}

ShaderInterface::~ShaderInterface (void)
{
	for (std::vector<StructType*>::iterator i = m_structs.begin(); i != m_structs.end(); i++)
		delete *i;

	for (std::vector<BufferBlock*>::iterator i = m_bufferBlocks.begin(); i != m_bufferBlocks.end(); i++)
		delete *i;
}

StructType& ShaderInterface::allocStruct (const char* name)
{
	m_structs.reserve(m_structs.size()+1);
	m_structs.push_back(new StructType(name));
	return *m_structs.back();
}

struct StructNameEquals
{
	std::string name;

	StructNameEquals (const char* name_) : name(name_) {}

	bool operator() (const StructType* type) const
	{
		return type->getTypeName() && name == type->getTypeName();
	}
};

const StructType* ShaderInterface::findStruct (const char* name) const
{
	std::vector<StructType*>::const_iterator pos = std::find_if(m_structs.begin(), m_structs.end(), StructNameEquals(name));
	return pos != m_structs.end() ? *pos : DE_NULL;
}

void ShaderInterface::getNamedStructs (std::vector<const StructType*>& structs) const
{
	for (std::vector<StructType*>::const_iterator i = m_structs.begin(); i != m_structs.end(); i++)
	{
		if ((*i)->getTypeName() != DE_NULL)
			structs.push_back(*i);
	}
}

BufferBlock& ShaderInterface::allocBlock (const char* name)
{
	m_bufferBlocks.reserve(m_bufferBlocks.size()+1);
	m_bufferBlocks.push_back(new BufferBlock(name));
	return *m_bufferBlocks.back();
}

namespace // Utilities
{
// Layout computation.

int getDataTypeByteSize (glu::DataType type)
{
	if (deInRange32(type, glu::TYPE_UINT8, glu::TYPE_UINT8_VEC4) || deInRange32(type, glu::TYPE_INT8, glu::TYPE_INT8_VEC4))
	{
		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint8);
	}
	else if (deInRange32(type, glu::TYPE_UINT16, glu::TYPE_UINT16_VEC4) || deInRange32(type, glu::TYPE_INT16, glu::TYPE_INT16_VEC4) || deInRange32(type, glu::TYPE_FLOAT16, glu::TYPE_FLOAT16_VEC4))
	{
		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint16);
	}
	else
	{
		return glu::getDataTypeScalarSize(type)*(int)sizeof(deUint32);
	}
}
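
// Examples: TYPE_FLOAT_VEC3 -> 3*4 = 12 bytes, TYPE_FLOAT16_VEC2 -> 2*2 = 4 bytes,
// TYPE_INT8_VEC4 -> 4*1 = 4 bytes.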

int getDataTypeByteAlignment (glu::DataType type)
{
	switch (type)
	{
		case glu::TYPE_FLOAT:
		case glu::TYPE_INT:
		case glu::TYPE_UINT:
		case glu::TYPE_BOOL:		return 1*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC2:
		case glu::TYPE_INT_VEC2:
		case glu::TYPE_UINT_VEC2:
		case glu::TYPE_BOOL_VEC2:	return 2*(int)sizeof(deUint32);

		case glu::TYPE_FLOAT_VEC3:
		case glu::TYPE_INT_VEC3:
		case glu::TYPE_UINT_VEC3:
		case glu::TYPE_BOOL_VEC3:	// Fall-through to vec4

		case glu::TYPE_FLOAT_VEC4:
		case glu::TYPE_INT_VEC4:
		case glu::TYPE_UINT_VEC4:
		case glu::TYPE_BOOL_VEC4:	return 4*(int)sizeof(deUint32);

		case glu::TYPE_UINT8:
		case glu::TYPE_INT8:			return 1*(int)sizeof(deUint8);

		case glu::TYPE_UINT8_VEC2:
		case glu::TYPE_INT8_VEC2:		return 2*(int)sizeof(deUint8);

		case glu::TYPE_UINT8_VEC3:
		case glu::TYPE_INT8_VEC3:		// Fall-through to vec4

		case glu::TYPE_UINT8_VEC4:
		case glu::TYPE_INT8_VEC4:		return 4*(int)sizeof(deUint8);

		case glu::TYPE_UINT16:
		case glu::TYPE_INT16:
		case glu::TYPE_FLOAT16:			return 1*(int)sizeof(deUint16);

		case glu::TYPE_UINT16_VEC2:
		case glu::TYPE_INT16_VEC2:
		case glu::TYPE_FLOAT16_VEC2:	return 2*(int)sizeof(deUint16);

		case glu::TYPE_UINT16_VEC3:
		case glu::TYPE_INT16_VEC3:
		case glu::TYPE_FLOAT16_VEC3:	// Fall-through to vec4

		case glu::TYPE_UINT16_VEC4:
		case glu::TYPE_INT16_VEC4:
		case glu::TYPE_FLOAT16_VEC4:	return 4*(int)sizeof(deUint16);

		default:
			DE_ASSERT(false);
			return 0;
	}
}
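
// Note that three-component vectors align like four-component ones, e.g.
// TYPE_FLOAT_VEC3 -> 16 bytes and TYPE_UINT16_VEC3 -> 8 bytes.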

int computeStd140BaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	const int vec4Alignment = (int)sizeof(deUint32)*4;

	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		if (glu::isDataTypeMatrix(basicType))
		{
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= deAlign32(getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize)), vec4Alignment);

			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
	{
		int elemAlignment = computeStd140BaseAlignment(type.getElementType(), layoutFlags);

		// Round up to alignment of vec4
		return deAlign32(elemAlignment, vec4Alignment);
	}
	else
	{
		DE_ASSERT(type.isStructType());

		int maxBaseAlignment = 0;

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeStd140BaseAlignment(memberIter->getType(), layoutFlags));

		return deAlign32(maxBaseAlignment, vec4Alignment);
	}
}
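
// Example: under std140 a float[4] member gets base alignment
// deAlign32(4, 16) = 16 (and hence a 16-byte array stride), while a lone
// float member keeps its scalar alignment of 4.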

int computeStd430BaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	// Otherwise identical to std140 except that the alignment of structures
	// and arrays is not rounded up to the alignment of vec4.

	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		if (glu::isDataTypeMatrix(basicType))
		{
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
	{
		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
	}
	else
	{
		DE_ASSERT(type.isStructType());

		int maxBaseAlignment = 0;

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeStd430BaseAlignment(memberIter->getType(), layoutFlags));

		return maxBaseAlignment;
	}
}
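
// Example: the float[4] member from the std140 example above keeps its
// element alignment of 4 under std430; no rounding up to vec4 occurs.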

int computeRelaxedBlockBaseAlignment (const VarType& type, deUint32 layoutFlags)
{
	if (type.isBasicType())
	{
		glu::DataType basicType = type.getBasicType();

		if (glu::isDataTypeVector(basicType))
			return getDataTypeByteAlignment(glu::getDataTypeScalarType(basicType));

		if (glu::isDataTypeMatrix(basicType))
		{
			const bool	isRowMajor	= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize		= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
												 : glu::getDataTypeMatrixNumRows(basicType);
			const int	vecAlign	= getDataTypeByteAlignment(glu::getDataTypeFloatVec(vecSize));
			return vecAlign;
		}
		else
			return getDataTypeByteAlignment(basicType);
	}
	else if (type.isArrayType())
		return computeStd430BaseAlignment(type.getElementType(), layoutFlags);
	else
	{
		DE_ASSERT(type.isStructType());

		int maxBaseAlignment = 0;
		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeRelaxedBlockBaseAlignment(memberIter->getType(), layoutFlags));

		return maxBaseAlignment;
	}
}

int computeScalarBlockAlignment (const VarType& type, deUint32 layoutFlags)
{
	if (type.isBasicType())
	{
		return getDataTypeByteAlignment(glu::getDataTypeScalarType(type.getBasicType()));
	}
	else if (type.isArrayType())
		return computeScalarBlockAlignment(type.getElementType(), layoutFlags);
	else
	{
		DE_ASSERT(type.isStructType());

		int maxBaseAlignment = 0;
		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			maxBaseAlignment = de::max(maxBaseAlignment, computeScalarBlockAlignment(memberIter->getType(), layoutFlags));

		return maxBaseAlignment;
	}
}

inline deUint32 mergeLayoutFlags (deUint32 prevFlags, deUint32 newFlags)
{
	const deUint32	packingMask		= LAYOUT_STD430|LAYOUT_STD140|LAYOUT_RELAXED|LAYOUT_SCALAR;
	const deUint32	matrixMask		= LAYOUT_ROW_MAJOR|LAYOUT_COLUMN_MAJOR;

	deUint32 mergedFlags = 0;

	mergedFlags |= ((newFlags & packingMask)	? newFlags : prevFlags) & packingMask;
	mergedFlags |= ((newFlags & matrixMask)		? newFlags : prevFlags) & matrixMask;

	return mergedFlags;
}
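
// Example: mergeLayoutFlags(LAYOUT_STD140|LAYOUT_COLUMN_MAJOR, LAYOUT_ROW_MAJOR)
// yields LAYOUT_STD140|LAYOUT_ROW_MAJOR: the new matrix order wins, while the
// packing falls back to prevFlags because newFlags carries no packing bits.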

//! Appends all child elements to layout, returns value that should be appended to offset.
int computeReferenceLayout (
	BufferLayout&		layout,
	int					curBlockNdx,
	int					baseOffset,
	const std::string&	curPrefix,
	const VarType&		type,
	deUint32			layoutFlags)
{
	// Reference layout uses std430 rules by default. std140 rules are
	// chosen only for blocks that have std140 layout.
	const int	baseAlignment		= (layoutFlags & LAYOUT_SCALAR)  != 0 ? computeScalarBlockAlignment(type, layoutFlags)			:
									  (layoutFlags & LAYOUT_STD140)  != 0 ? computeStd140BaseAlignment(type, layoutFlags)		:
									  (layoutFlags & LAYOUT_RELAXED) != 0 ? computeRelaxedBlockBaseAlignment(type, layoutFlags)	:
									  computeStd430BaseAlignment(type, layoutFlags);
	int			curOffset			= deAlign32(baseOffset, baseAlignment);
	const int	topLevelArraySize	= 1; // Default values
	const int	topLevelArrayStride	= 0;

	if (type.isBasicType())
	{
		const glu::DataType		basicType	= type.getBasicType();
		BufferVarLayoutEntry	entry;

		entry.name					= curPrefix;
		entry.type					= basicType;
		entry.arraySize				= 1;
		entry.arrayStride			= 0;
		entry.matrixStride			= 0;
		entry.topLevelArraySize		= topLevelArraySize;
		entry.topLevelArrayStride	= topLevelArrayStride;
		entry.blockNdx				= curBlockNdx;

		if (glu::isDataTypeMatrix(basicType))
		{
			// Array of vectors as specified in rules 5 & 7.
			const bool	isRowMajor			= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int	vecSize				= isRowMajor ? glu::getDataTypeMatrixNumColumns(basicType)
														 : glu::getDataTypeMatrixNumRows(basicType);
			const glu::DataType	vecType		= glu::getDataTypeFloatVec(vecSize);
			const int	numVecs				= isRowMajor ? glu::getDataTypeMatrixNumRows(basicType)
														 : glu::getDataTypeMatrixNumColumns(basicType);
			const int	vecStride			= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;

			entry.offset		= curOffset;
			entry.matrixStride	= vecStride;
			entry.isRowMajor	= isRowMajor;

			curOffset += numVecs*entry.matrixStride;
		}
		else
		{
			if (!(layoutFlags & LAYOUT_SCALAR) && (layoutFlags & LAYOUT_RELAXED) &&
				glu::isDataTypeVector(basicType) && (getDataTypeByteSize(basicType) <= 16 ? curOffset / 16 != (curOffset + getDataTypeByteSize(basicType) - 1) / 16 : curOffset % 16 != 0))
				curOffset = deIntRoundToPow2(curOffset, 16);

			// Scalar or vector.
			entry.offset = curOffset;

			curOffset += getDataTypeByteSize(basicType);
		}

		layout.bufferVars.push_back(entry);
	}
	else if (type.isArrayType())
	{
		const VarType&	elemType	= type.getElementType();

		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of scalars or vectors.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const int				stride			= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) : baseAlignment;
			BufferVarLayoutEntry	entry;

			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= type.getArraySize();
			entry.arrayStride			= stride;
			entry.matrixStride			= 0;
			entry.topLevelArraySize		= topLevelArraySize;
			entry.topLevelArrayStride	= topLevelArrayStride;

			curOffset += stride*type.getArraySize();

			layout.bufferVars.push_back(entry);
		}
		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of matrices.
			const glu::DataType			elemBasicType	= elemType.getBasicType();
			const bool					isRowMajor		= !!(layoutFlags & LAYOUT_ROW_MAJOR);
			const int					vecSize			= isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
																	 : glu::getDataTypeMatrixNumRows(elemBasicType);
			const glu::DataType			vecType			= glu::getDataTypeFloatVec(vecSize);
			const int					numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
																	 : glu::getDataTypeMatrixNumColumns(elemBasicType);
			const int					vecStride		= (layoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) : baseAlignment;
			BufferVarLayoutEntry		entry;

			entry.name					= curPrefix + "[0]"; // Array variables are always postfixed with [0]
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= type.getArraySize();
			entry.arrayStride			= vecStride*numVecs;
			entry.matrixStride			= vecStride;
			entry.isRowMajor			= isRowMajor;
			entry.topLevelArraySize		= topLevelArraySize;
			entry.topLevelArrayStride	= topLevelArrayStride;

			curOffset += entry.arrayStride*type.getArraySize();

			layout.bufferVars.push_back(entry);
		}
		else
		{
			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

			for (int elemNdx = 0; elemNdx < type.getArraySize(); elemNdx++)
				curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "[" + de::toString(elemNdx) + "]", type.getElementType(), layoutFlags);
		}
	}
	else
	{
		DE_ASSERT(type.isStructType());

		for (StructType::ConstIterator memberIter = type.getStructPtr()->begin(); memberIter != type.getStructPtr()->end(); memberIter++)
			curOffset += computeReferenceLayout(layout, curBlockNdx, curOffset, curPrefix + "." + memberIter->getName(), memberIter->getType(), layoutFlags);

		if (!(layoutFlags & LAYOUT_SCALAR))
			curOffset = deAlign32(curOffset, baseAlignment);
	}

	return curOffset-baseOffset;
}
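
// Worked example (std430, 8-aligned baseOffset): for a member
// "struct { float a; vec2 b; } s" the struct's base alignment is 8 (its
// largest member), "s.a" lands at relative offset 0, "s.b" is aligned up to
// offset 8, and the returned size is deAlign32(16, 8) = 16 bytes.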

//! Appends all child elements to layout, returns offset increment.
int computeReferenceLayout (BufferLayout& layout, int curBlockNdx, const std::string& blockPrefix, int baseOffset, const BufferVar& bufVar, deUint32 blockLayoutFlags)
{
	const VarType&	varType			= bufVar.getType();
	const deUint32	combinedFlags	= mergeLayoutFlags(blockLayoutFlags, bufVar.getFlags());

	if (varType.isArrayType())
	{
		// Top-level arrays need special care.
		const int		topLevelArraySize	= varType.getArraySize() == VarType::UNSIZED_ARRAY ? 0 : varType.getArraySize();
		const string	prefix				= blockPrefix + bufVar.getName() + "[0]";
		const bool		isStd140			= (blockLayoutFlags & LAYOUT_STD140) != 0;
		const int		vec4Align			= (int)sizeof(deUint32)*4;
		const int		baseAlignment		= (blockLayoutFlags & LAYOUT_SCALAR)  != 0	? computeScalarBlockAlignment(varType, combinedFlags)		:
											  isStd140									? computeStd140BaseAlignment(varType, combinedFlags)		:
											  (blockLayoutFlags & LAYOUT_RELAXED) != 0	? computeRelaxedBlockBaseAlignment(varType, combinedFlags)	:
											  computeStd430BaseAlignment(varType, combinedFlags);
		int				curOffset			= deAlign32(baseOffset, baseAlignment);
		const VarType&	elemType			= varType.getElementType();

		if (elemType.isBasicType() && !glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of scalars or vectors.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const int				elemBaseAlign	= getDataTypeByteAlignment(elemBasicType);
			const int				stride			= (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(elemBasicType) :
													  isStd140 ? deAlign32(elemBaseAlign, vec4Align) :
													  elemBaseAlign;

			BufferVarLayoutEntry	entry;

			entry.name					= prefix;
			entry.topLevelArraySize		= 1;
			entry.topLevelArrayStride	= 0;
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= topLevelArraySize;
			entry.arrayStride			= stride;
			entry.matrixStride			= 0;

			layout.bufferVars.push_back(entry);

			curOffset += stride*topLevelArraySize;
		}
		else if (elemType.isBasicType() && glu::isDataTypeMatrix(elemType.getBasicType()))
		{
			// Array of matrices.
			const glu::DataType		elemBasicType	= elemType.getBasicType();
			const bool				isRowMajor		= !!(combinedFlags & LAYOUT_ROW_MAJOR);
			const int				vecSize			= isRowMajor ? glu::getDataTypeMatrixNumColumns(elemBasicType)
																 : glu::getDataTypeMatrixNumRows(elemBasicType);
			const int				numVecs			= isRowMajor ? glu::getDataTypeMatrixNumRows(elemBasicType)
																 : glu::getDataTypeMatrixNumColumns(elemBasicType);
			const glu::DataType		vecType			= glu::getDataTypeFloatVec(vecSize);
			const int				vecBaseAlign	= getDataTypeByteAlignment(vecType);
			const int				stride			= (blockLayoutFlags & LAYOUT_SCALAR) ? getDataTypeByteSize(vecType) :
													  isStd140 ? deAlign32(vecBaseAlign, vec4Align) :
													  vecBaseAlign;

			BufferVarLayoutEntry	entry;

			entry.name					= prefix;
			entry.topLevelArraySize		= 1;
			entry.topLevelArrayStride	= 0;
			entry.type					= elemBasicType;
			entry.blockNdx				= curBlockNdx;
			entry.offset				= curOffset;
			entry.arraySize				= topLevelArraySize;
			entry.arrayStride			= stride*numVecs;
			entry.matrixStride			= stride;
			entry.isRowMajor			= isRowMajor;

			layout.bufferVars.push_back(entry);

			curOffset += entry.arrayStride*topLevelArraySize;
		}
		else
		{
			DE_ASSERT(elemType.isStructType() || elemType.isArrayType());

			// Struct base alignment is not added multiple times as curOffset supplied to computeReferenceLayout
			// was already aligned correctly. Thus computeReferenceLayout should not add any extra padding
			// before the struct. Padding after the struct will be added as it should.
			//
			// Stride could be computed prior to creating child elements, but it would essentially require running
			// the layout computation twice. Instead we fix up the stride of the child elements afterwards.

			const int	firstChildNdx	= (int)layout.bufferVars.size();

			const int size = computeReferenceLayout(layout, curBlockNdx, deAlign32(curOffset, baseAlignment), prefix, varType.getElementType(), combinedFlags);
			const int stride = deAlign32(size, baseAlignment);

			for (int childNdx = firstChildNdx; childNdx < (int)layout.bufferVars.size(); childNdx++)
			{
				layout.bufferVars[childNdx].topLevelArraySize	= topLevelArraySize;
				layout.bufferVars[childNdx].topLevelArrayStride	= stride;
			}

			if (topLevelArraySize != 0)
				curOffset += stride*(topLevelArraySize - 1) + size;
		}

		return curOffset-baseOffset;
	}
	else
		return computeReferenceLayout(layout, curBlockNdx, baseOffset, blockPrefix + bufVar.getName(), varType, combinedFlags);
}

void computeReferenceLayout (BufferLayout& layout, ShaderInterface& interface)
{
	int numBlocks = interface.getNumBlocks();

	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
	{
		BufferBlock&		block			= interface.getBlock(blockNdx);
		bool				hasInstanceName	= block.getInstanceName() != DE_NULL;
		std::string			blockPrefix		= hasInstanceName ? (std::string(block.getBlockName()) + ".") : std::string("");
		int					curOffset		= 0;
		int					activeBlockNdx	= (int)layout.blocks.size();
		int					firstVarNdx		= (int)layout.bufferVars.size();

		size_t oldSize	= layout.bufferVars.size();
		for (BufferBlock::iterator varIter = block.begin(); varIter != block.end(); varIter++)
		{
			BufferVar& bufVar = *varIter;
			curOffset += computeReferenceLayout(layout, activeBlockNdx, blockPrefix, curOffset, bufVar, block.getFlags());
			if (block.getFlags() & LAYOUT_RELAXED)
			{
				DE_ASSERT(!(layout.bufferVars.size() <= oldSize));
				bufVar.setOffset(layout.bufferVars[oldSize].offset);
			}
			oldSize	= layout.bufferVars.size();
		}

		int	varIndicesEnd	= (int)layout.bufferVars.size();
		int	blockSize		= curOffset;
		int	numInstances	= block.isArray() ? block.getArraySize() : 1;

		// Create block layout entries for each instance.
		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
		{
			// Allocate entry for instance.
			layout.blocks.push_back(BlockLayoutEntry());
			BlockLayoutEntry& blockEntry = layout.blocks.back();

			blockEntry.name = block.getBlockName();
			blockEntry.size = blockSize;

			// Compute active variable set for block.
			for (int varNdx = firstVarNdx; varNdx < varIndicesEnd; varNdx++)
				blockEntry.activeVarIndices.push_back(varNdx);

			if (block.isArray())
				blockEntry.name += "[" + de::toString(instanceNdx) + "]";
		}
	}
}
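
// Example: a block declared "buffer B { ... } b[3];" produces one set of
// buffer variable entries but three BlockLayoutEntry instances named "B[0]",
// "B[1]" and "B[2]", all sharing the same size and activeVarIndices.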

// Value generator.

void generateValue (const BufferVarLayoutEntry& entry, int unsizedArraySize, void* basePtr, de::Random& rnd)
{
	const glu::DataType	scalarType		= glu::getDataTypeScalarType(entry.type);
	const int			scalarSize		= glu::getDataTypeScalarSize(entry.type);
	const int			arraySize		= entry.arraySize == 0 ? unsizedArraySize : entry.arraySize;
	const int			arrayStride		= entry.arrayStride;
	const int			topLevelSize	= entry.topLevelArraySize == 0 ? unsizedArraySize : entry.topLevelArraySize;
	const int			topLevelStride	= entry.topLevelArrayStride;
	const bool			isMatrix		= glu::isDataTypeMatrix(entry.type);
	const int			numVecs			= isMatrix ? (entry.isRowMajor ? glu::getDataTypeMatrixNumRows(entry.type) : glu::getDataTypeMatrixNumColumns(entry.type)) : 1;
	const int			vecSize			= scalarSize / numVecs;
	const size_t		compSize		= getDataTypeByteSize(scalarType);

	DE_ASSERT(scalarSize%numVecs == 0);
	DE_ASSERT(topLevelSize >= 0);
	DE_ASSERT(arraySize >= 0);

	for (int topElemNdx = 0; topElemNdx < topLevelSize; topElemNdx++)
	{
		deUint8* const topElemPtr = (deUint8*)basePtr + entry.offset + topElemNdx*topLevelStride;

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
		{
			deUint8* const elemPtr = topElemPtr + elemNdx*arrayStride;

			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				deUint8* const vecPtr = elemPtr + (isMatrix ? vecNdx*entry.matrixStride : 0);

				for (int compNdx = 0; compNdx < vecSize; compNdx++)
				{
					deUint8* const compPtr = vecPtr + compSize*compNdx;

					switch (scalarType)
					{
						case glu::TYPE_FLOAT:	*((float*)compPtr)		= (float)rnd.getInt(-9, 9);					break;
						case glu::TYPE_INT:		*((int*)compPtr)		= rnd.getInt(-9, 9);						break;
						case glu::TYPE_UINT:	*((deUint32*)compPtr)	= (deUint32)rnd.getInt(0, 9);				break;
						case glu::TYPE_INT8:	*((deInt8*)compPtr)		= (deInt8)rnd.getInt(-9, 9);				break;
						case glu::TYPE_UINT8:	*((deUint8*)compPtr)	= (deUint8)rnd.getInt(0, 9);				break;
						case glu::TYPE_INT16:	*((deInt16*)compPtr)	= (deInt16)rnd.getInt(-9, 9);				break;
						case glu::TYPE_UINT16:	*((deUint16*)compPtr)	= (deUint16)rnd.getInt(0, 9);				break;
						case glu::TYPE_FLOAT16:	*((deFloat16*)compPtr)	= deFloat32To16((float)rnd.getInt(-9, 9));	break;
						// \note Random bit pattern is used for true values. Spec states that all non-zero values are
						//       interpreted as true but some implementations fail this.
						case glu::TYPE_BOOL:	*((deUint32*)compPtr)	= rnd.getBool() ? rnd.getUint32()|1u : 0u;	break;
						default:
							DE_ASSERT(false);
					}
				}
			}
		}
	}
}
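
// \note Generated component values are small integers in [-9, 9] (or [0, 9]
//       for unsigned types), which are exactly representable in every tested
//       format including float16; booleans get the special handling above.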

void generateValues (const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, deUint32 seed)
{
	de::Random	rnd			(seed);
	const int	numBlocks	= (int)layout.blocks.size();

	DE_ASSERT(numBlocks == (int)blockPointers.size());

	for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
	{
		const BlockLayoutEntry&	blockLayout	= layout.blocks[blockNdx];
		const BlockDataPtr&		blockPtr	= blockPointers[blockNdx];
		const int				numEntries	= (int)layout.blocks[blockNdx].activeVarIndices.size();

		for (int entryNdx = 0; entryNdx < numEntries; entryNdx++)
		{
			const int					varNdx		= blockLayout.activeVarIndices[entryNdx];
			const BufferVarLayoutEntry&	varEntry	= layout.bufferVars[varNdx];

			generateValue(varEntry, blockPtr.lastUnsizedArraySize, blockPtr.ptr, rnd);
		}
	}
}

// Shader generator.

void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const BufferBlock& bufferBlock)
{
	for (BufferBlock::const_iterator iter = bufferBlock.begin(); iter != bufferBlock.end(); ++iter)
		vkt::typecomputil::collectUniqueBasicTypes(basicTypes, iter->getType());
}

void collectUniqueBasicTypes (std::set<glu::DataType>& basicTypes, const ShaderInterface& interface)
{
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
		collectUniqueBasicTypes(basicTypes, interface.getBlock(ndx));
}

void generateCompareFuncs (std::ostream& str, const ShaderInterface& interface)
{
	std::set<glu::DataType> types;
	std::set<glu::DataType> compareFuncs;

	// Collect unique basic types
	collectUniqueBasicTypes(types, interface);

	// Set of compare functions required
	for (std::set<glu::DataType>::const_iterator iter = types.begin(); iter != types.end(); ++iter)
	{
		vkt::typecomputil::getCompareDependencies(compareFuncs, *iter);
	}

	for (int type = 0; type < glu::TYPE_LAST; ++type)
	{
		if (compareFuncs.find(glu::DataType(type)) != compareFuncs.end())
			str << vkt::typecomputil::getCompareFuncForType(glu::DataType(type));
	}
}

bool usesRelaxedLayout (const ShaderInterface& interface)
{
	// True if any of the blocks has the LAYOUT_RELAXED flag.
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
	{
		if (interface.getBlock(ndx).getFlags() & LAYOUT_RELAXED)
			return true;
	}
	return false;
}

bool uses16BitStorage (const ShaderInterface& interface)
{
	// True if any of the blocks has the LAYOUT_16BIT_STORAGE flag.
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
	{
		if (interface.getBlock(ndx).getFlags() & LAYOUT_16BIT_STORAGE)
			return true;
	}
	return false;
}

bool uses8BitStorage (const ShaderInterface& interface)
{
	// True if any of the blocks has the LAYOUT_8BIT_STORAGE flag.
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
	{
		if (interface.getBlock(ndx).getFlags() & LAYOUT_8BIT_STORAGE)
			return true;
	}
	return false;
}

bool usesScalarLayout (const ShaderInterface& interface)
{
	// True if any of the blocks has the LAYOUT_SCALAR flag.
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
	{
		if (interface.getBlock(ndx).getFlags() & LAYOUT_SCALAR)
			return true;
	}
	return false;
}

bool usesDescriptorIndexing (const ShaderInterface& interface)
{
	// True if any of the blocks has the LAYOUT_DESCRIPTOR_INDEXING flag.
	for (int ndx = 0; ndx < interface.getNumBlocks(); ++ndx)
	{
		if (interface.getBlock(ndx).getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
			return true;
	}
	return false;
}

struct Indent
{
	int level;
	Indent (int level_) : level(level_) {}
};

std::ostream& operator<< (std::ostream& str, const Indent& indent)
{
	for (int i = 0; i < indent.level; i++)
		str << "\t";
	return str;
}

void generateDeclaration (std::ostream& src, const BufferVar& bufferVar, int indentLevel)
{
	// \todo [pyry] Qualifiers
	if ((bufferVar.getFlags() & LAYOUT_MASK) != 0)
		src << "layout(" << LayoutFlagsFmt(bufferVar.getFlags() & LAYOUT_MASK) << ") ";
	else if (bufferVar.getOffset() != ~0u)
		src << "layout(offset = " << bufferVar.getOffset() << ") ";

	src << glu::declare(bufferVar.getType(), bufferVar.getName(), indentLevel);
}

void generateDeclaration (std::ostream& src, const BufferBlock& block, int bindingPoint, bool usePhysStorageBuffer)
{
	src << "layout(";
	if ((block.getFlags() & LAYOUT_MASK) != 0)
		src << LayoutFlagsFmt(block.getFlags() & LAYOUT_MASK) << ", ";

	if (usePhysStorageBuffer)
		src << "buffer_reference";
	else
		src << "binding = " << bindingPoint;

	src << ") ";

	bool readonly = true;
	for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
	{
		const BufferVar& bufVar = *varIter;
		if (bufVar.getFlags() & ACCESS_WRITE)
		{
			readonly = false;
			break;
		}
	}
	if (readonly)
		src << "readonly ";

	src << "buffer " << block.getBlockName();
	src << "\n{\n";

	for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
	{
		src << Indent(1);

		generateDeclaration(src, *varIter, 1 /* indent level */);
		src << ";\n";
	}

	src << "}";

	if (!usePhysStorageBuffer)
	{
		if (block.getInstanceName() != DE_NULL)
		{
			src << " " << block.getInstanceName();
			if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
				src << "[]";
			else if (block.isArray())
				src << "[" << block.getArraySize() << "]";
		}
		else
			DE_ASSERT(!block.isArray());
	}

	src << ";\n";
}
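
// An emitted block declaration might look like this (std140 packing,
// binding 1, instance name "block", at least one written variable so no
// "readonly" qualifier; the exact variable declaration depends on the type):
//
//	layout(std140, binding = 1) buffer Block
//	{
//		highp vec4 var0;
//	} block;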

void generateImmMatrixSrc (std::ostream& src, glu::DataType basicType, int matrixStride, bool isRowMajor, bool singleCol, int colNumber, const void* valuePtr)
{
	DE_ASSERT(glu::isDataTypeMatrix(basicType));

	const int		compSize		= sizeof(deUint32);
	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);

	src << glu::getDataTypeName(singleCol ? glu::getDataTypeMatrixColumnType(basicType) : basicType) << "(";

	// Constructed in column-wise order.
	bool firstElem = true;
	for (int colNdx = 0; colNdx < numCols; colNdx++)
	{
		if (singleCol && colNdx != colNumber)
			continue;

		for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
		{
			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
																			 : colNdx*matrixStride + rowNdx*compSize);

			if (!firstElem)
				src << ", ";

			src << de::floatToString(*((const float*)compPtr), 1);
			firstElem = false;
		}
	}

	src << ")";
}
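
// Example: a tightly packed column-major mat2 holding {1,2,3,4}
// (matrixStride = 8) is emitted as "mat2(1.0, 2.0, 3.0, 4.0)"; with
// singleCol == true and colNumber == 1, only "vec2(3.0, 4.0)" is emitted.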

void generateImmMatrixSrc (std::ostream& src,
						   glu::DataType basicType,
						   int matrixStride,
						   bool isRowMajor,
						   const void* valuePtr,
						   const char* resultVar,
						   const char* typeName,
						   const string shaderName)
{
	const int		compSize		= sizeof(deUint32);
	const int		numRows			= glu::getDataTypeMatrixNumRows(basicType);
	const int		numCols			= glu::getDataTypeMatrixNumColumns(basicType);

	// Compare individual components.
	typeName = "float";
	for (int colNdx = 0; colNdx < numCols; colNdx++)
	{
		for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
		{
			src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << "(" << shaderName << "[" << colNdx << "][" << rowNdx << "], ";
			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? rowNdx*matrixStride + colNdx*compSize
																			 : colNdx*matrixStride + rowNdx*compSize);

			src << de::floatToString(*((const float*)compPtr), 1);
			src << ");\n";
		}
	}

	// Compare whole columns.
	typeName = "vec";
	for (int colNdx = 0; colNdx < numCols; colNdx++)
	{
		src << "\t" << resultVar << " = " << resultVar << " && compare_" << typeName << numRows << "(" << shaderName << "[" << colNdx << "], " << typeName << numRows << "(";
		for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
		{
			const deUint8*	compPtr	= (const deUint8*)valuePtr + (isRowMajor ? (rowNdx * matrixStride + colNdx * compSize)
																			 : (colNdx * matrixStride + rowNdx * compSize));
			src << de::floatToString(*((const float*)compPtr), 1);

			if (rowNdx < numRows-1)
				src << ", ";
		}
		src << "));\n";
	}
}

void generateImmScalarVectorSrc (std::ostream& src, glu::DataType basicType, const void* valuePtr)
{
	DE_ASSERT(glu::isDataTypeFloatOrVec(basicType)	||
			  glu::isDataTypeIntOrIVec(basicType)	||
			  glu::isDataTypeUintOrUVec(basicType)	||
			  glu::isDataTypeBoolOrBVec(basicType)	||
			  glu::isDataTypeExplicitPrecision(basicType));

	const glu::DataType		scalarType		= glu::getDataTypeScalarType(basicType);
	const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
	const size_t			compSize		= getDataTypeByteSize(scalarType);

	if (scalarSize > 1)
		src << glu::getDataTypeName(vkt::typecomputil::getPromoteType(basicType)) << "(";

	for (int scalarNdx = 0; scalarNdx < scalarSize; scalarNdx++)
	{
		const deUint8* compPtr = (const deUint8*)valuePtr + scalarNdx*compSize;

		if (scalarNdx > 0)
			src << ", ";

		switch (scalarType)
		{
			case glu::TYPE_FLOAT16:	src << de::floatToString(deFloat16To32(*((const deFloat16*)compPtr)), 1);	break;
			case glu::TYPE_FLOAT:	src << de::floatToString(*((const float*)compPtr), 1);						break;
			case glu::TYPE_INT8:	src << (deInt32)*((const deInt8*)compPtr);									break;
			case glu::TYPE_INT16:	src << *((const deInt16*)compPtr);											break;
			case glu::TYPE_INT:		src << *((const int*)compPtr);												break;
			case glu::TYPE_UINT8:	src << (deUint32)*((const deUint8*)compPtr) << "u";							break;
			case glu::TYPE_UINT16:	src << *((const deUint16*)compPtr) << "u";									break;
			case glu::TYPE_UINT:	src << *((const deUint32*)compPtr) << "u";									break;
			case glu::TYPE_BOOL:	src << (*((const deUint32*)compPtr) != 0u ? "true" : "false");				break;
			default:
				DE_ASSERT(false);
		}
	}

	if (scalarSize > 1)
		src << ")";
}
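
// Examples: an ivec2 holding {-1, 7} is emitted as "ivec2(-1, 7)", a lone
// uint holding 3 as "3u". Vectors are wrapped in the promoted type's
// constructor so that e.g. 8-bit and 16-bit values compare against
// full-width constants.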

string getAPIName (const BufferBlock& block, const BufferVar& var, const glu::TypeComponentVector& accessPath)
{
	std::ostringstream name;

	if (block.getInstanceName())
		name << block.getBlockName() << ".";

	name << var.getName();

	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
	{
		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
		{
			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
			const StructType*	structPtr	= curType.getStructPtr();

			name << "." << structPtr->getMember(pathComp->index).getName();
		}
		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
		{
			if (pathComp == accessPath.begin() || (pathComp+1) == accessPath.end())
				name << "[0]"; // Top- / bottom-level array
			else
				name << "[" << pathComp->index << "]";
		}
		else
			DE_ASSERT(false);
	}

	return name.str();
}

string getShaderName (const BufferBlock& block, int instanceNdx, const BufferVar& var, const glu::TypeComponentVector& accessPath)
{
	std::ostringstream name;

	if (block.getInstanceName())
	{
		name << block.getInstanceName();

		if (block.getFlags() & LAYOUT_DESCRIPTOR_INDEXING)
			name << "[nonuniformEXT(" << instanceNdx << ")]";
		else if (block.isArray())
			name << "[" << instanceNdx << "]";

		name << ".";
	}
	else
		DE_ASSERT(instanceNdx == 0);

	name << var.getName();

	for (glu::TypeComponentVector::const_iterator pathComp = accessPath.begin(); pathComp != accessPath.end(); pathComp++)
	{
		if (pathComp->type == glu::VarTypeComponent::STRUCT_MEMBER)
		{
			const VarType		curType		= glu::getVarType(var.getType(), accessPath.begin(), pathComp);
			const StructType*	structPtr	= curType.getStructPtr();

			name << "." << structPtr->getMember(pathComp->index).getName();
		}
		else if (pathComp->type == glu::VarTypeComponent::ARRAY_ELEMENT)
			name << "[" << pathComp->index << "]";
		else
			DE_ASSERT(false);
	}

	return name.str();
}

int computeOffset (const BufferVarLayoutEntry& varLayout, const glu::TypeComponentVector& accessPath)
{
	const int	topLevelNdx		= (accessPath.size() > 1 && accessPath.front().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.front().index : 0;
	const int	bottomLevelNdx	= (!accessPath.empty() && accessPath.back().type == glu::VarTypeComponent::ARRAY_ELEMENT) ? accessPath.back().index : 0;

	return varLayout.offset + varLayout.topLevelArrayStride*topLevelNdx + varLayout.arrayStride*bottomLevelNdx;
}
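
// Example: with offset = 16, topLevelArrayStride = 64 and arrayStride = 4,
// an access path whose first component is top-level array index 2 and whose
// last is innermost array index 5 resolves to 16 + 64*2 + 4*5 = 164.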

void generateCompareSrc (
	std::ostream&				src,
	const char*					resultVar,
	const BufferLayout&			bufferLayout,
	const BufferBlock&			block,
	int							instanceNdx,
	const BlockDataPtr&			blockPtr,
	const BufferVar&			bufVar,
	const glu::SubTypeAccess&	accessPath,
	MatrixLoadFlags				matrixLoadFlag)
{
	const VarType curType = accessPath.getType();

	if (curType.isArrayType())
	{
		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), LOAD_FULL_MATRIX);
	}
	else if (curType.isStructType())
	{
		const int numMembers = curType.getStructPtr()->getNumMembers();

		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
			generateCompareSrc(src, resultVar, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), LOAD_FULL_MATRIX);
	}
	else
	{
		DE_ASSERT(curType.isBasicType());

		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
		const int		varNdx	= bufferLayout.getVariableIndex(apiName);

		DE_ASSERT(varNdx >= 0);
		{
			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
			const glu::DataType			basicType		= curType.getBasicType();
			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
			const char*					typeName		= glu::getDataTypeName(basicType);
			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());

			if (isMatrix)
			{
				if (matrixLoadFlag == LOAD_MATRIX_COMPONENTS)
					generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, valuePtr, resultVar, typeName, shaderName);
				else
				{
					src << "\t" << resultVar << " = compare_" << typeName << "(" << shaderName << ", ";
					generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1, valuePtr);
					src << ") && " << resultVar << ";\n";
				}
			}
			else
			{
				const char* castName = "";
				glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
				if (basicType != promoteType)
					castName = glu::getDataTypeName(promoteType);

				src << "\t" << resultVar << " = compare_" << typeName << "(" << castName << "(" << shaderName << "), ";
				generateImmScalarVectorSrc(src, basicType, valuePtr);
				src << ") && " << resultVar << ";\n";
			}
		}
	}
}
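
// An emitted comparison might look like this (assuming resultVar is "result"
// and a hypothetical float member block.var holding 2.0; castName is empty
// for types that need no promotion):
//
//	result = compare_float((block.var), 2.0) && result;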

void generateCompareSrc (std::ostream& src, const char* resultVar, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixLoadFlags matrixLoadFlag)
{
	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
	{
		const BufferBlock&	block			= interface.getBlock(declNdx);
		const bool			isArray			= block.isArray();
		const int			numInstances	= isArray ? block.getArraySize() : 1;

		DE_ASSERT(!isArray || block.getInstanceName());

		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
		{
			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
			const int			blockNdx		= layout.getBlockIndex(instanceName);
			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];

			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
			{
				const BufferVar& bufVar = *varIter;

				if ((bufVar.getFlags() & ACCESS_READ) == 0)
					continue; // Don't read from that variable.

				generateCompareSrc(src, resultVar, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixLoadFlag);
			}
		}
	}
}

// \todo [2013-10-14 pyry] Almost identical to generateCompareSrc - unify?

void generateWriteSrc (
	std::ostream&				src,
	const BufferLayout&			bufferLayout,
	const BufferBlock&			block,
	int							instanceNdx,
	const BlockDataPtr&			blockPtr,
	const BufferVar&			bufVar,
	const glu::SubTypeAccess&	accessPath,
	MatrixStoreFlags			matrixStoreFlag)
{
	const VarType curType = accessPath.getType();

	if (curType.isArrayType())
	{
		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();

		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.element(elemNdx), matrixStoreFlag);
	}
	else if (curType.isStructType())
	{
		const int numMembers = curType.getStructPtr()->getNumMembers();

		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
			generateWriteSrc(src, bufferLayout, block, instanceNdx, blockPtr, bufVar, accessPath.member(memberNdx), matrixStoreFlag);
	}
	else
	{
		DE_ASSERT(curType.isBasicType());

		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
		const int		varNdx	= bufferLayout.getVariableIndex(apiName);

		DE_ASSERT(varNdx >= 0);
		{
			const BufferVarLayoutEntry&	varLayout		= bufferLayout.bufferVars[varNdx];
			const string				shaderName		= getShaderName(block, instanceNdx, bufVar, accessPath.getPath());
			const glu::DataType			basicType		= curType.getBasicType();
			const bool					isMatrix		= glu::isDataTypeMatrix(basicType);
			const void*					valuePtr		= (const deUint8*)blockPtr.ptr + computeOffset(varLayout, accessPath.getPath());

			const char* castName = "";
			glu::DataType promoteType = vkt::typecomputil::getPromoteType(basicType);
			if (basicType != promoteType)
				castName = glu::getDataTypeName((!isMatrix || matrixStoreFlag == STORE_FULL_MATRIX) ? basicType : glu::getDataTypeMatrixColumnType(basicType));

			if (isMatrix)
			{
				switch (matrixStoreFlag)
				{
					case STORE_FULL_MATRIX:
					{
						src << "\t" << shaderName << " = " << castName << "(";
						generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, false, -1, valuePtr);
						src << ");\n";
						break;
					}
					case STORE_MATRIX_COLUMNS:
					{
						int numCols = glu::getDataTypeMatrixNumColumns(basicType);
						for (int colIdx = 0; colIdx < numCols; ++colIdx)
						{
							src << "\t" << shaderName << "[" << colIdx << "]" << " = " << castName << "(";
							generateImmMatrixSrc(src, basicType, varLayout.matrixStride, varLayout.isRowMajor, true, colIdx, valuePtr);
							src << ");\n";
						}
						break;
					}
					default:
						DE_ASSERT(false);
						break;
				}
			}
			else
			{
				src << "\t" << shaderName << " = " << castName << "(";
				generateImmScalarVectorSrc(src, basicType, valuePtr);
				src << ");\n";
			}
		}
	}
}
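
// An emitted store might look like this (hypothetical vec2 member block.v,
// empty castName for a type needing no promotion):
//
//	block.v = (vec2(1.0, 2.0));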
1390 
generateWriteSrc(std::ostream & src,const ShaderInterface & interface,const BufferLayout & layout,const vector<BlockDataPtr> & blockPointers,MatrixStoreFlags matrixStoreFlag)1391 void generateWriteSrc (std::ostream& src, const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& blockPointers, MatrixStoreFlags matrixStoreFlag)
1392 {
1393 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1394 	{
1395 		const BufferBlock&	block			= interface.getBlock(declNdx);
1396 		const bool			isArray			= block.isArray();
1397 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1398 
1399 		DE_ASSERT(!isArray || block.getInstanceName());
1400 
1401 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1402 		{
1403 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1404 			const int			blockNdx		= layout.getBlockIndex(instanceName);
1405 			const BlockDataPtr&	blockPtr		= blockPointers[blockNdx];
1406 
1407 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1408 			{
1409 				const BufferVar& bufVar = *varIter;
1410 
1411 				if ((bufVar.getFlags() & ACCESS_WRITE) == 0)
1412 					continue; // Don't write to that variable.
1413 
1414 				generateWriteSrc(src, layout, block, instanceNdx, blockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()), matrixStoreFlag);
1415 			}
1416 		}
1417 	}
1418 }
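// Editor's note: a minimal sketch of the GLSL that the two generateWriteSrc
// overloads above emit, assuming a hypothetical block instance "block" with a
// written member "vec2 v[2]" (member name and values are made up; exact
// parenthesization depends on generateImmScalarVectorSrc):
//
//	block.v[0] = vec2(-0.5, 1.25);
//	block.v[1] = vec2(0.75, -2.0);
//
// Arrays and structs are expanded recursively, so each basic-type leaf of the
// access path becomes one assignment whose literal is read from the host-side
// reference data at computeOffset(varLayout, accessPath.getPath()).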
1419 
1420 string generateComputeShader (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& comparePtrs, const vector<BlockDataPtr>& writePtrs, MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag, bool usePhysStorageBuffer)
1421 {
1422 	std::ostringstream src;
1423 
1424 	if (uses16BitStorage(interface) || uses8BitStorage(interface) ||
1425 		usesRelaxedLayout(interface) || usesScalarLayout(interface) ||
1426 		usesDescriptorIndexing(interface))
1427 	{
1428 		src << "#version 450\n";
1429 	}
1430 	else
1431 		src << "#version 310 es\n";
1432 
1433 
1434 	src << "#extension GL_EXT_shader_16bit_storage : enable\n";
1435 	src << "#extension GL_EXT_shader_8bit_storage : enable\n";
1436 	src << "#extension GL_EXT_scalar_block_layout : enable\n";
1437 	src << "#extension GL_EXT_buffer_reference : enable\n";
1438 	src << "#extension GL_EXT_nonuniform_qualifier : enable\n";
1439 	src << "layout(local_size_x = 1) in;\n";
1440 	src << "\n";
1441 
1442 	// Atomic counter for counting passed invocations.
1443 	src << "layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };\n\n";
1444 
1445 	std::vector<const StructType*> namedStructs;
1446 	interface.getNamedStructs(namedStructs);
1447 	for (std::vector<const StructType*>::const_iterator structIter = namedStructs.begin(); structIter != namedStructs.end(); structIter++)
1448 		src << glu::declare(*structIter) << ";\n";
1449 
1450 	{
1451 		for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1452 		{
1453 			const BufferBlock& block = interface.getBlock(blockNdx);
1454 			generateDeclaration(src, block, 1 + blockNdx, usePhysStorageBuffer);
1455 		}
1456 
1457 		if (usePhysStorageBuffer)
1458 		{
1459 			src << "layout (push_constant, std430) uniform PC {\n";
1460 			for (int blockNdx = 0; blockNdx < interface.getNumBlocks(); blockNdx++)
1461 			{
1462 				const BufferBlock& block = interface.getBlock(blockNdx);
1463 				if (block.getInstanceName() != DE_NULL)
1464 				{
1465 					src << "	" << block.getBlockName() << " " << block.getInstanceName();
1466 					if (block.isArray())
1467 						src << "[" << block.getArraySize() << "]";
1468 					src << ";\n";
1469 				}
1470 			}
1471 			src << "};\n";
1472 		}
1473 	}
1474 
1475 	// Comparison utilities.
1476 	src << "\n";
1477 	generateCompareFuncs(src, interface);
1478 
1479 	src << "\n"
1480 		   "void main (void)\n"
1481 		   "{\n"
1482 		   "	bool allOk = true;\n";
1483 
1484 	// Value compare.
1485 	generateCompareSrc(src, "allOk", interface, layout, comparePtrs, matrixLoadFlag);
1486 
1487 	src << "	if (allOk)\n"
1488 		<< "		ac_numPassed++;\n"
1489 		<< "\n";
1490 
1491 	// Value write.
1492 	generateWriteSrc(src, interface, layout, writePtrs, matrixStoreFlag);
1493 
1494 	src << "}\n";
1495 
1496 	return src.str();
1497 }
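// Editor's note: the overall shape of a shader produced by
// generateComputeShader, sketched with hypothetical block contents and an
// approximate comparison-function name:
//
//	#version 450
//	#extension GL_EXT_shader_16bit_storage : enable
//	...
//	layout(local_size_x = 1) in;
//	layout(std140, binding = 0) buffer AcBlock { highp uint ac_numPassed; };
//	layout(std430, binding = 1) buffer Block { highp float a; highp float b; } block;
//	// ...comparison helpers from generateCompareFuncs()...
//
//	void main (void)
//	{
//		bool allOk = true;
//		allOk = allOk && compare_float(block.a, 1.0);	// generateCompareSrc
//		if (allOk)
//			ac_numPassed++;
//		block.b = 2.0;									// generateWriteSrc
//	}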
1498 
1499 void copyBufferVarData (const BufferVarLayoutEntry& dstEntry, const BlockDataPtr& dstBlockPtr, const BufferVarLayoutEntry& srcEntry, const BlockDataPtr& srcBlockPtr)
1500 {
1501 	DE_ASSERT(dstEntry.arraySize <= srcEntry.arraySize);
1502 	DE_ASSERT(dstEntry.topLevelArraySize <= srcEntry.topLevelArraySize);
1503 	DE_ASSERT(dstBlockPtr.lastUnsizedArraySize <= srcBlockPtr.lastUnsizedArraySize);
1504 	DE_ASSERT(dstEntry.type == srcEntry.type);
1505 
1506 	deUint8* const			dstBasePtr			= (deUint8*)dstBlockPtr.ptr + dstEntry.offset;
1507 	const deUint8* const	srcBasePtr			= (const deUint8*)srcBlockPtr.ptr + srcEntry.offset;
1508 	const int				scalarSize			= glu::getDataTypeScalarSize(dstEntry.type);
1509 	const bool				isMatrix			= glu::isDataTypeMatrix(dstEntry.type);
1510 	glu::DataType			scalarType			= glu::getDataTypeScalarType(dstEntry.type);
1511 	const size_t			compSize			= getDataTypeByteSize(scalarType);
1512 	const int				dstArraySize		= dstEntry.arraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.arraySize;
1513 	const int				dstArrayStride		= dstEntry.arrayStride;
1514 	const int				dstTopLevelSize		= dstEntry.topLevelArraySize == 0 ? dstBlockPtr.lastUnsizedArraySize : dstEntry.topLevelArraySize;
1515 	const int				dstTopLevelStride	= dstEntry.topLevelArrayStride;
1516 	const int				srcArraySize		= srcEntry.arraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.arraySize;
1517 	const int				srcArrayStride		= srcEntry.arrayStride;
1518 	const int				srcTopLevelSize		= srcEntry.topLevelArraySize == 0 ? srcBlockPtr.lastUnsizedArraySize : srcEntry.topLevelArraySize;
1519 	const int				srcTopLevelStride	= srcEntry.topLevelArrayStride;
1520 
1521 	DE_ASSERT(dstArraySize <= srcArraySize && dstTopLevelSize <= srcTopLevelSize);
1522 	DE_UNREF(srcArraySize && srcTopLevelSize);
1523 
1524 	for (int topElemNdx = 0; topElemNdx < dstTopLevelSize; topElemNdx++)
1525 	{
1526 		deUint8* const			dstTopPtr	= dstBasePtr + topElemNdx*dstTopLevelStride;
1527 		const deUint8* const	srcTopPtr	= srcBasePtr + topElemNdx*srcTopLevelStride;
1528 
1529 		for (int elementNdx = 0; elementNdx < dstArraySize; elementNdx++)
1530 		{
1531 			deUint8* const			dstElemPtr	= dstTopPtr + elementNdx*dstArrayStride;
1532 			const deUint8* const	srcElemPtr	= srcTopPtr + elementNdx*srcArrayStride;
1533 
1534 			if (isMatrix)
1535 			{
1536 				const int	numRows	= glu::getDataTypeMatrixNumRows(dstEntry.type);
1537 				const int	numCols	= glu::getDataTypeMatrixNumColumns(dstEntry.type);
1538 
1539 				for (int colNdx = 0; colNdx < numCols; colNdx++)
1540 				{
1541 					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1542 					{
1543 						deUint8*		dstCompPtr	= dstElemPtr + (dstEntry.isRowMajor ? rowNdx*dstEntry.matrixStride + colNdx*compSize
1544 																						: colNdx*dstEntry.matrixStride + rowNdx*compSize);
1545 						const deUint8*	srcCompPtr	= srcElemPtr + (srcEntry.isRowMajor ? rowNdx*srcEntry.matrixStride + colNdx*compSize
1546 																						: colNdx*srcEntry.matrixStride + rowNdx*compSize);
1547 
1548 						DE_ASSERT((deIntptr)(srcCompPtr + compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1549 						DE_ASSERT((deIntptr)(dstCompPtr + compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1550 						deMemcpy(dstCompPtr, srcCompPtr, compSize);
1551 					}
1552 				}
1553 			}
1554 			else
1555 			{
1556 				DE_ASSERT((deIntptr)(srcElemPtr + scalarSize*compSize) - (deIntptr)srcBlockPtr.ptr <= (deIntptr)srcBlockPtr.size);
1557 				DE_ASSERT((deIntptr)(dstElemPtr + scalarSize*compSize) - (deIntptr)dstBlockPtr.ptr <= (deIntptr)dstBlockPtr.size);
1558 				deMemcpy(dstElemPtr, srcElemPtr, scalarSize*compSize);
1559 			}
1560 		}
1561 	}
1562 }
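// Editor's note: the loops above implement the standard stride-based layout
// addressing. For a column-major matrix, the component at (row, col) of array
// element e inside top-level element t lives at
//
//	entry.offset + t*topLevelArrayStride + e*arrayStride
//	             + col*matrixStride + row*compSize
//
// (row and col swap roles in the row-major case). For example, a column-major
// matrix with matrixStride 16 and compSize 4 places (row 1, col 1) at
// entry.offset + 16 + 4 = entry.offset + 20.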
1563 
1564 void copyData (const BufferLayout& dstLayout, const vector<BlockDataPtr>& dstBlockPointers, const BufferLayout& srcLayout, const vector<BlockDataPtr>& srcBlockPointers)
1565 {
1566 	// \note The source layout is used as the reference, in case activeVarIndices happens to be incorrect in the dstLayout blocks.
1567 	int numBlocks = (int)srcLayout.blocks.size();
1568 
1569 	for (int srcBlockNdx = 0; srcBlockNdx < numBlocks; srcBlockNdx++)
1570 	{
1571 		const BlockLayoutEntry&		srcBlock	= srcLayout.blocks[srcBlockNdx];
1572 		const BlockDataPtr&			srcBlockPtr	= srcBlockPointers[srcBlockNdx];
1573 		int							dstBlockNdx	= dstLayout.getBlockIndex(srcBlock.name.c_str());
1574 
1575 		if (dstBlockNdx >= 0)
1576 		{
1577 			DE_ASSERT(de::inBounds(dstBlockNdx, 0, (int)dstBlockPointers.size()));
1578 
1579 			const BlockDataPtr& dstBlockPtr = dstBlockPointers[dstBlockNdx];
1580 
1581 			for (vector<int>::const_iterator srcVarNdxIter = srcBlock.activeVarIndices.begin(); srcVarNdxIter != srcBlock.activeVarIndices.end(); srcVarNdxIter++)
1582 			{
1583 				const BufferVarLayoutEntry&	srcEntry	= srcLayout.bufferVars[*srcVarNdxIter];
1584 				int							dstVarNdx	= dstLayout.getVariableIndex(srcEntry.name.c_str());
1585 
1586 				if (dstVarNdx >= 0)
1587 					copyBufferVarData(dstLayout.bufferVars[dstVarNdx], dstBlockPtr, srcEntry, srcBlockPtr);
1588 			}
1589 		}
1590 	}
1591 }
1592 
1593 void copyNonWrittenData (
1594 	const BufferLayout&			layout,
1595 	const BufferBlock&			block,
1596 	int							instanceNdx,
1597 	const BlockDataPtr&			srcBlockPtr,
1598 	const BlockDataPtr&			dstBlockPtr,
1599 	const BufferVar&			bufVar,
1600 	const glu::SubTypeAccess&	accessPath)
1601 {
1602 	const VarType curType = accessPath.getType();
1603 
1604 	if (curType.isArrayType())
1605 	{
1606 		const int arraySize = curType.getArraySize() == VarType::UNSIZED_ARRAY ? block.getLastUnsizedArraySize(instanceNdx) : curType.getArraySize();
1607 
1608 		for (int elemNdx = 0; elemNdx < arraySize; elemNdx++)
1609 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.element(elemNdx));
1610 	}
1611 	else if (curType.isStructType())
1612 	{
1613 		const int numMembers = curType.getStructPtr()->getNumMembers();
1614 
1615 		for (int memberNdx = 0; memberNdx < numMembers; memberNdx++)
1616 			copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, accessPath.member(memberNdx));
1617 	}
1618 	else
1619 	{
1620 		DE_ASSERT(curType.isBasicType());
1621 
1622 		const string	apiName	= getAPIName(block, bufVar, accessPath.getPath());
1623 		const int		varNdx	= layout.getVariableIndex(apiName);
1624 
1625 		DE_ASSERT(varNdx >= 0);
1626 		{
1627 			const BufferVarLayoutEntry& varLayout = layout.bufferVars[varNdx];
1628 			copyBufferVarData(varLayout, dstBlockPtr, varLayout, srcBlockPtr);
1629 		}
1630 	}
1631 }
1632 
1633 void copyNonWrittenData (const ShaderInterface& interface, const BufferLayout& layout, const vector<BlockDataPtr>& srcPtrs, const vector<BlockDataPtr>& dstPtrs)
1634 {
1635 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1636 	{
1637 		const BufferBlock&	block			= interface.getBlock(declNdx);
1638 		const bool			isArray			= block.isArray();
1639 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1640 
1641 		DE_ASSERT(!isArray || block.getInstanceName());
1642 
1643 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1644 		{
1645 			const string		instanceName	= block.getBlockName() + (isArray ? "[" + de::toString(instanceNdx) + "]" : string(""));
1646 			const int			blockNdx		= layout.getBlockIndex(instanceName);
1647 			const BlockDataPtr&	srcBlockPtr		= srcPtrs[blockNdx];
1648 			const BlockDataPtr&	dstBlockPtr		= dstPtrs[blockNdx];
1649 
1650 			for (BufferBlock::const_iterator varIter = block.begin(); varIter != block.end(); varIter++)
1651 			{
1652 				const BufferVar& bufVar = *varIter;
1653 
1654 				if (bufVar.getFlags() & ACCESS_WRITE)
1655 					continue;
1656 
1657 				copyNonWrittenData(layout, block, instanceNdx, srcBlockPtr, dstBlockPtr, bufVar, glu::SubTypeAccess(bufVar.getType()));
1658 			}
1659 		}
1660 	}
1661 }
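// Editor's note: after this pass the write-side reference data is complete.
// For a hypothetical block { float a; /* read-only */ float b; /* ACCESS_WRITE */ },
// b keeps the value generated for the write data while a is copied verbatim
// from the initial data, so compareData() can later check the whole mapped
// buffer against the writeData pointers in one go.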
1662 
1663 bool compareComponents (glu::DataType scalarType, const void* ref, const void* res, int numComps)
1664 {
1665 	if (scalarType == glu::TYPE_FLOAT)
1666 	{
1667 		const float threshold = 0.05f; // Same threshold as used in the shaders; adequate for the value ranges generated here.
1668 
1669 		for (int ndx = 0; ndx < numComps; ndx++)
1670 		{
1671 			const float		refVal		= *((const float*)ref + ndx);
1672 			const float		resVal		= *((const float*)res + ndx);
1673 
1674 			if (deFloatAbs(resVal - refVal) >= threshold)
1675 				return false;
1676 		}
1677 	}
1678 	else if (scalarType == glu::TYPE_BOOL)
1679 	{
1680 		for (int ndx = 0; ndx < numComps; ndx++)
1681 		{
1682 			const deUint32	refVal		= *((const deUint32*)ref + ndx);
1683 			const deUint32	resVal		= *((const deUint32*)res + ndx);
1684 
1685 			if ((refVal != 0) != (resVal != 0))
1686 				return false;
1687 		}
1688 	}
1689 	else if (scalarType == glu::TYPE_INT8 || scalarType == glu::TYPE_UINT8)
1690 	{
1691 		return deMemCmp(ref, res, numComps*sizeof(deUint8)) == 0;
1692 	}
1693 	else if (scalarType == glu::TYPE_INT16 || scalarType == glu::TYPE_UINT16 || scalarType == glu::TYPE_FLOAT16)
1694 	{
1695 		return deMemCmp(ref, res, numComps*sizeof(deUint16)) == 0;
1696 	}
1697 	else
1698 	{
1699 		DE_ASSERT(scalarType == glu::TYPE_INT || scalarType == glu::TYPE_UINT);
1700 
1701 		return deMemCmp(ref, res, numComps*sizeof(deUint32)) == 0;
1702 	}
1703 
1704 	return true;
1705 }
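// Editor's note: a worked example of the float path above with its 0.05
// threshold: ref = 1.00 vs res = 1.03 compares equal (|diff| = 0.03 < 0.05),
// while ref = 1.00 vs res = 1.06 fails (0.06 >= 0.05). Bools compare by
// zero/non-zero equivalence; all integer and 8/16-bit types must match
// bit-exactly via deMemCmp.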
1706 
1707 bool compareBufferVarData (tcu::TestLog& log, const BufferVarLayoutEntry& refEntry, const BlockDataPtr& refBlockPtr, const BufferVarLayoutEntry& resEntry, const BlockDataPtr& resBlockPtr)
1708 {
1709 	DE_ASSERT(resEntry.arraySize <= refEntry.arraySize);
1710 	DE_ASSERT(resEntry.topLevelArraySize <= refEntry.topLevelArraySize);
1711 	DE_ASSERT(resBlockPtr.lastUnsizedArraySize <= refBlockPtr.lastUnsizedArraySize);
1712 	DE_ASSERT(resEntry.type == refEntry.type);
1713 
1714 	deUint8* const			resBasePtr			= (deUint8*)resBlockPtr.ptr + resEntry.offset;
1715 	const deUint8* const	refBasePtr			= (const deUint8*)refBlockPtr.ptr + refEntry.offset;
1716 	const glu::DataType		scalarType			= glu::getDataTypeScalarType(refEntry.type);
1717 	const int				scalarSize			= glu::getDataTypeScalarSize(resEntry.type);
1718 	const bool				isMatrix			= glu::isDataTypeMatrix(resEntry.type);
1719 	const size_t			compSize			= getDataTypeByteSize(scalarType);
1720 	const int				maxPrints			= 3;
1721 	int						numFailed			= 0;
1722 
1723 	const int				resArraySize		= resEntry.arraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.arraySize;
1724 	const int				resArrayStride		= resEntry.arrayStride;
1725 	const int				resTopLevelSize		= resEntry.topLevelArraySize == 0 ? resBlockPtr.lastUnsizedArraySize : resEntry.topLevelArraySize;
1726 	const int				resTopLevelStride	= resEntry.topLevelArrayStride;
1727 	const int				refArraySize		= refEntry.arraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.arraySize;
1728 	const int				refArrayStride		= refEntry.arrayStride;
1729 	const int				refTopLevelSize		= refEntry.topLevelArraySize == 0 ? refBlockPtr.lastUnsizedArraySize : refEntry.topLevelArraySize;
1730 	const int				refTopLevelStride	= refEntry.topLevelArrayStride;
1731 
1732 	DE_ASSERT(resArraySize <= refArraySize && resTopLevelSize <= refTopLevelSize);
1733 	DE_UNREF(refArraySize && refTopLevelSize);
1734 
1735 	for (int topElemNdx = 0; topElemNdx < resTopLevelSize; topElemNdx++)
1736 	{
1737 		deUint8* const			resTopPtr	= resBasePtr + topElemNdx*resTopLevelStride;
1738 		const deUint8* const	refTopPtr	= refBasePtr + topElemNdx*refTopLevelStride;
1739 
1740 		for (int elementNdx = 0; elementNdx < resArraySize; elementNdx++)
1741 		{
1742 			deUint8* const			resElemPtr	= resTopPtr + elementNdx*resArrayStride;
1743 			const deUint8* const	refElemPtr	= refTopPtr + elementNdx*refArrayStride;
1744 
1745 			if (isMatrix)
1746 			{
1747 				const int	numRows	= glu::getDataTypeMatrixNumRows(resEntry.type);
1748 				const int	numCols	= glu::getDataTypeMatrixNumColumns(resEntry.type);
1749 				bool		isOk	= true;
1750 
1751 				for (int colNdx = 0; colNdx < numCols; colNdx++)
1752 				{
1753 					for (int rowNdx = 0; rowNdx < numRows; rowNdx++)
1754 					{
1755 						deUint8*		resCompPtr	= resElemPtr + (resEntry.isRowMajor ? rowNdx*resEntry.matrixStride + colNdx*compSize
1756 																						: colNdx*resEntry.matrixStride + rowNdx*compSize);
1757 						const deUint8*	refCompPtr	= refElemPtr + (refEntry.isRowMajor ? rowNdx*refEntry.matrixStride + colNdx*compSize
1758 																						: colNdx*refEntry.matrixStride + rowNdx*compSize);
1759 
1760 						DE_ASSERT((deIntptr)(refCompPtr + compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1761 						DE_ASSERT((deIntptr)(resCompPtr + compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
1762 
1763 						isOk = isOk && compareComponents(scalarType, resCompPtr, refCompPtr, 1);
1764 					}
1765 				}
1766 
1767 				if (!isOk)
1768 				{
1769 					numFailed += 1;
1770 					if (numFailed < maxPrints)
1771 					{
1772 						std::ostringstream expected, got;
1773 						generateImmMatrixSrc(expected, refEntry.type, refEntry.matrixStride, refEntry.isRowMajor, false, -1, refElemPtr);
1774 						generateImmMatrixSrc(got, resEntry.type, resEntry.matrixStride, resEntry.isRowMajor, false, -1, resElemPtr);
1775 						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1776 												<< "  expected " << expected.str() << "\n"
1777 												<< "  got " << got.str()
1778 							<< TestLog::EndMessage;
1779 					}
1780 				}
1781 			}
1782 			else
1783 			{
1784 				DE_ASSERT((deIntptr)(refElemPtr + scalarSize*compSize) - (deIntptr)refBlockPtr.ptr <= (deIntptr)refBlockPtr.size);
1785 				DE_ASSERT((deIntptr)(resElemPtr + scalarSize*compSize) - (deIntptr)resBlockPtr.ptr <= (deIntptr)resBlockPtr.size);
1786 
1787 				const bool isOk = compareComponents(scalarType, resElemPtr, refElemPtr, scalarSize);
1788 
1789 				if (!isOk)
1790 				{
1791 					numFailed += 1;
1792 					if (numFailed < maxPrints)
1793 					{
1794 						std::ostringstream expected, got;
1795 						generateImmScalarVectorSrc(expected, refEntry.type, refElemPtr);
1796 						generateImmScalarVectorSrc(got, resEntry.type, resElemPtr);
1797 						log << TestLog::Message << "ERROR: mismatch in " << refEntry.name << ", top-level ndx " << topElemNdx << ", bottom-level ndx " << elementNdx << ":\n"
1798 												<< "  expected " << expected.str() << "\n"
1799 												<< "  got " << got.str()
1800 							<< TestLog::EndMessage;
1801 					}
1802 				}
1803 			}
1804 		}
1805 	}
1806 
1807 	if (numFailed >= maxPrints)
1808 		log << TestLog::Message << "... (" << numFailed << " failures for " << refEntry.name << " in total)" << TestLog::EndMessage;
1809 
1810 	return numFailed == 0;
1811 }
1812 
1813 bool compareData (tcu::TestLog& log, const BufferLayout& refLayout, const vector<BlockDataPtr>& refBlockPointers, const BufferLayout& resLayout, const vector<BlockDataPtr>& resBlockPointers)
1814 {
1815 	const int	numBlocks	= (int)refLayout.blocks.size();
1816 	bool		allOk		= true;
1817 
1818 	for (int refBlockNdx = 0; refBlockNdx < numBlocks; refBlockNdx++)
1819 	{
1820 		const BlockLayoutEntry&		refBlock	= refLayout.blocks[refBlockNdx];
1821 		const BlockDataPtr&			refBlockPtr	= refBlockPointers[refBlockNdx];
1822 		int							resBlockNdx	= resLayout.getBlockIndex(refBlock.name.c_str());
1823 
1824 		if (resBlockNdx >= 0)
1825 		{
1826 			DE_ASSERT(de::inBounds(resBlockNdx, 0, (int)resBlockPointers.size()));
1827 
1828 			const BlockDataPtr& resBlockPtr = resBlockPointers[resBlockNdx];
1829 
1830 			for (vector<int>::const_iterator refVarNdxIter = refBlock.activeVarIndices.begin(); refVarNdxIter != refBlock.activeVarIndices.end(); refVarNdxIter++)
1831 			{
1832 				const BufferVarLayoutEntry&	refEntry	= refLayout.bufferVars[*refVarNdxIter];
1833 				int							resVarNdx	= resLayout.getVariableIndex(refEntry.name.c_str());
1834 
1835 				if (resVarNdx >= 0)
1836 				{
1837 					const BufferVarLayoutEntry& resEntry = resLayout.bufferVars[resVarNdx];
1838 					allOk = compareBufferVarData(log, refEntry, refBlockPtr, resEntry, resBlockPtr) && allOk;
1839 				}
1840 			}
1841 		}
1842 	}
1843 
1844 	return allOk;
1845 }
1846 
1847 string getBlockAPIName (const BufferBlock& block, int instanceNdx)
1848 {
1849 	DE_ASSERT(block.isArray() || instanceNdx == 0);
1850 	return block.getBlockName() + (block.isArray() ? ("[" + de::toString(instanceNdx) + "]") : string());
1851 }
1852 
1853 // \note Some implementations don't report block members in the order they are declared.
1854 //		 To check whether the size has to be adjusted by the actual size of some
1855 //		 top-level array, we only need to know a) whether there is an unsized
1856 //		 top-level array, and b) what the stride of that array is.
1857 
1858 static bool hasUnsizedArray (const BufferLayout& layout, const BlockLayoutEntry& entry)
1859 {
1860 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1861 	{
1862 		if (isUnsizedArray(layout.bufferVars[*varNdx]))
1863 			return true;
1864 	}
1865 
1866 	return false;
1867 }
1868 
1869 static int getUnsizedArrayStride (const BufferLayout& layout, const BlockLayoutEntry& entry)
1870 {
1871 	for (vector<int>::const_iterator varNdx = entry.activeVarIndices.begin(); varNdx != entry.activeVarIndices.end(); ++varNdx)
1872 	{
1873 		const BufferVarLayoutEntry& varEntry = layout.bufferVars[*varNdx];
1874 
1875 		if (varEntry.arraySize == 0)
1876 			return varEntry.arrayStride;
1877 		else if (varEntry.topLevelArraySize == 0)
1878 			return varEntry.topLevelArrayStride;
1879 	}
1880 
1881 	return 0;
1882 }
1883 
1884 vector<int> computeBufferSizes (const ShaderInterface& interface, const BufferLayout& layout)
1885 {
1886 	vector<int> sizes(layout.blocks.size());
1887 
1888 	for (int declNdx = 0; declNdx < interface.getNumBlocks(); declNdx++)
1889 	{
1890 		const BufferBlock&	block			= interface.getBlock(declNdx);
1891 		const bool			isArray			= block.isArray();
1892 		const int			numInstances	= isArray ? block.getArraySize() : 1;
1893 
1894 		for (int instanceNdx = 0; instanceNdx < numInstances; instanceNdx++)
1895 		{
1896 			const string	apiName		= getBlockAPIName(block, instanceNdx);
1897 			const int		blockNdx	= layout.getBlockIndex(apiName);
1898 
1899 			if (blockNdx >= 0)
1900 			{
1901 				const BlockLayoutEntry&		blockLayout		= layout.blocks[blockNdx];
1902 				const int					baseSize		= blockLayout.size;
1903 				const bool					isLastUnsized	= hasUnsizedArray(layout, blockLayout);
1904 				const int					lastArraySize	= isLastUnsized ? block.getLastUnsizedArraySize(instanceNdx) : 0;
1905 				const int					stride			= isLastUnsized ? getUnsizedArrayStride(layout, blockLayout) : 0;
1906 
1907 				sizes[blockNdx] = baseSize + lastArraySize*stride;
1908 			}
1909 		}
1910 	}
1911 
1912 	return sizes;
1913 }
1914 
1915 BlockDataPtr getBlockDataPtr (const BufferLayout& layout, const BlockLayoutEntry& blockLayout, void* ptr, int bufferSize)
1916 {
1917 	const bool	isLastUnsized	= hasUnsizedArray(layout, blockLayout);
1918 	const int	baseSize		= blockLayout.size;
1919 
1920 	if (isLastUnsized)
1921 	{
1922 		const int		lastArrayStride	= getUnsizedArrayStride(layout, blockLayout);
1923 		const int		lastArraySize	= (bufferSize-baseSize) / (lastArrayStride ? lastArrayStride : 1);
1924 
1925 		DE_ASSERT(baseSize + lastArraySize*lastArrayStride == bufferSize);
1926 
1927 		return BlockDataPtr(ptr, bufferSize, lastArraySize);
1928 	}
1929 	else
1930 		return BlockDataPtr(ptr, bufferSize, 0);
1931 }
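// Editor's note: computeBufferSizes() and getBlockDataPtr() are inverses in
// the unsized-array term. Assuming a hypothetical block with baseSize 64 and
// an unsized array of stride 16 sized to 8 elements at run time:
//
//	bufferSize    = 64 + 8*16 = 192		// computeBufferSizes()
//	lastArraySize = (192 - 64) / 16 = 8	// getBlockDataPtr()
//
// which is why the DE_ASSERT above requires the division to be exact.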
1932 
1933 struct Buffer
1934 {
1935 	deUint32				buffer;
1936 	int						size;
1937 
1938 	Buffer (deUint32 buffer_, int size_) : buffer(buffer_), size(size_) {}
1939 	Buffer (void) : buffer(0), size(0) {}
1940 };
1941 
1942 struct BlockLocation
1943 {
1944 	int						index;
1945 	int						offset;
1946 	int						size;
1947 
1948 	BlockLocation (int index_, int offset_, int size_) : index(index_), offset(offset_), size(size_) {}
1949 	BlockLocation (void) : index(0), offset(0), size(0) {}
1950 };
1951 
1952 void initRefDataStorage (const ShaderInterface& interface, const BufferLayout& layout, RefDataStorage& storage)
1953 {
1954 	DE_ASSERT(storage.data.empty() && storage.pointers.empty());
1955 
1956 	const vector<int>	bufferSizes		= computeBufferSizes(interface, layout);
1957 	int					totalSize		= 0;
1958 	const int			vec4Alignment	= (int)sizeof(deUint32)*4;
1959 
1960 	for (vector<int>::const_iterator sizeIter = bufferSizes.begin(); sizeIter != bufferSizes.end(); ++sizeIter)
1961 	{
1962 		// Include enough space for alignment of individual blocks
1963 		totalSize += deRoundUp32(*sizeIter, vec4Alignment);
1964 	}
1965 
1966 	storage.data.resize(totalSize);
1967 
1968 	// Pointers for each block.
1969 	{
1970 		deUint8*	basePtr		= storage.data.empty() ? DE_NULL : &storage.data[0];
1971 		int			curOffset	= 0;
1972 
1973 		DE_ASSERT(bufferSizes.size() == layout.blocks.size());
1974 		DE_ASSERT(totalSize == 0 || basePtr);
1975 
1976 		storage.pointers.resize(layout.blocks.size());
1977 
1978 		for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1979 		{
1980 			const BlockLayoutEntry&	blockLayout		= layout.blocks[blockNdx];
1981 			const int				bufferSize		= bufferSizes[blockNdx];
1982 
1983 			storage.pointers[blockNdx] = getBlockDataPtr(layout, blockLayout, basePtr + curOffset, bufferSize);
1984 
1985 			// Ensure each new block starts fully aligned to avoid unaligned host accesses
1986 			curOffset += deRoundUp32(bufferSize, vec4Alignment);
1987 		}
1988 	}
1989 }
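// Editor's note: a worked example of the vec4-aligned packing above, assuming
// hypothetical block sizes { 20, 64, 9 } and vec4Alignment = 16:
//
//	block 0: offset  0, reserves deRoundUp32(20, 16) = 32 bytes
//	block 1: offset 32, reserves deRoundUp32(64, 16) = 64 bytes
//	block 2: offset 96, reserves deRoundUp32( 9, 16) = 16 bytes
//	totalSize = 112 bytes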
1990 
1991 
1992 vector<BlockDataPtr> blockLocationsToPtrs (const BufferLayout& layout, const vector<BlockLocation>& blockLocations, const vector<void*>& bufPtrs)
1993 {
1994 	vector<BlockDataPtr> blockPtrs(blockLocations.size());
1995 
1996 	DE_ASSERT(layout.blocks.size() == blockLocations.size());
1997 
1998 	for (int blockNdx = 0; blockNdx < (int)layout.blocks.size(); blockNdx++)
1999 	{
2000 		const BlockLayoutEntry&	blockLayout		= layout.blocks[blockNdx];
2001 		const BlockLocation&	location		= blockLocations[blockNdx];
2002 
2003 		blockPtrs[blockNdx] = getBlockDataPtr(layout, blockLayout, (deUint8*)bufPtrs[location.index] + location.offset, location.size);
2004 	}
2005 
2006 	return blockPtrs;
2007 }
2008 
2009 } // anonymous (utilities)
2010 
2011 de::MovePtr<vk::Allocation> allocateAndBindMemory (Context& context, vk::VkBuffer buffer, vk::MemoryRequirement memReqs)
2012 {
2013 	const vk::DeviceInterface&		vkd		= context.getDeviceInterface();
2014 	const vk::VkMemoryRequirements	bufReqs	= vk::getBufferMemoryRequirements(vkd, context.getDevice(), buffer);
2015 	de::MovePtr<vk::Allocation>		memory	= context.getDefaultAllocator().allocate(bufReqs, memReqs);
2016 
2017 	vkd.bindBufferMemory(context.getDevice(), buffer, memory->getMemory(), memory->getOffset());
2018 
2019 	return memory;
2020 }
2021 
2022 vk::Move<vk::VkBuffer> createBuffer (Context& context, vk::VkDeviceSize bufferSize, vk::VkBufferUsageFlags usageFlags)
2023 {
2024 	const vk::VkDevice			vkDevice			= context.getDevice();
2025 	const vk::DeviceInterface&	vk					= context.getDeviceInterface();
2026 	const deUint32				queueFamilyIndex	= context.getUniversalQueueFamilyIndex();
2027 
2028 	const vk::VkBufferCreateInfo	bufferInfo		=
2029 	{
2030 		vk::VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
2031 		DE_NULL,									// const void*			pNext;
2032 		0u,											// VkBufferCreateFlags	flags;
2033 		bufferSize,									// VkDeviceSize			size;
2034 		usageFlags,									// VkBufferUsageFlags	usage;
2035 		vk::VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
2036 		1u,											// deUint32				queueFamilyIndexCount;
2037 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
2038 	};
2039 
2040 	return vk::createBuffer(vk, vkDevice, &bufferInfo);
2041 }
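// Editor's note: a minimal usage sketch of the two helpers above, as iterate()
// below applies them (srcData is hypothetical; error handling and the required
// flushMappedMemoryRange call omitted for brevity):
//
//	vk::Move<vk::VkBuffer>			buf		= createBuffer(context, 256, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
//	de::MovePtr<vk::Allocation>		alloc	= allocateAndBindMemory(context, *buf, vk::MemoryRequirement::HostVisible);
//	deMemcpy(alloc->getHostPtr(), srcData, 256);	// fill through the host mapping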
2042 
2043 // SSBOLayoutCaseInstance
2044 
2045 class SSBOLayoutCaseInstance : public TestInstance
2046 {
2047 public:
2048 								SSBOLayoutCaseInstance	(Context&					context,
2049 														SSBOLayoutCase::BufferMode	bufferMode,
2050 														const ShaderInterface&		interface,
2051 														const BufferLayout&			refLayout,
2052 														const RefDataStorage&		initialData,
2053 														const RefDataStorage&		writeData,
2054 														bool						usePhysStorageBuffer);
2055 	virtual						~SSBOLayoutCaseInstance	(void);
2056 	virtual tcu::TestStatus		iterate						(void);
2057 
2058 private:
2059 	SSBOLayoutCase::BufferMode	m_bufferMode;
2060 	const ShaderInterface&		m_interface;
2061 	const BufferLayout&			m_refLayout;
2062 	const RefDataStorage&		m_initialData;	// Initial data stored in buffer.
2063 	const RefDataStorage&		m_writeData;	// Data written by compute shader.
2064 	const bool					m_usePhysStorageBuffer;
2065 
2066 	typedef de::SharedPtr<vk::Unique<vk::VkBuffer> >	VkBufferSp;
2067 	typedef de::SharedPtr<vk::Allocation>				AllocationSp;
2068 
2069 	std::vector<VkBufferSp>		m_uniformBuffers;
2070 	std::vector<AllocationSp>	m_uniformAllocs;
2071 };
2072 
2073 SSBOLayoutCaseInstance::SSBOLayoutCaseInstance (Context&					context,
2074 												SSBOLayoutCase::BufferMode	bufferMode,
2075 												const ShaderInterface&		interface,
2076 												const BufferLayout&			refLayout,
2077 												const RefDataStorage&		initialData,
2078 												const RefDataStorage&		writeData,
2079 												bool						usePhysStorageBuffer)
2080 	: TestInstance	(context)
2081 	, m_bufferMode	(bufferMode)
2082 	, m_interface	(interface)
2083 	, m_refLayout	(refLayout)
2084 	, m_initialData	(initialData)
2085 	, m_writeData	(writeData)
2086 	, m_usePhysStorageBuffer(usePhysStorageBuffer)
2087 {
2088 }
2089 
2090 SSBOLayoutCaseInstance::~SSBOLayoutCaseInstance (void)
2091 {
2092 }
2093 
2094 tcu::TestStatus SSBOLayoutCaseInstance::iterate (void)
2095 {
2096 	// TODO: add compute-stage availability check.
2097 	const vk::DeviceInterface&	vk					= m_context.getDeviceInterface();
2098 	const vk::VkDevice			device				= m_context.getDevice();
2099 	const vk::VkQueue			queue				= m_context.getUniversalQueue();
2100 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
2101 
2102 	// Create descriptor set
2103 	const deUint32 acBufferSize = 1024;
2104 	vk::Move<vk::VkBuffer> acBuffer (createBuffer(m_context, acBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
2105 	de::UniquePtr<vk::Allocation> acBufferAlloc (allocateAndBindMemory(m_context, *acBuffer, vk::MemoryRequirement::HostVisible));
2106 
2107 	deMemset(acBufferAlloc->getHostPtr(), 0, acBufferSize);
2108 	flushMappedMemoryRange(vk, device, acBufferAlloc->getMemory(), acBufferAlloc->getOffset(), acBufferSize);
2109 
2110 	vk::DescriptorSetLayoutBuilder setLayoutBuilder;
2111 	vk::DescriptorPoolBuilder poolBuilder;
2112 
2113 	setLayoutBuilder
2114 		.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2115 
2116 	int numBlocks = 0;
2117 	const int numBindings = m_interface.getNumBlocks();
2118 	for (int bindingNdx = 0; bindingNdx < numBindings; bindingNdx++)
2119 	{
2120 		const BufferBlock& block = m_interface.getBlock(bindingNdx);
2121 		if (block.isArray())
2122 		{
2123 			setLayoutBuilder
2124 				.addArrayBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, block.getArraySize(), vk::VK_SHADER_STAGE_COMPUTE_BIT);
2125 			numBlocks += block.getArraySize();
2126 		}
2127 		else
2128 		{
2129 			setLayoutBuilder
2130 				.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
2131 			numBlocks += 1;
2132 		}
2133 	}
2134 
2135 	poolBuilder
2136 		.addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)(1 + numBlocks));
2137 
2138 	const vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(setLayoutBuilder.build(vk, device));
2139 	const vk::Unique<vk::VkDescriptorPool> descriptorPool(poolBuilder.build(vk, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2140 
2141 	const vk::VkDescriptorSetAllocateInfo allocInfo =
2142 	{
2143 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2144 		DE_NULL,
2145 		*descriptorPool,
2146 		1u,
2147 		&descriptorSetLayout.get(),
2148 	};
2149 
2150 	const vk::Unique<vk::VkDescriptorSet> descriptorSet(allocateDescriptorSet(vk, device, &allocInfo));
2151 	const vk::VkDescriptorBufferInfo descriptorInfo = makeDescriptorBufferInfo(*acBuffer, 0ull, acBufferSize);
2152 
2153 	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
2154 	std::vector<vk::VkDescriptorBufferInfo>	descriptors(numBlocks);
2155 
2156 	setUpdateBuilder
2157 		.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorInfo);
2158 
2159 	vector<BlockDataPtr>  mappedBlockPtrs;
2160 
2161 	vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
2162 	bool memoryDeviceAddress = false;
2163 	if (m_usePhysStorageBuffer)
2164 	{
2165 		usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
2166 		if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
2167 			memoryDeviceAddress = true;
2168 	}
2169 
2170 	// Upload base buffers
2171 	const std::vector<int> bufferSizes	= computeBufferSizes(m_interface, m_refLayout);
2172 	{
2173 		std::vector<void*>				mapPtrs;
2174 		std::vector<BlockLocation>		blockLocations	(numBlocks);
2175 
2176 		DE_ASSERT(bufferSizes.size() == m_refLayout.blocks.size());
2177 
2178 		if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2179 		{
2180 			mapPtrs.resize(numBlocks);
2181 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2182 			{
2183 				const deUint32 bufferSize = bufferSizes[blockNdx];
2184 				DE_ASSERT(bufferSize > 0);
2185 
2186 				blockLocations[blockNdx] = BlockLocation(blockNdx, 0, bufferSize);
2187 
2188 				vk::Move<vk::VkBuffer>				buffer		= createBuffer(m_context, bufferSize, usageFlags);
2189 				de::MovePtr<vk::Allocation>			alloc		= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible | (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));
2190 
2191 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, 0ull, bufferSize);
2192 
2193 				mapPtrs[blockNdx] = alloc->getHostPtr();
2194 
2195 				m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2196 				m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2197 			}
2198 		}
2199 		else
2200 		{
2201 			DE_ASSERT(m_bufferMode == SSBOLayoutCase::BUFFERMODE_SINGLE);
2202 
2203 			vk::VkPhysicalDeviceProperties properties;
2204 			m_context.getInstanceInterface().getPhysicalDeviceProperties(m_context.getPhysicalDevice(), &properties);
2205 			const int	bindingAlignment	= (int)properties.limits.minStorageBufferOffsetAlignment;
2206 			int			curOffset			= 0;
2207 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2208 			{
2209 				const int bufferSize = bufferSizes[blockNdx];
2210 				DE_ASSERT(bufferSize > 0);
2211 
2212 				if (bindingAlignment > 0)
2213 					curOffset = deRoundUp32(curOffset, bindingAlignment);
2214 
2215 				blockLocations[blockNdx] = BlockLocation(0, curOffset, bufferSize);
2216 				curOffset += bufferSize;
2217 			}
2218 
2219 			const int						totalBufferSize = curOffset;
2220 			vk::Move<vk::VkBuffer>			buffer			= createBuffer(m_context, totalBufferSize, usageFlags);
2221 			de::MovePtr<vk::Allocation>		alloc			= allocateAndBindMemory(m_context, *buffer, vk::MemoryRequirement::HostVisible | (memoryDeviceAddress ? vk::MemoryRequirement::DeviceAddress : vk::MemoryRequirement::Any));
2222 
2223 			mapPtrs.push_back(alloc->getHostPtr());
2224 
2225 			for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2226 			{
2227 				const deUint32						bufferSize	= bufferSizes[blockNdx];
2228 				const deUint32						offset		= blockLocations[blockNdx].offset;
2229 
2230 				descriptors[blockNdx] = makeDescriptorBufferInfo(*buffer, offset, bufferSize);
2231 			}
2232 
2233 			m_uniformBuffers.push_back(VkBufferSp(new vk::Unique<vk::VkBuffer>(buffer)));
2234 			m_uniformAllocs.push_back(AllocationSp(alloc.release()));
2235 		}
2236 
2237 		// Update remaining bindings
2238 		{
2239 			int blockNdx = 0;
2240 			for (int bindingNdx = 0; bindingNdx < numBindings; ++bindingNdx)
2241 			{
2242 				const BufferBlock&	block				= m_interface.getBlock(bindingNdx);
2243 				const int			numBlocksInBinding	= (block.isArray() ? block.getArraySize() : 1);
2244 
2245 				setUpdateBuilder.writeArray(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(bindingNdx + 1),
2246 					vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, numBlocksInBinding, &descriptors[blockNdx]);
2247 
2248 				blockNdx += numBlocksInBinding;
2249 			}
2250 		}
2251 
2252 		// Copy the initial data to the storage buffers
2253 		{
2254 			mappedBlockPtrs = blockLocationsToPtrs(m_refLayout, blockLocations, mapPtrs);
2255 			copyData(m_refLayout, mappedBlockPtrs, m_refLayout, m_initialData.pointers);
2256 
2257 			for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2258 			{
2259 				vk::Allocation* alloc = m_uniformAllocs[allocNdx].get();
2260 				flushMappedMemoryRange(vk, device, alloc->getMemory(), alloc->getOffset(), VK_WHOLE_SIZE);
2261 			}
2262 		}
2263 	}
2264 
2265 	std::vector<vk::VkDeviceAddress> gpuAddrs;
2266 	// Query the buffer device addresses and push them via push constants
2267 	if (m_usePhysStorageBuffer)
2268 	{
2269 		const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
2270 
2271 		vk::VkBufferDeviceAddressInfo info =
2272 		{
2273 			vk::VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,		// VkStructureType	sType;
2274 			DE_NULL,												// const void*		pNext;
2275 			0,														// VkBuffer			buffer
2276 		};
2277 
2278 		for (deUint32 i = 0; i < descriptors.size(); ++i)
2279 		{
2280 			info.buffer = descriptors[i].buffer;
2281 			vk::VkDeviceAddress addr;
2282 			if (useKHR)
2283 				addr = vk.getBufferDeviceAddress(device, &info);
2284 			else
2285 				addr = vk.getBufferDeviceAddressEXT(device, &info);
2286 			addr += descriptors[i].offset;
2287 			gpuAddrs.push_back(addr);
2288 		}
2289 	}
2290 
2291 	setUpdateBuilder.update(vk, device);
2292 
2293 	const vk::VkPushConstantRange pushConstRange =
2294 	{
2295 		vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlags	stageFlags
2296 		0,															// deUint32				offset
2297 		(deUint32)(sizeof(vk::VkDeviceAddress)*descriptors.size())	// deUint32				size
2298 	};
2299 
2300 	// Must fit within maxPushConstantsSize; the spec guarantees a minimum of 128 bytes.
2301 	DE_ASSERT(pushConstRange.size <= 128);
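	// Editor's note: with 8-byte VkDeviceAddress entries, the 128-byte minimum
	// guaranteed for maxPushConstantsSize allows at most 128 / 8 = 16 block
	// instances to be addressed this way.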
2302 
2303 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams =
2304 	{
2305 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType				sType;
2306 		DE_NULL,											// const void*					pNext;
2307 		(vk::VkPipelineLayoutCreateFlags)0,					// VkPipelineLayoutCreateFlags	flags;
2308 		1u,													// deUint32						setLayoutCount;
2309 		&*descriptorSetLayout,								// const VkDescriptorSetLayout*	pSetLayouts;
2310 		m_usePhysStorageBuffer ? 1u : 0u,					// deUint32						pushConstantRangeCount;
2311 		&pushConstRange,									// const VkPushConstantRange*	pPushConstantRanges;
2312 	};
2313 	vk::Move<vk::VkPipelineLayout> pipelineLayout(createPipelineLayout(vk, device, &pipelineLayoutParams));
2314 
2315 	m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();
2316 
2317 	vk::Move<vk::VkShaderModule> shaderModule (createShaderModule(vk, device, m_context.getBinaryCollection().get("compute"), 0));
2318 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
2319 	{
2320 		vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,// VkStructureType				sType;
2321 		DE_NULL,												// const void*					pNext;
2322 		(vk::VkPipelineShaderStageCreateFlags)0,				// VkPipelineShaderStageCreateFlags	flags;
2323 		vk::VK_SHADER_STAGE_COMPUTE_BIT,						// VkShaderStageFlagBits		stage;
2324 		*shaderModule,											// VkShaderModule				module;
2325 		"main",													// const char*					pName;
2326 		DE_NULL,												// const VkSpecializationInfo*	pSpecializationInfo;
2327 	};
2328 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
2329 	{
2330 		vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType					sType;
2331 		DE_NULL,											// const void*						pNext;
2332 		0,													// VkPipelineCreateFlags			flags;
2333 		pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
2334 		*pipelineLayout,									// VkPipelineLayout					layout;
2335 		DE_NULL,											// VkPipeline						basePipelineHandle;
2336 		0,													// deInt32							basePipelineIndex;
2337 	};
2338 	vk::Move<vk::VkPipeline> pipeline(createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo));
2339 
2340 	m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();
2341 
2342 	vk::Move<vk::VkCommandPool> cmdPool (createCommandPool(vk, device, vk::VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
2343 	vk::Move<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));
2344 
2345 	beginCommandBuffer(vk, *cmdBuffer, 0u);
2346 
2347 	vk.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
2348 
2349 	if (gpuAddrs.size()) {
2350 		vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, vk::VK_SHADER_STAGE_COMPUTE_BIT,
2351 							0, (deUint32)(sizeof(vk::VkDeviceAddress)*gpuAddrs.size()), &gpuAddrs[0]);
2352 	}
2353 	vk.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
2354 
2355 	vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
2356 
2357 	// Add barriers for shader writes to storage buffers before host access
2358 	std::vector<vk::VkBufferMemoryBarrier> barriers;
2359 	if (m_bufferMode == SSBOLayoutCase::BUFFERMODE_PER_BLOCK)
2360 	{
2361 		for (int blockNdx = 0; blockNdx < numBlocks; blockNdx++)
2362 		{
2363 			const vk::VkBuffer uniformBuffer = m_uniformBuffers[blockNdx].get()->get();
2364 
2365 			const vk::VkBufferMemoryBarrier barrier	=
2366 			{
2367 				vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2368 				DE_NULL,
2369 				vk::VK_ACCESS_SHADER_WRITE_BIT,
2370 				vk::VK_ACCESS_HOST_READ_BIT,
2371 				VK_QUEUE_FAMILY_IGNORED,
2372 				VK_QUEUE_FAMILY_IGNORED,
2373 				uniformBuffer,
2374 				0u,
2375 				static_cast<vk::VkDeviceSize>(bufferSizes[blockNdx])
2376 			};
2377 			barriers.push_back(barrier);
2378 		}
2379 	}
2380 	else
2381 	{
2382 		const vk::VkBuffer uniformBuffer = m_uniformBuffers[0].get()->get();
2383 
2384 		vk::VkDeviceSize totalSize	= 0;
2385 		for (size_t bufferNdx = 0; bufferNdx < bufferSizes.size(); bufferNdx++)
2386 			totalSize += bufferSizes[bufferNdx];
2387 
2388 		const vk::VkBufferMemoryBarrier barrier	=
2389 		{
2390 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
2391 			DE_NULL,
2392 			vk::VK_ACCESS_SHADER_WRITE_BIT,
2393 			vk::VK_ACCESS_HOST_READ_BIT,
2394 			VK_QUEUE_FAMILY_IGNORED,
2395 			VK_QUEUE_FAMILY_IGNORED,
2396 			uniformBuffer,
2397 			0u,
2398 			totalSize
2399 		};
2400 		barriers.push_back(barrier);
2401 	}
2402 	vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
2403 						  0u, DE_NULL, static_cast<deUint32>(barriers.size()), &barriers[0], 0u, DE_NULL);
2404 
2405 	endCommandBuffer(vk, *cmdBuffer);
2406 
2407 	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
2408 
2409 	// Read back ac_numPassed data
2410 	bool counterOk;
2411 	{
2412 		const int refCount = 1;
2413 		int resCount = 0;
2414 
2415 		invalidateAlloc(vk, device, *acBufferAlloc);
2416 
2417 		resCount = *((const int*)acBufferAlloc->getHostPtr());
2418 
2419 		counterOk = (refCount == resCount);
2420 		if (!counterOk)
2421 		{
2422 			m_context.getTestContext().getLog() << TestLog::Message << "Error: ac_numPassed = " << resCount << ", expected " << refCount << TestLog::EndMessage;
2423 		}
2424 	}
2425 
2426 	for (size_t allocNdx = 0; allocNdx < m_uniformAllocs.size(); allocNdx++)
2427 	{
2428 		vk::Allocation *alloc = m_uniformAllocs[allocNdx].get();
2429 		invalidateAlloc(vk, device, *alloc);
2430 	}
2431 
2432 	// Validate result
2433 	const bool compareOk = compareData(m_context.getTestContext().getLog(), m_refLayout, m_writeData.pointers, m_refLayout, mappedBlockPtrs);
2434 
2435 	if (compareOk && counterOk)
2436 		return tcu::TestStatus::pass("Result comparison and counter values are OK");
2437 	else if (!compareOk && counterOk)
2438 		return tcu::TestStatus::fail("Result comparison failed");
2439 	else if (compareOk && !counterOk)
2440 		return tcu::TestStatus::fail("Counter value incorrect");
2441 	else
2442 		return tcu::TestStatus::fail("Result comparison and counter values are incorrect");
2443 }
2444 
2445 // SSBOLayoutCase.
2446 
2447 SSBOLayoutCase::SSBOLayoutCase (tcu::TestContext& testCtx, const char* name, const char* description, BufferMode bufferMode, MatrixLoadFlags matrixLoadFlag, MatrixStoreFlags matrixStoreFlag, bool usePhysStorageBuffer)
2448 	: TestCase			(testCtx, name, description)
2449 	, m_bufferMode		(bufferMode)
2450 	, m_matrixLoadFlag	(matrixLoadFlag)
2451 	, m_matrixStoreFlag	(matrixStoreFlag)
2452 	, m_usePhysStorageBuffer(usePhysStorageBuffer)
2453 {
2454 }
2455 
2456 SSBOLayoutCase::~SSBOLayoutCase (void)
2457 {
2458 }
2459 
2460 void SSBOLayoutCase::initPrograms (vk::SourceCollections& programCollection) const
2461 {
2462 	DE_ASSERT(!m_computeShaderSrc.empty());
2463 
2464 	// Valid scalar layouts are a superset of valid relaxed layouts, so check scalar layout first.
2465 	if (usesScalarLayout(m_interface))
2466 	{
2467 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2468 			<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0, vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
2469 	}
2470 	else if (usesRelaxedLayout(m_interface))
2471 	{
2472 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc)
2473 			<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_0, vk::ShaderBuildOptions::FLAG_ALLOW_RELAXED_OFFSETS);
2474 	}
2475 	else
2476 		programCollection.glslSources.add("compute") << glu::ComputeSource(m_computeShaderSrc);
2477 }
2478 
2479 TestInstance* SSBOLayoutCase::createInstance (Context& context) const
2480 {
2481 	return new SSBOLayoutCaseInstance(context, m_bufferMode, m_interface, m_refLayout, m_initialData, m_writeData, m_usePhysStorageBuffer);
2482 }
2483 
2484 void SSBOLayoutCase::checkSupport(Context& context) const
2485 {
2486 	if (!context.isDeviceFunctionalitySupported("VK_KHR_relaxed_block_layout") && usesRelaxedLayout(m_interface))
2487 		TCU_THROW(NotSupportedError, "VK_KHR_relaxed_block_layout not supported");
2488 	if (!context.get16BitStorageFeatures().storageBuffer16BitAccess && uses16BitStorage(m_interface))
2489 		TCU_THROW(NotSupportedError, "storageBuffer16BitAccess not supported");
2490 	if (!context.get8BitStorageFeatures().storageBuffer8BitAccess && uses8BitStorage(m_interface))
2491 		TCU_THROW(NotSupportedError, "storageBuffer8BitAccess not supported");
2492 	if (!context.getScalarBlockLayoutFeatures().scalarBlockLayout && usesScalarLayout(m_interface))
2493 		TCU_THROW(NotSupportedError, "scalarBlockLayout not supported");
2494 	if (m_usePhysStorageBuffer && !context.isBufferDeviceAddressSupported())
2495 		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
2496 	if (usesDescriptorIndexing(m_interface) && (	!context.getDescriptorIndexingFeatures().shaderStorageBufferArrayNonUniformIndexing ||
2497 													!context.getDescriptorIndexingFeatures().runtimeDescriptorArray ) )
2498 		TCU_THROW(NotSupportedError, "Descriptor indexing over storage buffer not supported");
2499 
2500 	const vk::VkPhysicalDeviceProperties &properties = context.getDeviceProperties();
2501 	// The shader defines N+1 storage buffers: N to operate on, plus one to store the number of passing invocations.
2502 	deUint32 blockCount = 1u;
2503 	for (deInt32 blockIdx = 0u; blockIdx < m_interface.getNumBlocks(); blockIdx++)
2504 	{
2505 		blockCount += m_interface.getBlock(blockIdx).getArraySize() ? m_interface.getBlock(blockIdx).getArraySize() : 1u;
2506 	}
2507 
2508 	if (properties.limits.maxPerStageDescriptorStorageBuffers < blockCount)
2509 		TCU_THROW(NotSupportedError, "Descriptor set storage buffers count higher than the maximum supported by the driver");
2510 }
2511 
2512 void SSBOLayoutCase::delayedInit (void)
2513 {
2514 	computeReferenceLayout(m_refLayout, m_interface);
2515 	initRefDataStorage(m_interface, m_refLayout, m_initialData);
2516 	initRefDataStorage(m_interface, m_refLayout, m_writeData);
2517 	generateValues(m_refLayout, m_initialData.pointers, deStringHash(getName()) ^ 0xad2f7214);
2518 	generateValues(m_refLayout, m_writeData.pointers, deStringHash(getName()) ^ 0x25ca4e7);
2519 	copyNonWrittenData(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers);
2520 
2521 	m_computeShaderSrc = generateComputeShader(m_interface, m_refLayout, m_initialData.pointers, m_writeData.pointers, m_matrixLoadFlag, m_matrixStoreFlag, m_usePhysStorageBuffer);
2522 }
2523 
2524 } // ssbo
2525 } // vkt
2526