/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_buffer_device_address.
 *//*--------------------------------------------------------------------*/

#include "vktBindingBufferDeviceAddressTests.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deRandom.hpp"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace BindingModel
{
namespace
{
using namespace vk;
using namespace std;

typedef de::MovePtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::MovePtr<Allocation>			AllocationSp;

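// Each variant renders or dispatches a DIM x DIM grid of invocations, one
// result pixel per invocation.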
static const deUint32 DIM = 8;

typedef enum
{
	BASE_UBO = 0,
	BASE_SSBO,
} Base;

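// Compile-time switch for the optional VK_NV_ray_tracing (STAGE_RAYGEN) variants.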
#define ENABLE_RAYTRACING 0

typedef enum
{
	STAGE_COMPUTE = 0,
	STAGE_VERTEX,
	STAGE_FRAGMENT,
	STAGE_RAYGEN,
} Stage;

typedef enum
{
	BT_SINGLE = 0,
	BT_MULTI,
	BT_REPLAY,
} BufType;

typedef enum
{
	LAYOUT_STD140 = 0,
	LAYOUT_SCALAR,
} Layout;

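// How the generated shaders convert between buffer references and integer
// values: no conversion, a round trip through uint64_t or uvec2, comparisons
// of references converted to those types, or a cast chain through both.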
typedef enum
{
	CONVERT_NONE = 0,
	CONVERT_UINT64,
	CONVERT_UVEC2,
	CONVERT_U64CMP,
	CONVERT_UVEC2CMP,
	CONVERT_UVEC2TOU64,
	CONVERT_U64TOUVEC2,
} Convert;

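// One point in the test matrix: the descriptor set to use, the depth of the
// buffer reference tree, whether the root block is a UBO or SSBO, the shader
// stage, the reference/integer conversion mode, whether references are copied
// into local variables, how the buffers are allocated, and the block layout.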
struct CaseDef
{
	deUint32 set;
	deUint32 depth;
	Base base;
	Stage stage;
	Convert convertUToPtr;
	bool storeInLocal;
	BufType bufType;
	Layout layout;
};

class BufferAddressTestInstance : public TestInstance
{
public:
						BufferAddressTestInstance	(Context& context, const CaseDef& data);
						~BufferAddressTestInstance	(void);
	tcu::TestStatus		iterate						(void);
	virtual	void		fillBuffer					(const std::vector<deUint8 *>& cpuAddrs,
													 const std::vector<deUint64>& gpuAddrs,
													 deUint32 bufNum, deUint32 curDepth) const;
private:
	CaseDef				m_data;

	enum
	{
		WIDTH = 256,
		HEIGHT = 256
	};
};

BufferAddressTestInstance::BufferAddressTestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance		(context)
	, m_data				(data)
{
}

BufferAddressTestInstance::~BufferAddressTestInstance (void)
{
}

class BufferAddressTestCase : public TestCase
{
public:
							BufferAddressTestCase	(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
							~BufferAddressTestCase	(void);
	virtual	void			initPrograms			(SourceCollections& programCollection) const;
	virtual TestInstance*	createInstance			(Context& context) const;
	virtual void			checkSupport			(Context& context) const;
	virtual	void			checkBuffer				(std::stringstream& checks, deUint32 bufNum, deUint32 curDepth, const std::string &prefix) const;

private:
	CaseDef					m_data;
};

BufferAddressTestCase::BufferAddressTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
	: vkt::TestCase	(context, name, desc)
	, m_data		(data)
{
}

BufferAddressTestCase::~BufferAddressTestCase	(void)
{
}

void BufferAddressTestCase::checkSupport (Context& context) const
{
	if (!context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");

	if (m_data.stage == STAGE_VERTEX && !context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
		TCU_THROW(NotSupportedError, "Vertex pipeline stores and atomics not supported");

	if (m_data.set >= context.getDeviceProperties().limits.maxBoundDescriptorSets)
		TCU_THROW(NotSupportedError, "descriptor set number not supported");

	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
			(context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") && context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);
	if (m_data.bufType == BT_REPLAY && !isBufferDeviceAddressWithCaptureReplaySupported)
		TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");

	if (m_data.layout == LAYOUT_SCALAR && !context.getScalarBlockLayoutFeatures().scalarBlockLayout)
		TCU_THROW(NotSupportedError, "Scalar block layout not supported");

#if ENABLE_RAYTRACING
	if (m_data.stage == STAGE_RAYGEN &&
		!context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
	{
		TCU_THROW(NotSupportedError, "Ray tracing not supported");
	}
#endif

	const bool needsInt64	= (	m_data.convertUToPtr == CONVERT_UINT64		||
								m_data.convertUToPtr == CONVERT_U64CMP		||
								m_data.convertUToPtr == CONVERT_U64TOUVEC2	||
								m_data.convertUToPtr == CONVERT_UVEC2TOU64	);

	const bool needsKHR		= (	m_data.convertUToPtr == CONVERT_UVEC2		||
								m_data.convertUToPtr == CONVERT_UVEC2CMP	||
								m_data.convertUToPtr == CONVERT_U64TOUVEC2	||
								m_data.convertUToPtr == CONVERT_UVEC2TOU64	);

	if (needsInt64 && !context.getDeviceFeatures().shaderInt64)
		TCU_THROW(NotSupportedError, "Int64 not supported");
	if (needsKHR && !context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
		TCU_THROW(NotSupportedError, "VK_KHR_buffer_device_address not supported");
}

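// Emits GLSL that walks the buffer tree rooted at "prefix", ORing the
// difference between each member and its expected value into "accum";
// a nonzero accum at the end marks the invocation as failed.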
void BufferAddressTestCase::checkBuffer (std::stringstream& checks, deUint32 bufNum, deUint32 curDepth, const std::string &prefix) const
{
	string newPrefix = prefix;
	if (curDepth > 0)
	{
		if (m_data.convertUToPtr == CONVERT_UINT64 || m_data.convertUToPtr == CONVERT_UVEC2TOU64)
			newPrefix = "T1(uint64_t(T1(" + newPrefix + ")))";
		else if (m_data.convertUToPtr == CONVERT_UVEC2 || m_data.convertUToPtr == CONVERT_U64TOUVEC2)
			newPrefix = "T1(uvec2(T1(" + newPrefix + ")))";
	}

	if (m_data.storeInLocal && curDepth != 0)
	{
		std::string localName = "l" + de::toString(bufNum);
		checks << "   " << ((bufNum & 1) ? "restrict " : "") << "T1 " << localName << " = " << newPrefix << ";\n";
		newPrefix = localName;
	}

	checks << "   accum |= " << newPrefix << ".a[0] - " << bufNum*3+0 << ";\n";
	checks << "   accum |= " << newPrefix << ".a[pc.identity[1]] - " << bufNum*3+1 << ";\n";
	checks << "   accum |= " << newPrefix << ".b - " << bufNum*3+2 << ";\n";
	checks << "   accum |= int(" << newPrefix << ".e[0][0] - " << bufNum*3+3 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[0][1] - " << bufNum*3+5 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[1][0] - " << bufNum*3+4 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[1][1] - " << bufNum*3+6 << ");\n";

	if (m_data.layout == LAYOUT_SCALAR)
	{
		checks << "   f = " << newPrefix << ".f;\n";
		checks << "   accum |= f.x - " << bufNum*3+7 << ";\n";
		checks << "   accum |= f.y - " << bufNum*3+8 << ";\n";
		checks << "   accum |= f.z - " << bufNum*3+9 << ";\n";
	}

	const std::string localPrefix = "l" + de::toString(bufNum);

	if (m_data.convertUToPtr == CONVERT_U64CMP || m_data.convertUToPtr == CONVERT_UVEC2CMP)
	{
		const std::string type = ((m_data.convertUToPtr == CONVERT_U64CMP) ? "uint64_t" : "uvec2");

		checks << "   " << type << " " << localPrefix << "c0 = " << type << "(" << newPrefix << ".c[0]);\n";
		checks << "   " << type << " " << localPrefix << "c1 = " << type << "(" << newPrefix << ".c[pc.identity[1]]);\n";
		checks << "   " << type << " " << localPrefix << "d  = " << type << "(" << newPrefix << ".d);\n";
	}

	if (curDepth != m_data.depth)
	{
		// Check non-null pointers and inequality among them.
		if (m_data.convertUToPtr == CONVERT_U64CMP)
		{
			checks << "   if (" << localPrefix << "c0 == zero ||\n"
				   << "       " << localPrefix << "c1 == zero ||\n"
				   << "       " << localPrefix << "d  == zero ||\n"
				   << "       " << localPrefix << "c0 == " << localPrefix << "c1 ||\n"
				   << "       " << localPrefix << "c1 == " << localPrefix << "d  ||\n"
				   << "       " << localPrefix << "c0 == " << localPrefix << "d  ) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
		else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		{
			checks << "   if (all(equal(" << localPrefix << "c0, zero)) ||\n"
				   << "       all(equal(" << localPrefix << "c1, zero)) ||\n"
				   << "       all(equal(" << localPrefix << "d , zero)) ||\n"
				   << "       all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) ||\n"
				   << "       all(equal(" << localPrefix << "c1, " << localPrefix << "d )) ||\n"
				   << "       all(equal(" << localPrefix << "c0, " << localPrefix << "d )) ) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}

		checkBuffer(checks, bufNum*3+1, curDepth+1, newPrefix + ".c[0]");
		checkBuffer(checks, bufNum*3+2, curDepth+1, newPrefix + ".c[pc.identity[1]]");
		checkBuffer(checks, bufNum*3+3, curDepth+1, newPrefix + ".d");
	}
	else
	{
		// At the maximum depth all pointers are null, so check them indirectly:
		// they should all compare equal to each other.
		if (m_data.convertUToPtr == CONVERT_U64CMP)
		{
			checks << "   if (!(" << localPrefix << "c0 == " << localPrefix << "c1 &&\n"
				   << "         " << localPrefix << "c1 == " << localPrefix << "d  &&\n"
				   << "         " << localPrefix << "c0 == " << localPrefix << "d  )) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
		else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		{
			checks << "   if (!(all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) &&\n"
				   << "         all(equal(" << localPrefix << "c1, " << localPrefix << "d )) &&\n"
				   << "         all(equal(" << localPrefix << "c0, " << localPrefix << "d )) )) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
	}
}

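// Writes the expected values into buffer "bufNum", matching the T1 block
// declared in initPrograms: int a[2] at offset 0, int b at 32, ivec3 f at 36
// (scalar layout only), references c[2] at 48, reference d at 80 and
// row-major mat2 e at 96. Recurses into the three children until the
// configured depth is reached; leaf buffers get null references.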
void BufferAddressTestInstance::fillBuffer (const std::vector<deUint8 *>& cpuAddrs,
											const std::vector<deUint64>& gpuAddrs,
											deUint32 bufNum, deUint32 curDepth) const
{
	deUint8 *buf = cpuAddrs[bufNum];

	deUint32 aStride = m_data.layout == LAYOUT_SCALAR ? 1 : 4; // (in deUint32s)
	deUint32 cStride = m_data.layout == LAYOUT_SCALAR ? 1 : 2; // (in deUint64s)
	deUint32 matStride = m_data.layout == LAYOUT_SCALAR ? 2 : 4; // (in floats)

	// a
	((deUint32 *)(buf+0))[0] = bufNum*3+0;
	((deUint32 *)(buf+0))[aStride] = bufNum*3+1;
	// b
	((deUint32 *)(buf+32))[0] = bufNum*3+2;
	if (m_data.layout == LAYOUT_SCALAR)
	{
		// f
		((deUint32 *)(buf+36))[0] = bufNum*3+7;
		((deUint32 *)(buf+36))[1] = bufNum*3+8;
		((deUint32 *)(buf+36))[2] = bufNum*3+9;
	}
	// e
	((float *)(buf+96))[0] = (float)(bufNum*3+3);
	((float *)(buf+96))[1] = (float)(bufNum*3+4);
	((float *)(buf+96))[matStride] = (float)(bufNum*3+5);
	((float *)(buf+96))[matStride+1] = (float)(bufNum*3+6);

	if (curDepth != m_data.depth)
	{
		// c
		((deUint64 *)(buf+48))[0] = gpuAddrs[bufNum*3+1];
		((deUint64 *)(buf+48))[cStride] = gpuAddrs[bufNum*3+2];
		// d
		((deUint64 *)(buf+80))[0] = gpuAddrs[bufNum*3+3];

		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+1, curDepth+1);
		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+2, curDepth+1);
		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+3, curDepth+1);
	}
	else
	{
		// c
		((deUint64 *)(buf+48))[0] = 0ull;
		((deUint64 *)(buf+48))[cStride] = 0ull;
		// d
		((deUint64 *)(buf+80))[0] = 0ull;
	}
}

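// Builds the GLSL for the selected stage: declares the result image, the
// root T2 block (UBO or SSBO) and the T1 buffer_reference type, then splices
// in the recursive member checks produced by checkBuffer.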
void BufferAddressTestCase::initPrograms (SourceCollections& programCollection) const
{
	std::stringstream decls, checks, localDecls;

	std::string baseStorage = m_data.base == BASE_UBO ? "uniform" : "buffer";
	std::string memberStorage = "buffer";

	decls << "layout(r32ui, set = " << m_data.set << ", binding = 0) uniform uimage2D image0_0;\n";
	decls << "layout(buffer_reference) " << memberStorage << " T1;\n";

	std::string refType;
	switch (m_data.convertUToPtr)
	{
	case CONVERT_UINT64:
	case CONVERT_U64TOUVEC2:
		refType = "uint64_t";
		break;

	case CONVERT_UVEC2:
	case CONVERT_UVEC2TOU64:
		refType = "uvec2";
		break;

	default:
		refType = "T1";
		break;
	}

	std::string layout = m_data.layout == LAYOUT_SCALAR ? "scalar" : "std140";
	decls <<
			"layout(set = " << m_data.set << ", binding = 1, " << layout << ") " << baseStorage << " T2 {\n"
			"   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
			"   layout(offset = 32) int b;\n"
			<< ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") <<
			"   layout(offset = 48) " << refType << " c[2]; // stride = 8 for scalar, 16 for std140\n"
			"   layout(offset = 80) " << refType << " d;\n"
			"   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
			"} x;\n";
	decls <<
			"layout(buffer_reference, " << layout << ") " << memberStorage << " T1 {\n"
			"   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
			"   layout(offset = 32) int b;\n"
			<< ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") <<
			"   layout(offset = 48) " << refType << " c[2]; // stride = 8 for scalar, 16 for std140\n"
			"   layout(offset = 80) " << refType << " d;\n"
			"   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
			"};\n";

	if (m_data.convertUToPtr == CONVERT_U64CMP)
		localDecls << "  uint64_t zero = uint64_t(0);\n";
	else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		localDecls << "  uvec2 zero = uvec2(0, 0);\n";

	checkBuffer(checks, 0, 0, "x");

	std::stringstream pushdecl;
	pushdecl << "layout (push_constant, std430) uniform Block { int identity[32]; } pc;\n";

	vk::ShaderBuildOptions::Flags flags = vk::ShaderBuildOptions::Flags(0);
	if (m_data.layout == LAYOUT_SCALAR)
		flags = vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS;

	// The conversion and comparison in uvec2 form test needs SPIR-V 1.5 for OpBitcast.
	const vk::SpirvVersion spirvVersion = ((m_data.convertUToPtr == CONVERT_UVEC2CMP) ? vk::SPIRV_VERSION_1_5 : vk::SPIRV_VERSION_1_0);

	switch (m_data.stage)
	{
	default: DE_ASSERT(0); // Fallthrough
	case STAGE_COMPUTE:
		{
			std::stringstream css;
			css <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"layout(local_size_x = 1, local_size_y = 1) in;\n"
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_GlobalInvocationID.xy), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::ComputeSource(css.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
#if ENABLE_RAYTRACING
	case STAGE_RAYGEN:
		{
			std::stringstream css;
			css <<
				"#version 460 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				"#extension GL_NV_ray_tracing : require\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_LaunchIDNV.xy), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::RaygenSource(css.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
#endif
	case STAGE_VERTEX:
		{
			std::stringstream vss;
			vss <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_VertexIndex % " << DIM << ", gl_VertexIndex / " << DIM << "), color);\n"
				"  gl_PointSize = 1.0f;\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::VertexSource(vss.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
	case STAGE_FRAGMENT:
		{
			std::stringstream vss;
			vss <<
				"#version 450 core\n"
				"void main()\n"
				"{\n"
				// full-viewport quad
				"  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * float(gl_VertexIndex&1), 1);\n"
				"}\n";

			programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

			std::stringstream fss;
			fss <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_FragCoord.x, gl_FragCoord.y), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::FragmentSource(fss.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
	}
}

TestInstance* BufferAddressTestCase::createInstance (Context& context) const
{
	return new BufferAddressTestInstance(context, m_data);
}

VkBufferCreateInfo makeBufferCreateInfo (const void*				pNext,
										 const VkDeviceSize			bufferSize,
										 const VkBufferUsageFlags	usage,
										 const VkBufferCreateFlags	flags)
{
	const VkBufferCreateInfo bufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
		pNext,									// const void*			pNext;
		flags,									// VkBufferCreateFlags	flags;
		bufferSize,								// VkDeviceSize			size;
		usage,									// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
		0u,										// deUint32				queueFamilyIndexCount;
		DE_NULL,								// const deUint32*		pQueueFamilyIndices;
	};
	return bufferCreateInfo;
}

tcu::TestStatus BufferAddressTestInstance::iterate (void)
{
	const InstanceInterface&	vki						= m_context.getInstanceInterface();
	const DeviceInterface&		vk						= m_context.getDeviceInterface();
	const VkPhysicalDevice&		physDevice				= m_context.getPhysicalDevice();
	const VkDevice				device					= m_context.getDevice();
	Allocator&					allocator				= m_context.getDefaultAllocator();
	const bool					useKHR					= m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");

	VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
	VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

#if ENABLE_RAYTRACING
	if (m_data.stage == STAGE_RAYGEN)
	{
		allShaderStages = VK_SHADER_STAGE_RAYGEN_BIT_NV;
		allPipelineStages = VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV;
	}
#endif

	VkPhysicalDeviceProperties2 properties;
	deMemset(&properties, 0, sizeof(properties));
	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;

#if ENABLE_RAYTRACING
	VkPhysicalDeviceRayTracingPropertiesNV rayTracingProperties;
	deMemset(&rayTracingProperties, 0, sizeof(rayTracingProperties));
	rayTracingProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV;

	if (m_context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
	{
		properties.pNext = &rayTracingProperties;
	}
#endif

	m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);

	VkPipelineBindPoint bindPoint;

	switch (m_data.stage)
	{
	case STAGE_COMPUTE:
		bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
		break;
#if ENABLE_RAYTRACING
	case STAGE_RAYGEN:
		bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_NV;
		break;
#endif
	default:
		bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
		break;
	}

	Move<vk::VkDescriptorPool>	descriptorPool;
	Move<vk::VkDescriptorSet>	descriptorSet;

	VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;

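	// Binding 0 is the storage image the shaders write their results to;
	// binding 1 is the root UBO or SSBO holding the top of the buffer tree.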
	VkDescriptorSetLayoutBinding bindings[2];
	bindings[0].binding = 0;
	bindings[0].stageFlags = allShaderStages;
	bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
	bindings[0].descriptorCount = 1;
	bindings[1].binding = 1;
	bindings[1].stageFlags = allShaderStages;
	bindings[1].descriptorType = m_data.base == BASE_UBO ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
	bindings[1].descriptorCount = 1;

	// Create a layout and allocate a descriptor set for it.
	VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,

		0,
		(deUint32)2,
		&bindings[0]
	};

	Move<vk::VkDescriptorSetLayout>	descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	setLayoutCreateInfo.bindingCount = 0;
	Move<vk::VkDescriptorSetLayout>	emptyDescriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[1].descriptorType, 1);
	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1);

	descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
	descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

	VkDeviceSize	align = de::max(de::max(properties.properties.limits.minUniformBufferOffsetAlignment,
											properties.properties.limits.minStorageBufferOffsetAlignment),
											(VkDeviceSize)128 /*sizeof(T1)*/);

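	// Each buffer references three children (c[0], c[1] and d), so a tree of
	// depth d needs 1 + 3 + ... + 3^d = (3^(d+1) - 1) / 2 bindings in total.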
	deUint32 numBindings = 1;
	for (deUint32 d = 0; d < m_data.depth; ++d)
	{
		numBindings = numBindings*3+1;
	}

	VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT,	// VkStructureType	 sType;
		DE_NULL,													// const void*		 pNext;
		0x000000000ULL,												// VkDeviceSize		 deviceAddress
	};

	VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0x000000000ULL,													// VkDeviceSize		 opaqueCaptureAddress
	};

	std::vector<deUint8 *> cpuAddrs(numBindings);
	std::vector<VkDeviceAddress> gpuAddrs(numBindings);
	std::vector<deUint64> opaqueBufferAddrs(numBindings);
	std::vector<deUint64> opaqueMemoryAddrs(numBindings);

	VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,										// const void*		 pNext;
		0,												// VkBuffer			 buffer
	};

	VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0,																// VkDeviceMemory	 memory;
	};

	bool multiBuffer = m_data.bufType != BT_SINGLE;
	deUint32 numBuffers = multiBuffer ? numBindings : 1;
	VkDeviceSize bufferSize = multiBuffer ? align : (align*numBindings);

	vector<VkBufferSp>			buffers(numBuffers);
	vector<AllocationSp>		allocations(numBuffers);

	VkBufferCreateInfo			bufferCreateInfo = makeBufferCreateInfo(DE_NULL, bufferSize,
														VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
														VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
														VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
														m_data.bufType == BT_REPLAY ? VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT : 0);

	// VkMemoryAllocateFlags to be filled out later
	VkMemoryAllocateFlagsInfo	allocFlagsInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,	//	VkStructureType			sType
		DE_NULL,										//	const void*				pNext
		0,												//	VkMemoryAllocateFlags	flags
		0,												//	uint32_t				deviceMask
	};

	VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		0,																// uint64_t			opaqueCaptureAddress;
	};

	if (useKHR)
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;

	if (useKHR && m_data.bufType == BT_REPLAY)
	{
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
		allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
	}

	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));

		// query opaque capture address before binding memory
		if (useKHR)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			opaqueBufferAddrs[i] = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
		}

		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));

		if (useKHR)
		{
			deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
			opaqueMemoryAddrs[i] = vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
		}

		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
	}

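	// For capture/replay cases: record every buffer's device address (and the
	// opaque capture addresses on the KHR path), destroy the buffers and
	// memory, then re-create them requesting the same addresses and verify
	// the implementation returns matching addresses.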
	if (m_data.bufType == BT_REPLAY)
	{
		for (deUint32 i = 0; i < numBuffers; ++i)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			if (useKHR)
				gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
			else
				gpuAddrs[i] = vk.getBufferDeviceAddressEXT(device, &bufferDeviceAddressInfo);
		}
		buffers.clear();
		buffers.resize(numBuffers);
		allocations.clear();
		allocations.resize(numBuffers);

		bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;

		for (deInt32 i = numBuffers-1; i >= 0; --i)
		{
			addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
			bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress = opaqueBufferAddrs[i];
			memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];

			buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
			allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));
			VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));

			bufferDeviceAddressInfo.buffer = **buffers[i];
			VkDeviceSize newAddr;
			if (useKHR)
				newAddr = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
			else
				newAddr = vk.getBufferDeviceAddressEXT(device, &bufferDeviceAddressInfo);
			if (newAddr != gpuAddrs[i])
				return tcu::TestStatus(QP_TEST_RESULT_FAIL, "address mismatch");
		}
	}

	// Compute the GPU and CPU address of each "align"-byte region, one per binding.
	for (deUint32 i = 0; i < numBindings; ++i)
	{
		bufferDeviceAddressInfo.buffer = **buffers[multiBuffer ? i : 0];

		if (useKHR)
			gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
		else
			gpuAddrs[i] = vk.getBufferDeviceAddressEXT(device, &bufferDeviceAddressInfo);
		cpuAddrs[i] = (deUint8 *)allocations[multiBuffer ? i : 0]->getHostPtr();
		if (!multiBuffer)
		{
			cpuAddrs[i] = cpuAddrs[i] + align*i;
			gpuAddrs[i] = gpuAddrs[i] + align*i;
		}
		//printf("addr 0x%08x`%08x\n", (unsigned)(gpuAddrs[i]>>32), (unsigned)(gpuAddrs[i]));
	}

	fillBuffer(cpuAddrs, gpuAddrs, 0, 0);

	for (deUint32 i = 0; i < numBuffers; ++i)
		flushAlloc(vk, device, *allocations[i]);

	const VkQueue					queue					= m_context.getUniversalQueue();
	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	beginCommandBuffer(vk, *cmdBuffer, 0u);

	// Push constants are used for dynamic indexing. PushConstant[i] = i.

	const VkPushConstantRange pushConstRange =
	{
		allShaderStages,		// VkShaderStageFlags	stageFlags
		0,						// deUint32				offset
		128						// deUint32				size
	};

	deUint32 nonEmptySetLimit = m_data.base == BASE_UBO ? properties.properties.limits.maxPerStageDescriptorUniformBuffers :
														  properties.properties.limits.maxPerStageDescriptorStorageBuffers;
	nonEmptySetLimit = de::min(nonEmptySetLimit, properties.properties.limits.maxPerStageDescriptorStorageImages);

	vector<vk::VkDescriptorSetLayout>	descriptorSetLayoutsRaw(m_data.set+1);
	for (size_t i = 0; i < m_data.set+1; ++i)
	{
		// use nonempty descriptor sets to consume resources until we run out of descriptors
		if (i < nonEmptySetLimit - 1 || i == m_data.set)
			descriptorSetLayoutsRaw[i] = descriptorSetLayout.get();
		else
			descriptorSetLayoutsRaw[i] = emptyDescriptorSetLayout.get();
	}

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
		DE_NULL,													// pNext
		(VkPipelineLayoutCreateFlags)0,
		m_data.set+1,												// setLayoutCount
		&descriptorSetLayoutsRaw[0],								// pSetLayouts
		1u,															// pushConstantRangeCount
		&pushConstRange,											// pPushConstantRanges
	};

	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

	// PushConstant[i] = i
	for (deUint32 i = 0; i < (deUint32)(128 / sizeof(deUint32)); ++i)
	{
		vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
							(deUint32)(i * sizeof(deUint32)), (deUint32)sizeof(deUint32), &i);
	}

	de::MovePtr<BufferWithMemory> copyBuffer;
	copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(DE_NULL, DIM*DIM*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0), MemoryRequirement::HostVisible));

	const VkImageCreateInfo			imageCreateInfo			=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,								// const void*				pNext;
		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
		VK_FORMAT_R32_UINT,						// VkFormat					format;
		{
			DIM,								// deUint32	width;
			DIM,								// deUint32	height;
			1u									// deUint32	depth;
		},										// VkExtent3D				extent;
		1u,										// deUint32					mipLevels;
		1u,										// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT
		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
		0u,										// deUint32					queueFamilyIndexCount;
		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
	};

	VkImageViewCreateInfo		imageViewCreateInfo		=
	{
		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	flags;
		DE_NULL,									// VkImage					image;
		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
		VK_FORMAT_R32_UINT,							// VkFormat					format;
		{
			VK_COMPONENT_SWIZZLE_R,					// VkComponentSwizzle	r;
			VK_COMPONENT_SWIZZLE_G,					// VkComponentSwizzle	g;
			VK_COMPONENT_SWIZZLE_B,					// VkComponentSwizzle	b;
			VK_COMPONENT_SWIZZLE_A					// VkComponentSwizzle	a;
		},											// VkComponentMapping		components;
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask;
			0u,										// deUint32				baseMipLevel;
			1u,										// deUint32				levelCount;
			0u,										// deUint32				baseArrayLayer;
			1u										// deUint32				layerCount;
		}											// VkImageSubresourceRange	subresourceRange;
	};

	de::MovePtr<ImageWithMemory> image;
	Move<VkImageView> imageView;

	image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
		vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
	imageViewCreateInfo.image = **image;
	imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

	VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
	VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffers[0], 0, align);

	VkWriteDescriptorSet w =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,							// sType
		DE_NULL,														// pNext
		*descriptorSet,													// dstSet
		(deUint32)0,													// dstBinding
		0,																// dstArrayElement
		1u,																// descriptorCount
		bindings[0].descriptorType,										// descriptorType
		&imageInfo,														// pImageInfo
		&bufferInfo,													// pBufferInfo
		DE_NULL,														// pTexelBufferView
	};
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	w.dstBinding = 1;
	w.descriptorType = bindings[1].descriptorType;
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, m_data.set, 1, &descriptorSet.get(), 0, DE_NULL);

	Move<VkPipeline> pipeline;
	Move<VkRenderPass> renderPass;
	Move<VkFramebuffer> framebuffer;
	de::MovePtr<BufferWithMemory> sbtBuffer;

	m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();

	if (m_data.stage == STAGE_COMPUTE)
	{
		const Unique<VkShaderModule>	shader(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			DE_NULL,
			(VkPipelineShaderStageCreateFlags)0,
			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
			*shader,													// shader
			"main",
			DE_NULL,													// pSpecializationInfo
		};

		const VkComputePipelineCreateInfo		pipelineCreateInfo =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			DE_NULL,
			0u,															// flags
			shaderCreateInfo,											// cs
			*pipelineLayout,											// layout
			(vk::VkPipeline)0,											// basePipelineHandle
			0u,															// basePipelineIndex
		};
		pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
	}
#if ENABLE_RAYTRACING
	else if (m_data.stage == STAGE_RAYGEN)
	{
		const Unique<VkShaderModule>	shader(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			DE_NULL,
			(VkPipelineShaderStageCreateFlags)0,
			VK_SHADER_STAGE_RAYGEN_BIT_NV,								// stage
			*shader,													// shader
			"main",
			DE_NULL,													// pSpecializationInfo
		};

		VkRayTracingShaderGroupCreateInfoNV group =
		{
			VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV,
			DE_NULL,
			VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV,			// type
			0,														// generalShader
			VK_SHADER_UNUSED_NV,									// closestHitShader
			VK_SHADER_UNUSED_NV,									// anyHitShader
			VK_SHADER_UNUSED_NV,									// intersectionShader
		};

		VkRayTracingPipelineCreateInfoNV pipelineCreateInfo = {
			VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV,	// sType
			DE_NULL,												// pNext
			0,														// flags
			1,														// stageCount
			&shaderCreateInfo,										// pStages
			1,														// groupCount
			&group,													// pGroups
			0,														// maxRecursionDepth
			*pipelineLayout,										// layout
			(vk::VkPipeline)0,										// basePipelineHandle
			0u,														// basePipelineIndex
		};

		pipeline = createRayTracingPipelineNV(vk, device, DE_NULL, &pipelineCreateInfo, NULL);

		sbtBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(DE_NULL, rayTracingProperties.shaderGroupHandleSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, 0), MemoryRequirement::HostVisible));
		deUint32 *ptr = (deUint32 *)sbtBuffer->getAllocation().getHostPtr();
		invalidateAlloc(vk, device, sbtBuffer->getAllocation());

		vk.getRayTracingShaderGroupHandlesNV(device, *pipeline, 0, 1, rayTracingProperties.shaderGroupHandleSize, ptr);
	}
#endif
	else
	{
		const vk::VkSubpassDescription		subpassDesc			=
		{
			(vk::VkSubpassDescriptionFlags)0,
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
			0u,														// inputCount
			DE_NULL,												// pInputAttachments
			0u,														// colorCount
			DE_NULL,												// pColorAttachments
			DE_NULL,												// pResolveAttachments
			DE_NULL,												// depthStencilAttachment
			0u,														// preserveCount
			DE_NULL,												// pPreserveAttachments
		};
		const vk::VkRenderPassCreateInfo	renderPassParams	=
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
			DE_NULL,												// pNext
			(vk::VkRenderPassCreateFlags)0,
			0u,														// attachmentCount
			DE_NULL,												// pAttachments
			1u,														// subpassCount
			&subpassDesc,											// pSubpasses
			0u,														// dependencyCount
			DE_NULL,												// pDependencies
		};

		renderPass = createRenderPass(vk, device, &renderPassParams);

		const vk::VkFramebufferCreateInfo	framebufferParams	=
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkFramebufferCreateFlags)0,
			*renderPass,									// renderPass
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			DIM,											// width
			DIM,											// height
			1u,												// layers
		};

		framebuffer = createFramebuffer(vk, device, &framebufferParams);

		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			0u,															// deUint32									vertexBindingDescriptionCount;
			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,															// deUint32									vertexAttributeDescriptionCount;
			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
		};

		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
			(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology						topology;
			VK_FALSE														// VkBool32									primitiveRestartEnable;
		};

		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
			VK_FALSE,														// VkBool32									depthClampEnable;
			(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,			// VkBool32									rasterizerDiscardEnable;
			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
			VK_FALSE,														// VkBool32									depthBiasEnable;
			0.0f,															// float									depthBiasConstantFactor;
			0.0f,															// float									depthBiasClamp;
			0.0f,															// float									depthBiasSlopeFactor;
			1.0f															// float									lineWidth;
		};

		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,													// const void*								pNext
			0u,															// VkPipelineMultisampleStateCreateFlags	flags
			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples
			VK_FALSE,													// VkBool32									sampleShadingEnable
			1.0f,														// float									minSampleShading
			DE_NULL,													// const VkSampleMask*						pSampleMask
			VK_FALSE,													// VkBool32									alphaToCoverageEnable
			VK_FALSE													// VkBool32									alphaToOneEnable
		};

		VkViewport viewport = makeViewport(DIM, DIM);
		VkRect2D scissor = makeRect2D(DIM, DIM);

		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,												// const void*								pNext
			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags		flags
			1u,														// deUint32									viewportCount
			&viewport,												// const VkViewport*						pViewports
			1u,														// deUint32									scissorCount
			&scissor												// const VkRect2D*							pScissors
		};

		Move<VkShaderModule> fs;
		Move<VkShaderModule> vs;

		deUint32 numStages;
		if (m_data.stage == STAGE_VERTEX)
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
			numStages = 1u;
		}
		else
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			numStages = 2u;
		}

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_VERTEX_BIT,									// stage
				*vs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			},
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
				*fs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			}
		};

		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
		{
			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			numStages,											// deUint32											stageCount;
			&shaderCreateInfo[0],								// const VkPipelineShaderStageCreateInfo*			pStages;
			&vertexInputStateCreateInfo,						// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
			&inputAssemblyStateCreateInfo,						// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
			DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
			&viewportStateCreateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
			&rasterizationStateCreateInfo,						// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
			&multisampleStateCreateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
			DE_NULL,											// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
			DE_NULL,											// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
			DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
			pipelineLayout.get(),								// VkPipelineLayout									layout;
			renderPass.get(),									// VkRenderPass										renderPass;
			0u,													// deUint32											subpass;
			DE_NULL,											// VkPipeline										basePipelineHandle;
			0													// int												basePipelineIndex;
		};

		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
	}

	m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();

	const VkImageMemoryBarrier imageBarrier =
	{
		VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
		DE_NULL,											// const void*			pNext
		0u,													// VkAccessFlags		srcAccessMask
		VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
		VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
		VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
		**image,											// VkImage				image
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
			0u,										// uint32_t				baseMipLevel
			1u,										// uint32_t				mipLevels,
			0u,										// uint32_t				baseArray
			1u,										// uint32_t				arraySize
		}
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
							(VkDependencyFlags)0,
							0, (const VkMemoryBarrier*)DE_NULL,
							0, (const VkBufferMemoryBarrier*)DE_NULL,
							1, &imageBarrier);

	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);

	VkMemoryBarrier					memBarrier =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
		DE_NULL,							// pNext
		0u,									// srcAccessMask
		0u,									// dstAccessMask
	};

	vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	if (m_data.stage == STAGE_COMPUTE)
	{
		vk.cmdDispatch(*cmdBuffer, DIM, DIM, 1);
	}
#if ENABLE_RAYTRACING
	else if (m_data.stage == STAGE_RAYGEN)
	{
		vk.cmdTraceRaysNV(*cmdBuffer,
			**sbtBuffer, 0,
			DE_NULL, 0, 0,
			DE_NULL, 0, 0,
			DE_NULL, 0, 0,
			DIM, DIM, 1);
	}
#endif
	else
	{
		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
						makeRect2D(DIM, DIM),
						0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
		// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
		if (m_data.stage == STAGE_VERTEX)
		{
			vk.cmdDraw(*cmdBuffer, DIM*DIM, 1u, 0u, 0u);
		}
		else
		{
			vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
		}
		endRenderPass(vk, *cmdBuffer);
	}

	memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(DIM, DIM, 1u),
															 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
	vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);

	endCommandBuffer(vk, *cmdBuffer);

	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
	invalidateAlloc(vk, device, copyBuffer->getAllocation());

	qpTestResult res = QP_TEST_RESULT_PASS;

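	// Each invocation stores 1 to its pixel when all checks pass (the .r
	// component of uvec4(1,0,0,1)), so any other value means a failed check.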
	for (deUint32 i = 0; i < DIM*DIM; ++i)
	{
		if (ptr[i] != 1)
		{
			res = QP_TEST_RESULT_FAIL;
		}
	}

	return tcu::TestStatus(res, qpGetTestResultName(res));
}

class CaptureReplayTestCase : public TestCase
{
public:
							CaptureReplayTestCase	(tcu::TestContext& context, const char* name, const char* desc, deUint32 seed);
							~CaptureReplayTestCase	(void);
	virtual	void			initPrograms			(SourceCollections& programCollection) const { DE_UNREF(programCollection); }
	virtual TestInstance*	createInstance			(Context& context) const;
	virtual void			checkSupport			(Context& context) const;
private:
	deUint32				m_seed;
};

CaptureReplayTestCase::CaptureReplayTestCase (tcu::TestContext& context, const char* name, const char* desc, deUint32 seed)
	: vkt::TestCase	(context, name, desc)
	, m_seed(seed)
{
}

CaptureReplayTestCase::~CaptureReplayTestCase (void)
{
}

void CaptureReplayTestCase::checkSupport (Context& context) const
{
	if (!context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");

	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
			(context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") && context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);

	if (!isBufferDeviceAddressWithCaptureReplaySupported)
		TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");
}

class CaptureReplayTestInstance : public TestInstance
{
public:
						CaptureReplayTestInstance	(Context& context, deUint32 seed);
						~CaptureReplayTestInstance	(void);
	tcu::TestStatus		iterate						(void);
private:
	deUint32			m_seed;
};

CaptureReplayTestInstance::CaptureReplayTestInstance (Context& context, deUint32 seed)
	: vkt::TestInstance		(context)
	, m_seed(seed)
{
}

CaptureReplayTestInstance::~CaptureReplayTestInstance (void)
{
}

TestInstance* CaptureReplayTestCase::createInstance (Context& context) const
{
	return new CaptureReplayTestInstance(context, m_seed);
}

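// The capture/replay stress test mimics what a trace capture/replay tool does:
// a first ("capture") pass creates buffers with
// VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT and records their device
// addresses (plus, on the KHR path, the opaque capture addresses of both the
// buffers and their memory). Everything is then destroyed, and a second
// ("replay") pass recreates the buffers, feeding the recorded addresses back
// in through VkBufferOpaqueCaptureAddressCreateInfo (KHR) or
// VkBufferDeviceAddressCreateInfoEXT (EXT), and verifies that the
// implementation returns identical device addresses.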
tcu::TestStatus CaptureReplayTestInstance::iterate (void)
{
	const InstanceInterface&	vki			= m_context.getInstanceInterface();
	const DeviceInterface&		vk			= m_context.getDeviceInterface();
	const VkPhysicalDevice&		physDevice	= m_context.getPhysicalDevice();
	const VkDevice				device		= m_context.getDevice();
	const bool					useKHR		= m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
	de::Random					rng(m_seed);

	VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT,	// VkStructureType	sType;
		DE_NULL,													// const void*		pNext;
		0x000000000ULL,												// VkDeviceSize		deviceAddress;
	};

	VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		0x000000000ULL,													// VkDeviceSize		opaqueCaptureAddress;
	};

	const deUint32 numBuffers = 100;
	std::vector<VkDeviceSize> bufferSizes(numBuffers);
	// random sizes, powers of two [4K, 4MB]
	for (deUint32 i = 0; i < numBuffers; ++i)
		bufferSizes[i] = 4096 << (rng.getUint32() % 11);

	std::vector<VkDeviceAddress> gpuAddrs(numBuffers);
	std::vector<deUint64> opaqueBufferAddrs(numBuffers);
	std::vector<deUint64> opaqueMemoryAddrs(numBuffers);

	VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	sType;
		DE_NULL,										// const void*		pNext;
		0,												// VkBuffer			buffer;
	};

	VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		0,																// VkDeviceMemory	memory;
	};

	vector<VkBufferSp>			buffers(numBuffers);
	vector<AllocationSp>		allocations(numBuffers);

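	// All buffers are created with the capture/replay bit set so that their
	// device addresses can be requested again when they are recreated.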
	VkBufferCreateInfo			bufferCreateInfo = makeBufferCreateInfo(DE_NULL, 0,
														VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
														VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
														VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
														VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT);

	// VkMemoryAllocateFlags to be filled out later
	VkMemoryAllocateFlagsInfo	allocFlagsInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,	//	VkStructureType			sType
		DE_NULL,										//	const void*				pNext
		0,												//	VkMemoryAllocateFlags	flags
		0,												//	uint32_t				deviceMask
	};

	VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,	// VkStructureType	sType;
		DE_NULL,														// const void*		pNext;
		0,																// uint64_t			opaqueCaptureAddress;
	};

	// The KHR variant requires the device-address and capture/replay flags on
	// the memory allocation as well; the EXT variant has no memory-side
	// equivalent, so these flags are only set on the KHR path.
	if (useKHR)
	{
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT | VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
		allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
	}

1473 
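	// Capture phase: create all buffers and record, for each one, its opaque
	// capture addresses (KHR only) before and after allocating memory.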
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		bufferCreateInfo.size = bufferSizes[i];
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));

		// query opaque capture address before binding memory
		if (useKHR)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			opaqueBufferAddrs[i] = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
		}

		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));

		if (useKHR)
		{
			deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
			opaqueMemoryAddrs[i] = vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
		}

		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
	}
1496 
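	// Record the device address of every buffer; these are the values the
	// replay pass must reproduce exactly.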
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		bufferDeviceAddressInfo.buffer = **buffers[i];
		if (useKHR)
			gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
		else
			gpuAddrs[i] = vk.getBufferDeviceAddressEXT(device, &bufferDeviceAddressInfo);
	}
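	// Destroy all buffers and allocations, then recreate them while requesting
	// the captured addresses back from the implementation.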
	buffers.clear();
	buffers.resize(numBuffers);
	allocations.clear();
	allocations.resize(numBuffers);

	bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;

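	// Replay phase: recreate the buffers (iterating in reverse order, so the
	// allocation pattern differs from the capture pass) and check that each
	// one comes back at its captured device address.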
	for (deInt32 i = numBuffers-1; i >= 0; --i)
	{
		addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
		bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress = opaqueBufferAddrs[i];
		memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];

		bufferCreateInfo.size = bufferSizes[i];
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));
		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));

		bufferDeviceAddressInfo.buffer = **buffers[i];
		VkDeviceSize newAddr;
		if (useKHR)
			newAddr = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
		else
			newAddr = vk.getBufferDeviceAddressEXT(device, &bufferDeviceAddressInfo);
		if (newAddr != gpuAddrs[i])
			return tcu::TestStatus(QP_TEST_RESULT_FAIL, "address mismatch");
	}

	return tcu::TestStatus(QP_TEST_RESULT_PASS, qpGetTestResultName(QP_TEST_RESULT_PASS));
}

}	// anonymous

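// Entry point: builds the buffer_device_address group with one nested
// subgroup per test dimension (set, depth, base, convert, store, buffer type,
// layout) and the shader stage as the leaf test case, plus a
// capture_replay_stress group of seeded CaptureReplayTestCases.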
tcu::TestCaseGroup*	createBufferDeviceAddressTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "buffer_device_address", "Test VK_EXT_buffer_device_address"));

	typedef struct
	{
		deUint32				count;
		const char*				name;
		const char*				description;
	} TestGroupCase;

	TestGroupCase setCases[] =
	{
		{ 0,	"set0",		"set 0"		},
		{ 3,	"set3",		"set 3"		},
		{ 7,	"set7",		"set 7"		},
		{ 15,	"set15",	"set 15"	},
		{ 31,	"set31",	"set 31"	},
	};

	TestGroupCase depthCases[] =
	{
		{ 1,	"depth1",	"1 nested struct"		},
		{ 2,	"depth2",	"2 nested structs"		},
		{ 3,	"depth3",	"3 nested structs"		},
	};

	TestGroupCase baseCases[] =
	{
		{ BASE_UBO,		"baseubo",	"base ubo"	},
		{ BASE_SSBO,	"basessbo",	"base ssbo"	},
	};

	TestGroupCase cvtCases[] =
	{
		{ CONVERT_NONE,			"load",				"load reference"										},
		{ CONVERT_UINT64,		"convert",			"load and convert reference"							},
		{ CONVERT_UVEC2,		"convertuvec2",		"load and convert reference to uvec2"					},
		{ CONVERT_U64CMP,		"convertchecku64",	"load, convert and compare references as uint64_t"		},
		{ CONVERT_UVEC2CMP,		"convertcheckuv2",	"load, convert and compare references as uvec2"			},
		{ CONVERT_UVEC2TOU64,	"crossconvertu2p",	"load reference as uint64_t and convert it to uvec2"	},
		{ CONVERT_U64TOUVEC2,	"crossconvertp2u",	"load reference as uvec2 and convert it to uint64_t"	},
	};

	TestGroupCase storeCases[] =
	{
		{ 0,	"nostore",	"don't store intermediate reference"	},
		{ 1,	"store",	"store intermediate reference"			},
	};

	TestGroupCase btCases[] =
	{
		{ BT_SINGLE,	"single",	"single buffer"																	},
		{ BT_MULTI,		"multi",	"multiple buffers"																},
		{ BT_REPLAY,	"replay",	"multiple buffers and VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT"	},
	};

	TestGroupCase layoutCases[] =
	{
		{ LAYOUT_STD140,	"std140",	"std140"	},
		{ LAYOUT_SCALAR,	"scalar",	"scalar"	},
	};

	TestGroupCase stageCases[] =
	{
		{ STAGE_COMPUTE,	"comp",	"compute"	},
		{ STAGE_FRAGMENT,	"frag",	"fragment"	},
		{ STAGE_VERTEX,		"vert",	"vertex"	},
#if ENABLE_RAYTRACING
		{ STAGE_RAYGEN,		"rgen",	"raygen"	},
#endif
	};

	for (int setNdx = 0; setNdx < DE_LENGTH_OF_ARRAY(setCases); setNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> setGroup(new tcu::TestCaseGroup(testCtx, setCases[setNdx].name, setCases[setNdx].description));
		for (int depthNdx = 0; depthNdx < DE_LENGTH_OF_ARRAY(depthCases); depthNdx++)
		{
			de::MovePtr<tcu::TestCaseGroup> depthGroup(new tcu::TestCaseGroup(testCtx, depthCases[depthNdx].name, depthCases[depthNdx].description));
			for (int baseNdx = 0; baseNdx < DE_LENGTH_OF_ARRAY(baseCases); baseNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> baseGroup(new tcu::TestCaseGroup(testCtx, baseCases[baseNdx].name, baseCases[baseNdx].description));
				for (int cvtNdx = 0; cvtNdx < DE_LENGTH_OF_ARRAY(cvtCases); cvtNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> cvtGroup(new tcu::TestCaseGroup(testCtx, cvtCases[cvtNdx].name, cvtCases[cvtNdx].description));
					for (int storeNdx = 0; storeNdx < DE_LENGTH_OF_ARRAY(storeCases); storeNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> storeGroup(new tcu::TestCaseGroup(testCtx, storeCases[storeNdx].name, storeCases[storeNdx].description));
						for (int btNdx = 0; btNdx < DE_LENGTH_OF_ARRAY(btCases); btNdx++)
						{
							de::MovePtr<tcu::TestCaseGroup> btGroup(new tcu::TestCaseGroup(testCtx, btCases[btNdx].name, btCases[btNdx].description));
							for (int layoutNdx = 0; layoutNdx < DE_LENGTH_OF_ARRAY(layoutCases); layoutNdx++)
							{
								de::MovePtr<tcu::TestCaseGroup> layoutGroup(new tcu::TestCaseGroup(testCtx, layoutCases[layoutNdx].name, layoutCases[layoutNdx].description));
								for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
								{
									CaseDef c =
									{
										setCases[setNdx].count,						// deUint32 set;
										depthCases[depthNdx].count,					// deUint32 depth;
										(Base)baseCases[baseNdx].count,				// Base base;
										(Stage)stageCases[stageNdx].count,			// Stage stage;
										(Convert)cvtCases[cvtNdx].count,			// Convert convertUToPtr;
										!!storeCases[storeNdx].count,				// bool storeInLocal;
										(BufType)btCases[btNdx].count,				// BufType bufType;
										(Layout)layoutCases[layoutNdx].count,		// Layout layout;
									};

									// Skip more complex test cases for most descriptor sets, to reduce runtime.
									if (c.set != 3 && (c.depth == 3 || c.layout != LAYOUT_STD140))
										continue;

									layoutGroup->addChild(new BufferAddressTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
								}
								btGroup->addChild(layoutGroup.release());
							}
							storeGroup->addChild(btGroup.release());
						}
						cvtGroup->addChild(storeGroup.release());
					}
					baseGroup->addChild(cvtGroup.release());
				}
				depthGroup->addChild(baseGroup.release());
			}
			setGroup->addChild(depthGroup.release());
		}
		group->addChild(setGroup.release());
	}

	de::MovePtr<tcu::TestCaseGroup> capGroup(new tcu::TestCaseGroup(testCtx, "capture_replay_stress", "Test VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_EXT"));
	for (deUint32 i = 0; i < 10; ++i)
	{
		capGroup->addChild(new CaptureReplayTestCase(testCtx, (std::string("seed_") + de::toString(i)).c_str(), "", i));
	}
	group->addChild(capGroup.release());
	return group.release();
}

}	// BindingModel
}	// vkt