1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2018 NVIDIA Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Vulkan Memory Model tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26
27 #include "vkBufferWithMemory.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkQueryUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vktTestGroupUtil.hpp"
34 #include "vktTestCase.hpp"
35
36 #include "deDefs.h"
37 #include "deMath.h"
38 #include "deSharedPtr.hpp"
39 #include "deString.h"
40
41 #include "tcuTestCase.hpp"
42 #include "tcuTestLog.hpp"
43
44 #include <string>
45 #include <sstream>
46
47 namespace vkt
48 {
49 namespace MemoryModel
50 {
51 namespace
52 {
53 using namespace vk;
54 using namespace std;
55
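// Each test pairs up invocations that communicate through a "payload" variable and a "guard"
// variable. In the message passing (TT_MP) test, each invocation writes its payload, performs a
// release, then acquires its partner's guard and checks the partner's payload. In the
// write-after-read (TT_WAR) test, each invocation reads its partner's payload before the
// synchronization and writes its own payload after it, checking that the read still observed
// the initial zero value.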
56 typedef enum
57 {
58 TT_MP = 0, // message passing
59 TT_WAR, // write-after-read hazard
60 } TestType;
61
62 typedef enum
63 {
64 ST_FENCE_FENCE = 0,
65 ST_FENCE_ATOMIC,
66 ST_ATOMIC_FENCE,
67 ST_ATOMIC_ATOMIC,
68 ST_CONTROL_BARRIER,
69 ST_CONTROL_AND_MEMORY_BARRIER,
70 } SyncType;
71
72 typedef enum
73 {
74 SC_BUFFER = 0,
75 SC_IMAGE,
76 SC_WORKGROUP,
77 } StorageClass;
78
79 typedef enum
80 {
81 SCOPE_DEVICE = 0,
82 SCOPE_QUEUEFAMILY,
83 SCOPE_WORKGROUP,
84 SCOPE_SUBGROUP,
85 } Scope;
86
87 typedef enum
88 {
89 STAGE_COMPUTE = 0,
90 STAGE_VERTEX,
91 STAGE_FRAGMENT,
92 } Stage;
93
94 typedef enum
95 {
96 DATA_TYPE_UINT = 0,
97 DATA_TYPE_UINT64,
98 } DataType;
99
100 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
101 const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
102
103 struct CaseDef
104 {
105 bool payloadMemLocal;
106 bool guardMemLocal;
107 bool coherent;
108 bool core11;
109 bool atomicRMW;
110 TestType testType;
111 StorageClass payloadSC;
112 StorageClass guardSC;
113 Scope scope;
114 SyncType syncType;
115 Stage stage;
116 DataType dataType;
117 };
118
119 class MemoryModelTestInstance : public TestInstance
120 {
121 public:
122 MemoryModelTestInstance (Context& context, const CaseDef& data);
123 ~MemoryModelTestInstance (void);
124 tcu::TestStatus iterate (void);
125 private:
126 CaseDef m_data;
127
128 enum
129 {
130 WIDTH = 256,
131 HEIGHT = 256
132 };
133 };
134
135 MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
136 : vkt::TestInstance (context)
137 , m_data (data)
138 {
139 }
140
141 MemoryModelTestInstance::~MemoryModelTestInstance (void)
142 {
143 }
144
145 class MemoryModelTestCase : public TestCase
146 {
147 public:
148 MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
149 ~MemoryModelTestCase (void);
150 virtual void initPrograms (SourceCollections& programCollection) const;
151 virtual TestInstance* createInstance (Context& context) const;
152 virtual void checkSupport (Context& context) const;
153
154 private:
155 CaseDef m_data;
156 };
157
158 MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
159 : vkt::TestCase (context, name, desc)
160 , m_data (data)
161 {
162 }
163
164 MemoryModelTestCase::~MemoryModelTestCase (void)
165 {
166 }
167
168 void MemoryModelTestCase::checkSupport(Context& context) const
169 {
170 if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
171 {
172 TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
173 }
174
175 if (!m_data.core11)
176 {
177 if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
178 {
179 TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
180 }
181
182 if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
183 {
184 TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
185 }
186 }
187
188 if (m_data.scope == SCOPE_SUBGROUP)
189 {
190 // Check for subgroup support for scope_subgroup tests.
191 VkPhysicalDeviceSubgroupProperties subgroupProperties;
192 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
193 subgroupProperties.pNext = DE_NULL;
194 subgroupProperties.supportedOperations = 0;
195
196 VkPhysicalDeviceProperties2 properties;
197 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
198 properties.pNext = &subgroupProperties;
199
200 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
201
202 if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
203 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
204 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
205 {
206 TCU_THROW(NotSupportedError, "Subgroup features not supported");
207 }
208 }
209 if (m_data.dataType == DATA_TYPE_UINT64)
210 {
211 if (!context.getDeviceFeatures().shaderInt64)
212 {
213 TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
214 }
215 if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
216 m_data.guardSC == SC_BUFFER)
217 {
218 TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
219 }
220 if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
221 m_data.guardSC == SC_WORKGROUP)
222 {
223 TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
224 }
225 }
226 if (m_data.stage == STAGE_VERTEX)
227 {
228 if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
229 {
230 TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
231 }
232 }
233 if (m_data.stage == STAGE_FRAGMENT)
234 {
235 if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
236 {
237 TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
238 }
239 }
240 }
241
242
243 void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
244 {
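 // Workgroup (shared) memory is only shared within a single workgroup, so when either the
 // payload or the guard lives in shared memory, pair invocations within a workgroup even for
 // device or queue-family scoped tests.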
245 Scope invocationMapping = m_data.scope;
246 if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
247 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
248 {
249 invocationMapping = SCOPE_WORKGROUP;
250 }
251
252 const char *scopeStr;
253 switch (m_data.scope)
254 {
255 default: DE_ASSERT(0); // fall through
256 case SCOPE_DEVICE: scopeStr = "gl_ScopeDevice"; break;
257 case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
258 case SCOPE_WORKGROUP: scopeStr = "gl_ScopeWorkgroup"; break;
259 case SCOPE_SUBGROUP: scopeStr = "gl_ScopeSubgroup"; break;
260 }
261
262 const char *typeStr = m_data.dataType == DATA_TYPE_UINT64 ? "uint64_t" : "uint";
263
264 // Construct storageSemantics strings. Both release and acquire
265 // always have the payload storage class. They only include the
266 // guard storage class if they're using FENCE for that side of the
267 // sync.
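 // For example, payloadSC=SC_BUFFER, guardSC=SC_IMAGE and syncType=ST_FENCE_ATOMIC produce
 // storageSemanticsRelease = "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage" and
 // storageSemanticsAcquire = "gl_StorageSemanticsBuffer".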
268 std::stringstream storageSemanticsRelease;
269 switch (m_data.payloadSC)
270 {
271 default: DE_ASSERT(0); // fall through
272 case SC_BUFFER: storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
273 case SC_IMAGE: storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
274 case SC_WORKGROUP: storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
275 }
276 std::stringstream storageSemanticsAcquire;
277 storageSemanticsAcquire << storageSemanticsRelease.str();
278 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
279 {
280 switch (m_data.guardSC)
281 {
282 default: DE_ASSERT(0); // fall through
283 case SC_BUFFER: storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
284 case SC_IMAGE: storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
285 case SC_WORKGROUP: storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
286 }
287 }
288 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
289 {
290 switch (m_data.guardSC)
291 {
292 default: DE_ASSERT(0); // fall through
293 case SC_BUFFER: storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
294 case SC_IMAGE: storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
295 case SC_WORKGROUP: storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
296 }
297 }
298
299 std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
300
301 semanticsRelease << "gl_SemanticsRelease";
302 semanticsAcquire << "gl_SemanticsAcquire";
303 semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
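 // Non-coherent payload accesses rely on explicit availability/visibility operations, so
 // request them via gl_SemanticsMakeAvailable/MakeVisible. The WAR test does not need them,
 // since no payload write has to be made available across the release/acquire.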
304 if (!m_data.coherent && m_data.testType != TT_WAR)
305 {
306 DE_ASSERT(!m_data.core11);
307 semanticsRelease << " | gl_SemanticsMakeAvailable";
308 semanticsAcquire << " | gl_SemanticsMakeVisible";
309 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
310 }
311
312 std::stringstream css;
313 css << "#version 450 core\n";
314 if (!m_data.core11)
315 {
316 css << "#pragma use_vulkan_memory_model\n";
317 }
318 css <<
319 "#extension GL_KHR_shader_subgroup_basic : enable\n"
320 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
321 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
322 "#extension GL_KHR_memory_scope_semantics : enable\n"
323 "#extension GL_ARB_gpu_shader_int64 : enable\n"
324 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
325 "layout(constant_id = 0) const int DIM = 1;\n"
326 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
327 "struct S { " << typeStr << " x[DIM*DIM]; };\n";
328
329 if (m_data.stage == STAGE_COMPUTE)
330 {
331 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
332 }
333
334 const char *memqual = "";
335 if (m_data.coherent)
336 {
337 if (m_data.core11)
338 {
339 // Vulkan 1.1 only has "coherent", use it regardless of scope
340 memqual = "coherent";
341 }
342 else
343 {
344 switch (m_data.scope)
345 {
346 default: DE_ASSERT(0); // fall through
347 case SCOPE_DEVICE: memqual = "devicecoherent"; break;
348 case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
349 case SCOPE_WORKGROUP: memqual = "workgroupcoherent"; break;
350 case SCOPE_SUBGROUP: memqual = "subgroupcoherent"; break;
351 }
352 }
353 }
354 else
355 {
356 DE_ASSERT(!m_data.core11);
357 memqual = "nonprivate";
358 }
359
360 // Declare payload, guard, and fail resources
361 switch (m_data.payloadSC)
362 {
363 default: DE_ASSERT(0); // fall through
364 case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
365 case SC_IMAGE: css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n"; break;
366 case SC_WORKGROUP: css << "shared S payload;\n"; break;
367 }
368 if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
369 {
370 // The guard variable is only accessed with atomics and need not be declared coherent.
371 switch (m_data.guardSC)
372 {
373 default: DE_ASSERT(0); // fall through
374 case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
375 case SC_IMAGE: css << "layout(set=0, binding=1, r32ui) uniform uimage2D guard;\n"; break;
376 case SC_WORKGROUP: css << "shared S guard;\n"; break;
377 }
378 }
379
380 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
381
382 css <<
383 "void main()\n"
384 "{\n"
385 " bool pass = true;\n"
386 " bool skip = false;\n";
387
388 if (m_data.stage == STAGE_FRAGMENT)
389 {
390 // Kill helper invocations so they don't load outside the bounds of the SSBO.
391 // Helper pixels are also initially "active" and if a thread gets one as its
392 // partner in SCOPE_SUBGROUP mode, it can't run the test.
393 css << " if (gl_HelperInvocation) { return; }\n";
394 }
395
396 // Compute coordinates based on the storage class and scope.
397 // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
398 // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
399 // For subgroup scope, we pair up LocalInvocationID with the LocalInvocationID of the invocation at gl_SubgroupInvocationID^(gl_SubgroupSize-1).
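 // For example, at workgroup scope with DIM=8 the invocation at localId (1,2) is paired with (6,5).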
400 switch (invocationMapping)
401 {
402 default: DE_ASSERT(0); // fall through
403 case SCOPE_SUBGROUP:
404 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
405 css << " uvec4 ballot = subgroupBallot(true);\n"
406 " if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
407
408 switch (m_data.stage)
409 {
410 default: DE_ASSERT(0); // fall through
411 case STAGE_COMPUTE:
412 css <<
413 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
414 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
415 " uint sharedCoord = localId.y * DIM + localId.x;\n"
416 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
417 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
418 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
419 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
420 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
421 break;
422 case STAGE_VERTEX:
423 css <<
424 " uint bufferCoord = gl_VertexIndex;\n"
425 " uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
426 " ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
427 " ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
428 " gl_PointSize = 1.0f;\n"
429 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
430 break;
431 case STAGE_FRAGMENT:
432 css <<
433 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
434 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
435 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
436 " ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
437 " uint sharedCoord = localId.y * DIM + localId.x;\n"
438 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
439 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
440 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
441 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
442 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
443 break;
444 }
445 break;
446 case SCOPE_WORKGROUP:
447 css <<
448 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
449 " ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
450 " uint sharedCoord = localId.y * DIM + localId.x;\n"
451 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
452 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
453 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
454 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
455 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
456 break;
457 case SCOPE_QUEUEFAMILY:
458 case SCOPE_DEVICE:
459 switch (m_data.stage)
460 {
461 default: DE_ASSERT(0); // fall through
462 case STAGE_COMPUTE:
463 css <<
464 " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
465 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
466 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
467 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
468 " ivec2 imageCoord = globalId;\n"
469 " ivec2 partnerImageCoord = partnerGlobalId;\n";
470 break;
471 case STAGE_VERTEX:
472 css <<
473 " ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
474 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
475 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
476 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
477 " ivec2 imageCoord = globalId;\n"
478 " ivec2 partnerImageCoord = partnerGlobalId;\n"
479 " gl_PointSize = 1.0f;\n"
480 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
481 break;
482 case STAGE_FRAGMENT:
483 css <<
484 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
485 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
486 " ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
487 " ivec2 partnerGroupId = groupId;\n"
488 " uint sharedCoord = localId.y * DIM + localId.x;\n"
489 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
490 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
491 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
492 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
493 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
494 break;
495 }
496 break;
497 }
498
499 // Initialize shared memory, followed by a barrier
500 if (m_data.payloadSC == SC_WORKGROUP)
501 {
502 css << " payload.x[sharedCoord] = 0;\n";
503 }
504 if (m_data.guardSC == SC_WORKGROUP)
505 {
506 css << " guard.x[sharedCoord] = 0;\n";
507 }
508 if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
509 {
510 switch (invocationMapping)
511 {
512 default: DE_ASSERT(0); // fall through
513 case SCOPE_SUBGROUP: css << " subgroupBarrier();\n"; break;
514 case SCOPE_WORKGROUP: css << " barrier();\n"; break;
515 }
516 }
517
518 if (m_data.testType == TT_MP)
519 {
520 // Store payload
521 switch (m_data.payloadSC)
522 {
523 default: DE_ASSERT(0); // fall through
524 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
525 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
526 case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
527 }
528 }
529 else
530 {
531 DE_ASSERT(m_data.testType == TT_WAR);
532 // Load payload
533 switch (m_data.payloadSC)
534 {
535 default: DE_ASSERT(0); // fall through
536 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
537 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
538 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
539 }
540 }
541 if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
542 {
543 // Acquire and release separate from control barrier
544 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
545 " controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
546 " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
547 }
548 else if (m_data.syncType == ST_CONTROL_BARRIER)
549 {
550 // Control barrier performs both acquire and release
551 css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", "
552 << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
553 << semanticsAcquireRelease.str() << ");\n";
554 }
555 else
556 {
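 // As a sketch, a coherent buffer/buffer uint case with ST_ATOMIC_ATOMIC at device scope emits
 // the following release/acquire pair (no standalone fences):
 //   atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelease);
 //   skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsAcquire) == 0;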
557 // Release barrier
558 std::stringstream atomicReleaseSemantics;
559 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
560 {
561 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
562 atomicReleaseSemantics << ", 0, 0";
563 }
564 else
565 {
566 atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
567 }
568 // Atomic store guard
569 if (m_data.atomicRMW)
570 {
571 switch (m_data.guardSC)
572 {
573 default: DE_ASSERT(0); // fall through
574 case SC_BUFFER: css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
575 case SC_IMAGE: css << " imageAtomicExchange(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
576 case SC_WORKGROUP: css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
577 }
578 }
579 else
580 {
581 switch (m_data.guardSC)
582 {
583 default: DE_ASSERT(0); // fall through
584 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
585 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, (1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
586 case SC_WORKGROUP: css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
587 }
588 }
589
590 std::stringstream atomicAcquireSemantics;
591 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
592 {
593 atomicAcquireSemantics << ", 0, 0";
594 }
595 else
596 {
597 atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
598 }
599 // Atomic load guard
600 if (m_data.atomicRMW)
601 {
602 switch (m_data.guardSC)
603 {
604 default: DE_ASSERT(0); // fall through
605 case SC_BUFFER: css << " skip = atomicExchange(guard.x[partnerBufferCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
606 case SC_IMAGE: css << " skip = imageAtomicExchange(guard, partnerImageCoord, 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
607 case SC_WORKGROUP: css << " skip = atomicExchange(guard.x[partnerSharedCoord], 2u, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
608 }
609 } else
610 {
611 switch (m_data.guardSC)
612 {
613 default: DE_ASSERT(0); // fall through
614 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
615 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
616 case SC_WORKGROUP: css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
617 }
618 }
619 // Acquire barrier
620 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
621 {
622 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
623 }
624 }
625 if (m_data.testType == TT_MP)
626 {
627 // Load payload
628 switch (m_data.payloadSC)
629 {
630 default: DE_ASSERT(0); // fall through
631 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
632 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
633 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
634 }
635 css <<
636 " if (!skip && r != partnerBufferCoord) { fail.x[bufferCoord] = 1; }\n"
637 "}\n";
638 }
639 else
640 {
641 DE_ASSERT(m_data.testType == TT_WAR);
642 // Store payload, only if the partner invocation has already done its read
643 css << " if (!skip) {\n ";
644 switch (m_data.payloadSC)
645 {
646 default: DE_ASSERT(0); // fall through
647 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord;\n"; break;
648 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n"; break;
649 case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord;\n"; break;
650 }
651 css <<
652 " }\n"
653 " if (r != 0) { fail.x[bufferCoord] = 1; }\n"
654 "}\n";
655 }
656
657 // Draw a fullscreen triangle strip based on gl_VertexIndex
658 std::stringstream vss;
659 vss <<
660 "#version 450 core\n"
661 "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
662 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
663
664 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
665
666 switch (m_data.stage)
667 {
668 default: DE_ASSERT(0); // fall through
669 case STAGE_COMPUTE:
670 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
671 break;
672 case STAGE_VERTEX:
673 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
674 break;
675 case STAGE_FRAGMENT:
676 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
677 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
678 break;
679 }
680 }
681
682 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
683 {
684 return new MemoryModelTestInstance(context, m_data);
685 }
686
687 VkBufferCreateInfo makeBufferCreateInfo (const VkDeviceSize bufferSize,
688 const VkBufferUsageFlags usage)
689 {
690 const VkBufferCreateInfo bufferCreateInfo =
691 {
692 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
693 DE_NULL, // const void* pNext;
694 (VkBufferCreateFlags)0, // VkBufferCreateFlags flags;
695 bufferSize, // VkDeviceSize size;
696 usage, // VkBufferUsageFlags usage;
697 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
698 0u, // deUint32 queueFamilyIndexCount;
699 DE_NULL, // const deUint32* pQueueFamilyIndices;
700 };
701 return bufferCreateInfo;
702 }
703
704 Move<VkDescriptorSet> makeDescriptorSet (const DeviceInterface& vk,
705 const VkDevice device,
706 const VkDescriptorPool descriptorPool,
707 const VkDescriptorSetLayout setLayout)
708 {
709 const VkDescriptorSetAllocateInfo allocateParams =
710 {
711 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
712 DE_NULL, // const void* pNext;
713 descriptorPool, // VkDescriptorPool descriptorPool;
714 1u, // deUint32 setLayoutCount;
715 &setLayout, // const VkDescriptorSetLayout* pSetLayouts;
716 };
717 return allocateDescriptorSet(vk, device, &allocateParams);
718 }
719
720 tcu::TestStatus MemoryModelTestInstance::iterate (void)
721 {
722 const DeviceInterface& vk = m_context.getDeviceInterface();
723 const VkDevice device = m_context.getDevice();
724 Allocator& allocator = m_context.getDefaultAllocator();
725
726 VkPhysicalDeviceProperties2 properties;
727 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
728 properties.pNext = NULL;
729
730 m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
731
732 deUint32 DIM = 31;
733 deUint32 NUM_WORKGROUP_EACH_DIM = 8;
734 // If necessary, shrink workgroup size to fit HW limits
735 if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
736 {
737 DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
738 }
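 // With the defaults (DIM=31, 8x8 workgroups) this is 31*31*8*8 = 61504 invocations.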
739 deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
740
741 VkDeviceSize bufferSizes[3];
742 de::MovePtr<BufferWithMemory> buffers[3];
743 vk::VkDescriptorBufferInfo bufferDescriptors[3];
744 de::MovePtr<BufferWithMemory> copyBuffer;
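 // Buffer 0 is the payload (binding 0), buffer 1 is the guard (binding 1), and buffer 2 is the
 // "fail" buffer (binding 2) that the shader writes to on a detected violation.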
745
746 for (deUint32 i = 0; i < 3; ++i)
747 {
748 size_t elementSize = m_data.dataType == DATA_TYPE_UINT64 ? sizeof(deUint64) : sizeof(deUint32);
749 // buffer2 is the "fail" buffer, and is always uint
750 if (i == 2)
751 elementSize = sizeof(deUint32);
752 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
753
754 bool local;
755 switch (i)
756 {
757 default: DE_ASSERT(0); // fall through
758 case 0:
759 if (m_data.payloadSC != SC_BUFFER)
760 continue;
761 local = m_data.payloadMemLocal;
762 break;
763 case 1:
764 if (m_data.guardSC != SC_BUFFER)
765 continue;
766 local = m_data.guardMemLocal;
767 break;
768 case 2: local = true; break;
769 }
770
771 try
772 {
773 buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
774 vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
775 local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
776 }
777 catch (const tcu::NotSupportedError&)
778 {
779 if (!local)
780 {
781 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
782 }
783 throw;
784 }
785 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
786 }
787
788 // Try to use cached host memory for the buffer the CPU will read from, else fall back to host visible.
789 try
790 {
791 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
792 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
793 }
794 catch (const tcu::NotSupportedError&)
795 {
796 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
797 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
798 }
799
800 const VkImageCreateInfo imageCreateInfo =
801 {
802 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
803 DE_NULL, // const void* pNext;
804 (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
805 VK_IMAGE_TYPE_2D, // VkImageType imageType;
806 VK_FORMAT_R32_UINT, // VkFormat format;
807 {
808 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 width;
809 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 height;
810 1u // deUint32 depth;
811 }, // VkExtent3D extent;
812 1u, // deUint32 mipLevels;
813 1u, // deUint32 arrayLayers;
814 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
815 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
816 VK_IMAGE_USAGE_STORAGE_BIT
817 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
818 | VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
819 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
820 0u, // deUint32 queueFamilyIndexCount;
821 DE_NULL, // const deUint32* pQueueFamilyIndices;
822 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
823 };
824 VkImageViewCreateInfo imageViewCreateInfo =
825 {
826 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
827 DE_NULL, // const void* pNext;
828 (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
829 DE_NULL, // VkImage image;
830 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
831 VK_FORMAT_R32_UINT, // VkFormat format;
832 {
833 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
834 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
835 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
836 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
837 }, // VkComponentMapping components;
838 {
839 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
840 0u, // deUint32 baseMipLevel;
841 1u, // deUint32 levelCount;
842 0u, // deUint32 baseArrayLayer;
843 1u // deUint32 layerCount;
844 } // VkImageSubresourceRange subresourceRange;
845 };
846
847
848 de::MovePtr<ImageWithMemory> images[2];
849 Move<VkImageView> imageViews[2];
850 vk::VkDescriptorImageInfo imageDescriptors[2];
851
852 for (deUint32 i = 0; i < 2; ++i)
853 {
854
855 bool local;
856 switch (i)
857 {
858 default: DE_ASSERT(0); // fall through
859 case 0:
860 if (m_data.payloadSC != SC_IMAGE)
861 continue;
862 local = m_data.payloadMemLocal;
863 break;
864 case 1:
865 if (m_data.guardSC != SC_IMAGE)
866 continue;
867 local = m_data.guardMemLocal;
868 break;
869 }
870
871 try
872 {
873 images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
874 vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
875 }
876 catch (const tcu::NotSupportedError&)
877 {
878 if (!local)
879 {
880 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
881 }
882 throw;
883 }
884 imageViewCreateInfo.image = **images[i];
885 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
886
887 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
888 }
889
890 vk::DescriptorSetLayoutBuilder layoutBuilder;
891
892 switch (m_data.payloadSC)
893 {
894 default:
895 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
896 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
897 }
898 switch (m_data.guardSC)
899 {
900 default:
901 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
902 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
903 }
904 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
905
906 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
907
908 vk::Unique<vk::VkDescriptorPool> descriptorPool(vk::DescriptorPoolBuilder()
909 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
910 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
911 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
912 vk::Unique<vk::VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
913
914 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
915 switch (m_data.payloadSC)
916 {
917 default: DE_ASSERT(0); // fall through
918 case SC_WORKGROUP:
919 break;
920 case SC_BUFFER:
921 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
922 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
923 break;
924 case SC_IMAGE:
925 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
926 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
927 break;
928 }
929 switch (m_data.guardSC)
930 {
931 default: DE_ASSERT(0); // fall through
932 case SC_WORKGROUP:
933 break;
934 case SC_BUFFER:
935 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
936 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
937 break;
938 case SC_IMAGE:
939 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
940 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
941 break;
942 }
943 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
944 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
945
946 setUpdateBuilder.update(vk, device);
947
948
949 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
950 {
951 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
952 DE_NULL, // pNext
953 (VkPipelineLayoutCreateFlags)0,
954 1, // setLayoutCount
955 &descriptorSetLayout.get(), // pSetLayouts
956 0u, // pushConstantRangeCount
957 DE_NULL, // pPushConstantRanges
958 };
959
960 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
961
962 Move<VkPipeline> pipeline;
963 Move<VkRenderPass> renderPass;
964 Move<VkFramebuffer> framebuffer;
965
966 VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
967
968 const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
969
970 const vk::VkSpecializationMapEntry entries[2] =
971 {
972 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
973 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
974 };
975
976 const vk::VkSpecializationInfo specInfo =
977 {
978 2, // mapEntryCount
979 entries, // pMapEntries
980 sizeof(specData), // dataSize
981 specData // pData
982 };
983
984 if (m_data.stage == STAGE_COMPUTE)
985 {
986 const Unique<VkShaderModule> shader (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
987
988 const VkPipelineShaderStageCreateInfo shaderCreateInfo =
989 {
990 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
991 DE_NULL,
992 (VkPipelineShaderStageCreateFlags)0,
993 VK_SHADER_STAGE_COMPUTE_BIT, // stage
994 *shader, // shader
995 "main",
996 &specInfo, // pSpecializationInfo
997 };
998
999 const VkComputePipelineCreateInfo pipelineCreateInfo =
1000 {
1001 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1002 DE_NULL,
1003 0u, // flags
1004 shaderCreateInfo, // cs
1005 *pipelineLayout, // layout
1006 (vk::VkPipeline)0, // basePipelineHandle
1007 0u, // basePipelineIndex
1008 };
1009 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1010 }
1011 else
1012 {
1013
1014 const vk::VkSubpassDescription subpassDesc =
1015 {
1016 (vk::VkSubpassDescriptionFlags)0,
1017 vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1018 0u, // inputCount
1019 DE_NULL, // pInputAttachments
1020 0u, // colorCount
1021 DE_NULL, // pColorAttachments
1022 DE_NULL, // pResolveAttachments
1023 DE_NULL, // depthStencilAttachment
1024 0u, // preserveCount
1025 DE_NULL, // pPreserveAttachments
1026
1027 };
1028 const vk::VkRenderPassCreateInfo renderPassParams =
1029 {
1030 vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1031 DE_NULL, // pNext
1032 (vk::VkRenderPassCreateFlags)0,
1033 0u, // attachmentCount
1034 DE_NULL, // pAttachments
1035 1u, // subpassCount
1036 &subpassDesc, // pSubpasses
1037 0u, // dependencyCount
1038 DE_NULL, // pDependencies
1039 };
1040
1041 renderPass = createRenderPass(vk, device, &renderPassParams);
1042
1043 const vk::VkFramebufferCreateInfo framebufferParams =
1044 {
1045 vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1046 DE_NULL, // pNext
1047 (vk::VkFramebufferCreateFlags)0,
1048 *renderPass, // renderPass
1049 0u, // attachmentCount
1050 DE_NULL, // pAttachments
1051 DIM*NUM_WORKGROUP_EACH_DIM, // width
1052 DIM*NUM_WORKGROUP_EACH_DIM, // height
1053 1u, // layers
1054 };
1055
1056 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1057
1058 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
1059 {
1060 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1061 DE_NULL, // const void* pNext;
1062 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1063 0u, // deUint32 vertexBindingDescriptionCount;
1064 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1065 0u, // deUint32 vertexAttributeDescriptionCount;
1066 DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1067 };
1068
1069 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
1070 {
1071 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1072 DE_NULL, // const void* pNext;
1073 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1074 (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1075 VK_FALSE // VkBool32 primitiveRestartEnable;
1076 };
1077
1078 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
1079 {
1080 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1081 DE_NULL, // const void* pNext;
1082 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
1083 VK_FALSE, // VkBool32 depthClampEnable;
1084 (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1085 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
1086 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
1087 VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
1088 VK_FALSE, // VkBool32 depthBiasEnable;
1089 0.0f, // float depthBiasConstantFactor;
1090 0.0f, // float depthBiasClamp;
1091 0.0f, // float depthBiasSlopeFactor;
1092 1.0f // float lineWidth;
1093 };
1094
1095 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
1096 {
1097 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
1098 DE_NULL, // const void* pNext
1099 0u, // VkPipelineMultisampleStateCreateFlags flags
1100 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
1101 VK_FALSE, // VkBool32 sampleShadingEnable
1102 1.0f, // float minSampleShading
1103 DE_NULL, // const VkSampleMask* pSampleMask
1104 VK_FALSE, // VkBool32 alphaToCoverageEnable
1105 VK_FALSE // VkBool32 alphaToOneEnable
1106 };
1107
1108 VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1109 VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1110
1111 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
1112 {
1113 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
1114 DE_NULL, // const void* pNext
1115 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
1116 1u, // deUint32 viewportCount
1117 &viewport, // const VkViewport* pViewports
1118 1u, // deUint32 scissorCount
1119 &scissor // const VkRect2D* pScissors
1120 };
1121
1122 Move<VkShaderModule> fs;
1123 Move<VkShaderModule> vs;
1124
1125 deUint32 numStages;
1126 if (m_data.stage == STAGE_VERTEX)
1127 {
1128 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1129 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1130 numStages = 1u;
1131 }
1132 else
1133 {
1134 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1135 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1136 numStages = 2u;
1137 }
1138
1139 const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1140 {
1141 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1142 DE_NULL,
1143 (VkPipelineShaderStageCreateFlags)0,
1144 VK_SHADER_STAGE_VERTEX_BIT, // stage
1145 *vs, // shader
1146 "main",
1147 &specInfo, // pSpecializationInfo
1148 },
1149 {
1150 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1151 DE_NULL,
1152 (VkPipelineShaderStageCreateFlags)0,
1153 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1154 *fs, // shader
1155 "main",
1156 &specInfo, // pSpecializationInfo
1157 }
1158 };
1159
1160 const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
1161 {
1162 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1163 DE_NULL, // const void* pNext;
1164 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
1165 numStages, // deUint32 stageCount;
1166 &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
1167 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1168 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1169 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1170 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
1171 &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1172 &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1173 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1174 DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1175 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1176 pipelineLayout.get(), // VkPipelineLayout layout;
1177 renderPass.get(), // VkRenderPass renderPass;
1178 0u, // deUint32 subpass;
1179 DE_NULL, // VkPipeline basePipelineHandle;
1180 0 // int basePipelineIndex;
1181 };
1182
1183 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1184 }
1185
1186 const VkQueue queue = m_context.getUniversalQueue();
1187 Move<VkCommandPool> cmdPool = createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
1188 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1189
1190 beginCommandBuffer(vk, *cmdBuffer, 0u);
1191
1192 vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1193
1194 for (deUint32 i = 0; i < 2; ++i)
1195 {
1196 if (!images[i])
1197 continue;
1198
1199 const VkImageMemoryBarrier imageBarrier =
1200 {
1201 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
1202 DE_NULL, // const void* pNext
1203 0u, // VkAccessFlags srcAccessMask
1204 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
1205 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
1206 VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
1207 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
1208 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
1209 **images[i], // VkImage image
1210 {
1211 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
1212 0u, // uint32_t baseMipLevel
1213 1u, // uint32_t levelCount
1214 0u, // uint32_t baseArrayLayer
1215 1u, // uint32_t layerCount
1216 }
1217 };
1218
1219 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1220 (VkDependencyFlags)0,
1221 0, (const VkMemoryBarrier*)DE_NULL,
1222 0, (const VkBufferMemoryBarrier*)DE_NULL,
1223 1, &imageBarrier);
1224 }
1225
1226 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1227 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1228
1229 VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1230 VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1231
1232 VkMemoryBarrier memBarrier =
1233 {
1234 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1235 DE_NULL, // pNext
1236 0u, // srcAccessMask
1237 0u, // dstAccessMask
1238 };
1239
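 // Re-initialize the resources and re-run the shader many times within one submission to give
 // any ordering violation a chance to show up.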
1240 for (deUint32 iters = 0; iters < 200; ++iters)
1241 {
1242 for (deUint32 i = 0; i < 2; ++i)
1243 {
1244 if (buffers[i])
1245 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1246 if (images[i])
1247 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1248 }
1249
1250 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1251 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1252 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1253 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1254
1255 if (m_data.stage == STAGE_COMPUTE)
1256 {
1257 vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1258 }
1259 else
1260 {
1261 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1262 makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1263 0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1264 // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1265 if (m_data.stage == STAGE_VERTEX)
1266 {
1267 vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1268 }
1269 else
1270 {
1271 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1272 }
1273 endRenderPass(vk, *cmdBuffer);
1274 }
1275
1276 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1277 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1278 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1279 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1280 }
1281
1282 const VkBufferCopy copyParams =
1283 {
1284 (VkDeviceSize)0u, // srcOffset
1285 (VkDeviceSize)0u, // dstOffset
1286 bufferSizes[2] // size
1287 };
1288
1289 vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1290
1291 endCommandBuffer(vk, *cmdBuffer);
1292
1293 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1294
1295 tcu::TestLog& log = m_context.getTestContext().getLog();
1296
1297 deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1298 invalidateMappedMemoryRange(vk, device, copyBuffer->getAllocation().getMemory(), copyBuffer->getAllocation().getOffset(), bufferSizes[2]);
1299 qpTestResult res = QP_TEST_RESULT_PASS;
1300
1301 deUint32 numErrors = 0;
1302 for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1303 {
1304 if (ptr[i] != 0)
1305 {
1306 if (numErrors < 256)
1307 {
1308 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1309 }
1310 numErrors++;
1311 res = QP_TEST_RESULT_FAIL;
1312 }
1313 }
1314
1315 if (numErrors)
1316 {
1317 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1318 }
1319
1320 return tcu::TestStatus(res, qpGetTestResultName(res));
1321 }
1322
1323 } // anonymous
1324
1325 tcu::TestCaseGroup* createTests (tcu::TestContext& testCtx)
1326 {
1327 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1328 testCtx, "memory_model", "Memory model tests"));
1329
1330 typedef struct
1331 {
1332 deUint32 value;
1333 const char* name;
1334 const char* description;
1335 } TestGroupCase;
1336
1337 TestGroupCase ttCases[] =
1338 {
1339 { TT_MP, "message_passing", "message passing" },
1340 { TT_WAR, "write_after_read", "write after read" },
1341 };
1342
1343 TestGroupCase core11Cases[] =
1344 {
1345 { 1, "core11", "Supported by Vulkan1.1" },
1346 { 0, "ext", "Requires VK_KHR_vulkan_memory_model extension" },
1347 };
1348
1349 TestGroupCase dtCases[] =
1350 {
1351 { DATA_TYPE_UINT, "u32", "uint32_t atomics" },
1352 { DATA_TYPE_UINT64, "u64", "uint64_t atomics" },
1353 };
1354
1355 TestGroupCase cohCases[] =
1356 {
1357 { 1, "coherent", "coherent payload variable" },
1358 { 0, "noncoherent", "noncoherent payload variable" },
1359 };
1360
1361 TestGroupCase stCases[] =
1362 {
1363 { ST_FENCE_FENCE, "fence_fence", "release fence, acquire fence" },
1364 { ST_FENCE_ATOMIC, "fence_atomic", "release fence, atomic acquire" },
1365 { ST_ATOMIC_FENCE, "atomic_fence", "atomic release, acquire fence" },
1366 { ST_ATOMIC_ATOMIC, "atomic_atomic", "atomic release, atomic acquire" },
1367 { ST_CONTROL_BARRIER, "control_barrier", "control barrier" },
1368 { ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier", "control barrier with release/acquire" },
1369 };
1370
1371 TestGroupCase rmwCases[] =
1372 {
1373 { 0, "atomicwrite", "atomic write" },
1374 { 1, "atomicrmw", "atomic rmw" },
1375 };
1376
1377 TestGroupCase scopeCases[] =
1378 {
1379 { SCOPE_DEVICE, "device", "device scope" },
1380 { SCOPE_QUEUEFAMILY, "queuefamily", "queuefamily scope" },
1381 { SCOPE_WORKGROUP, "workgroup", "workgroup scope" },
1382 { SCOPE_SUBGROUP, "subgroup", "subgroup scope" },
1383 };
1384
1385 TestGroupCase plCases[] =
1386 {
1387 { 0, "payload_nonlocal", "payload variable in non-local memory" },
1388 { 1, "payload_local", "payload variable in local memory" },
1389 };
1390
1391 TestGroupCase pscCases[] =
1392 {
1393 { SC_BUFFER, "buffer", "payload variable in buffer memory" },
1394 { SC_IMAGE, "image", "payload variable in image memory" },
1395 { SC_WORKGROUP, "workgroup", "payload variable in workgroup memory" },
1396 };
1397
1398 TestGroupCase glCases[] =
1399 {
1400 { 0, "guard_nonlocal", "guard variable in non-local memory" },
1401 { 1, "guard_local", "guard variable in local memory" },
1402 };
1403
1404 TestGroupCase gscCases[] =
1405 {
1406 { SC_BUFFER, "buffer", "guard variable in buffer memory" },
1407 { SC_IMAGE, "image", "guard variable in image memory" },
1408 { SC_WORKGROUP, "workgroup", "guard variable in workgroup memory" },
1409 };
1410
1411 TestGroupCase stageCases[] =
1412 {
1413 { STAGE_COMPUTE, "comp", "compute shader" },
1414 { STAGE_VERTEX, "vert", "vertex shader" },
1415 { STAGE_FRAGMENT, "frag", "fragment shader" },
1416 };
1417
1418
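 // The nested groups below yield test names such as
 // memory_model.message_passing.ext.u32.coherent.atomic_atomic.atomicwrite.device.payload_local.buffer.guard_local.buffer.comp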
1419 for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1420 {
1421 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1422 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1423 {
1424 de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1425 for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1426 {
1427 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1428 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1429 {
1430 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1431 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1432 {
1433 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1434 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1435 {
1436 de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1437 for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1438 {
1439 de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1440 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1441 {
1442 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1443 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1444 {
1445 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1446 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1447 {
1448 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1449 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1450 {
1451 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1452 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1453 {
1454 CaseDef c =
1455 {
1456 !!plCases[plNdx].value, // bool payloadMemLocal;
1457 !!glCases[glNdx].value, // bool guardMemLocal;
1458 !!cohCases[cohNdx].value, // bool coherent;
1459 !!core11Cases[core11Ndx].value, // bool core11;
1460 !!rmwCases[rmwNdx].value, // bool atomicRMW;
1461 (TestType)ttCases[ttNdx].value, // TestType testType;
1462 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
1463 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
1464 (Scope)scopeCases[scopeNdx].value, // Scope scope;
1465 (SyncType)stCases[stNdx].value, // SyncType syncType;
1466 (Stage)stageCases[stageNdx].value, // Stage stage;
1467 (DataType)dtCases[dtNdx].value, // DataType dataType;
1468 };
1469
1470 // Mustpass11 tests should only exercise things we expect to work on
1471 // existing implementations. Exclude noncoherent tests which require
1472 // new extensions, and assume atomic synchronization wouldn't work
1473 // (i.e. atomics may be implemented as relaxed atomics). Exclude
1474 // queuefamily scope which doesn't exist in Vulkan 1.1.
1475 if (c.core11 &&
1476 (c.coherent == 0 ||
1477 c.syncType == ST_FENCE_ATOMIC ||
1478 c.syncType == ST_ATOMIC_FENCE ||
1479 c.syncType == ST_ATOMIC_ATOMIC ||
1480 c.dataType == DATA_TYPE_UINT64 ||
1481 c.scope == SCOPE_QUEUEFAMILY))
1482 {
1483 continue;
1484 }
1485
1486 if (c.stage != STAGE_COMPUTE &&
1487 c.scope == SCOPE_WORKGROUP)
1488 {
1489 continue;
1490 }
1491
1492 // Don't exercise local and non-local for workgroup memory
1493 // Also don't exercise workgroup memory for non-compute stages
1494 if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1495 {
1496 continue;
1497 }
1498 if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1499 {
1500 continue;
1501 }
1502 // Can't do control barrier with larger than workgroup scope, or non-compute stages
1503 if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1504 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1505 {
1506 continue;
1507 }
1508
1509 // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1510 if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1511 {
1512 continue;
1513 }
1514
1515 // uint64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
1516 if (c.dataType == DATA_TYPE_UINT64 && c.syncType != ST_ATOMIC_ATOMIC)
1517 {
1518 continue;
1519 }
1520
1521 // No 64-bit image types, so skip tests with both payload and guard in image memory
1522 if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1523 {
1524 continue;
1525 }
1526
1527 // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
1528 if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1529 (c.guardSC != 0 || c.guardMemLocal != 0))
1530 {
1531 continue;
1532 }
1533
1534 gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1535 }
1536 glGroup->addChild(gscGroup.release());
1537 }
1538 pscGroup->addChild(glGroup.release());
1539 }
1540 plGroup->addChild(pscGroup.release());
1541 }
1542 scopeGroup->addChild(plGroup.release());
1543 }
1544 rmwGroup->addChild(scopeGroup.release());
1545 }
1546 stGroup->addChild(rmwGroup.release());
1547 }
1548 cohGroup->addChild(stGroup.release());
1549 }
1550 dtGroup->addChild(cohGroup.release());
1551 }
1552 core11Group->addChild(dtGroup.release());
1553 }
1554 ttGroup->addChild(core11Group.release());
1555 }
1556 group->addChild(ttGroup.release());
1557 }
1558 return group.release();
1559 }
1560
1561 } // MemoryModel
1562 } // vkt
1563