1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2018-2019 NVIDIA Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Vulkan Memory Model tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26 #include "vktMemoryModelPadding.hpp"
27
28 #include "vkBufferWithMemory.hpp"
29 #include "vkImageWithMemory.hpp"
30 #include "vkQueryUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkTypeUtil.hpp"
34 #include "vkObjUtil.hpp"
35
36 #include "vktTestGroupUtil.hpp"
37 #include "vktTestCase.hpp"
38
39 #include "deDefs.h"
40 #include "deMath.h"
41 #include "deSharedPtr.hpp"
42 #include "deString.h"
43
44 #include "tcuTestCase.hpp"
45 #include "tcuTestLog.hpp"
46
47 #include <string>
48 #include <sstream>
49
50 namespace vkt
51 {
52 namespace MemoryModel
53 {
54 namespace
55 {
56 using namespace vk;
57 using namespace std;
58
59 typedef enum
60 {
61 TT_MP = 0, // message passing
62 TT_WAR, // write-after-read hazard
63 } TestType;
64
65 typedef enum
66 {
67 ST_FENCE_FENCE = 0,
68 ST_FENCE_ATOMIC,
69 ST_ATOMIC_FENCE,
70 ST_ATOMIC_ATOMIC,
71 ST_CONTROL_BARRIER,
72 ST_CONTROL_AND_MEMORY_BARRIER,
73 } SyncType;
74
75 typedef enum
76 {
77 SC_BUFFER = 0,
78 SC_IMAGE,
79 SC_WORKGROUP,
80 SC_PHYSBUFFER,
81 } StorageClass;
82
83 typedef enum
84 {
85 SCOPE_DEVICE = 0,
86 SCOPE_QUEUEFAMILY,
87 SCOPE_WORKGROUP,
88 SCOPE_SUBGROUP,
89 } Scope;
90
91 typedef enum
92 {
93 STAGE_COMPUTE = 0,
94 STAGE_VERTEX,
95 STAGE_FRAGMENT,
96 } Stage;
97
98 typedef enum
99 {
100 DATA_TYPE_UINT = 0,
101 DATA_TYPE_UINT64,
102 DATA_TYPE_FLOAT32,
103 DATA_TYPE_FLOAT64,
104 } DataType;
105
106 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
107 const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
108
109 struct CaseDef
110 {
111 bool payloadMemLocal;
112 bool guardMemLocal;
113 bool coherent;
114 bool core11;
115 bool atomicRMW;
116 TestType testType;
117 StorageClass payloadSC;
118 StorageClass guardSC;
119 Scope scope;
120 SyncType syncType;
121 Stage stage;
122 DataType dataType;
123 bool transitive;
124 bool transitiveVis;
125 };
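// Illustrative example (not necessarily one of the generated cases): a coherent,
// device-scope message-passing test keeping both payload and guard in storage
// buffers and synchronizing with release/acquire atomics in a compute shader
// could be described as
//   { true, true, true, false, false, TT_MP, SC_BUFFER, SC_BUFFER,
//     SCOPE_DEVICE, ST_ATOMIC_ATOMIC, STAGE_COMPUTE, DATA_TYPE_UINT, false, false }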
126
127 class MemoryModelTestInstance : public TestInstance
128 {
129 public:
130 MemoryModelTestInstance (Context& context, const CaseDef& data);
131 ~MemoryModelTestInstance (void);
132 tcu::TestStatus iterate (void);
133 private:
134 CaseDef m_data;
135
136 enum
137 {
138 WIDTH = 256,
139 HEIGHT = 256
140 };
141 };
142
143 MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
144 : vkt::TestInstance (context)
145 , m_data (data)
146 {
147 }
148
149 MemoryModelTestInstance::~MemoryModelTestInstance (void)
150 {
151 }
152
153 class MemoryModelTestCase : public TestCase
154 {
155 public:
156 MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
157 ~MemoryModelTestCase (void);
158 virtual void initPrograms (SourceCollections& programCollection) const;
159 virtual void initProgramsTransitive(SourceCollections& programCollection) const;
160 virtual TestInstance* createInstance (Context& context) const;
161 virtual void checkSupport (Context& context) const;
162
163 private:
164 CaseDef m_data;
165 };
166
167 MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
168 : vkt::TestCase (context, name, desc)
169 , m_data (data)
170 {
171 }
172
173 MemoryModelTestCase::~MemoryModelTestCase (void)
174 {
175 }
176
177 void MemoryModelTestCase::checkSupport(Context& context) const
178 {
179 if (!context.contextSupports(vk::ApiVersion(1, 1, 0)))
180 {
181 TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
182 }
183
184 if (!m_data.core11)
185 {
186 if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
187 {
188 TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
189 }
190
191 if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
192 {
193 TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
194 }
195 }
196
197 if (m_data.scope == SCOPE_SUBGROUP)
198 {
199 // Check for subgroup support for scope_subgroup tests.
200 VkPhysicalDeviceSubgroupProperties subgroupProperties;
201 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
202 subgroupProperties.pNext = DE_NULL;
203 subgroupProperties.supportedOperations = 0;
204
205 VkPhysicalDeviceProperties2 properties;
206 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
207 properties.pNext = &subgroupProperties;
208
209 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
210
211 if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
212 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
213 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
214 {
215 TCU_THROW(NotSupportedError, "Subgroup features not supported");
216 }
217
218 VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT;
219 if (m_data.stage == STAGE_VERTEX)
220 {
221 stage = VK_SHADER_STAGE_VERTEX_BIT;
222 }
223 else if (m_data.stage == STAGE_COMPUTE)
224 {
225 stage = VK_SHADER_STAGE_COMPUTE_BIT;
226 }
227 else if (m_data.stage == STAGE_FRAGMENT)
228 {
229 stage = VK_SHADER_STAGE_FRAGMENT_BIT;
230 }
231
232 if ((subgroupProperties.supportedStages & stage) == 0)
233 {
234 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
235 }
236 }
237 if (m_data.dataType == DATA_TYPE_UINT64)
238 {
239 if (!context.getDeviceFeatures().shaderInt64)
240 {
241 TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
242 }
243 if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
244 (m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
245 {
246 TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
247 }
248 if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
249 m_data.guardSC == SC_WORKGROUP)
250 {
251 TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
252 }
253 }
254
255 if (m_data.dataType == DATA_TYPE_FLOAT32)
256 {
257 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
258 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
259
260 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
261 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
262 {
263 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
264 }
265
266 if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
267 {
268 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
269 }
270
271 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
272 {
273 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
274 }
275 }
276
277 if (m_data.dataType == DATA_TYPE_FLOAT64)
278 {
279 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
280 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
281
282 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
283 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
284 {
285 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
286 }
287
288 if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
289 {
290 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
291 }
292
293 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
294 {
295 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
296 }
297 }
298
299 if (m_data.transitive &&
300 !context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
301 TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");
302
303 if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) && !context.isBufferDeviceAddressSupported())
304 TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
305
306 if (m_data.stage == STAGE_VERTEX)
307 {
308 if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
309 {
310 TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
311 }
312 }
313 if (m_data.stage == STAGE_FRAGMENT)
314 {
315 if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
316 {
317 TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
318 }
319 }
320 }
321
322
323 void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
324 {
325 if (m_data.transitive)
326 {
327 initProgramsTransitive(programCollection);
328 return;
329 }
330 DE_ASSERT(!m_data.transitiveVis);
331
332 Scope invocationMapping = m_data.scope;
333 if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
334 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
335 {
336 invocationMapping = SCOPE_WORKGROUP;
337 }
338
339 const char *scopeStr;
340 switch (m_data.scope)
341 {
342 default: DE_ASSERT(0); // fall through
343 case SCOPE_DEVICE: scopeStr = "gl_ScopeDevice"; break;
344 case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
345 case SCOPE_WORKGROUP: scopeStr = "gl_ScopeWorkgroup"; break;
346 case SCOPE_SUBGROUP: scopeStr = "gl_ScopeSubgroup"; break;
347 }
348
349 const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
350 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
351 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
352
353 // Construct storageSemantics strings. Both release and acquire
354 // always have the payload storage class. They only include the
355 // guard storage class if they're using FENCE for that side of the
356 // sync.
357 std::stringstream storageSemanticsRelease;
358 switch (m_data.payloadSC)
359 {
360 default: DE_ASSERT(0); // fall through
361 case SC_PHYSBUFFER: // fall through
362 case SC_BUFFER: storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
363 case SC_IMAGE: storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
364 case SC_WORKGROUP: storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
365 }
366 std::stringstream storageSemanticsAcquire;
367 storageSemanticsAcquire << storageSemanticsRelease.str();
368 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
369 {
370 switch (m_data.guardSC)
371 {
372 default: DE_ASSERT(0); // fall through
373 case SC_PHYSBUFFER: // fall through
374 case SC_BUFFER: storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
375 case SC_IMAGE: storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
376 case SC_WORKGROUP: storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
377 }
378 }
379 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
380 {
381 switch (m_data.guardSC)
382 {
383 default: DE_ASSERT(0); // fall through
384 case SC_PHYSBUFFER: // fall through
385 case SC_BUFFER: storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
386 case SC_IMAGE: storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
387 case SC_WORKGROUP: storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
388 }
389 }
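// For illustration: with a buffer payload, an image guard and ST_FENCE_FENCE both
// strings end up as "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage", while
// with ST_ATOMIC_ATOMIC they stay "gl_StorageSemanticsBuffer" because the guard
// storage class is carried on the atomics rather than on the fences.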
390
391 std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
392
393 semanticsRelease << "gl_SemanticsRelease";
394 semanticsAcquire << "gl_SemanticsAcquire";
395 semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
396 if (!m_data.coherent && m_data.testType != TT_WAR)
397 {
398 DE_ASSERT(!m_data.core11);
399 semanticsRelease << " | gl_SemanticsMakeAvailable";
400 semanticsAcquire << " | gl_SemanticsMakeVisible";
401 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
402 }
403
404 std::stringstream css;
405 css << "#version 450 core\n";
406 if (!m_data.core11)
407 {
408 css << "#pragma use_vulkan_memory_model\n";
409 }
410 if (!intType)
411 {
412 css <<
413 "#extension GL_EXT_shader_atomic_float : enable\n"
414 "#extension GL_KHR_memory_scope_semantics : enable\n";
415 }
416 css <<
417 "#extension GL_KHR_shader_subgroup_basic : enable\n"
418 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
419 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
420 "#extension GL_KHR_memory_scope_semantics : enable\n"
421 "#extension GL_ARB_gpu_shader_int64 : enable\n"
422 "#extension GL_EXT_buffer_reference : enable\n"
423 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
424 "layout(constant_id = 0) const int DIM = 1;\n"
425 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
426 "struct S { " << typeStr << " x[DIM*DIM]; };\n";
427
428 if (m_data.stage == STAGE_COMPUTE)
429 {
430 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
431 }
432
433 const char *memqual = "";
434 if (m_data.coherent)
435 {
436 if (m_data.core11)
437 {
438 // Vulkan 1.1 only has "coherent", use it regardless of scope
439 memqual = "coherent";
440 }
441 else
442 {
443 switch (m_data.scope)
444 {
445 default: DE_ASSERT(0); // fall through
446 case SCOPE_DEVICE: memqual = "devicecoherent"; break;
447 case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
448 case SCOPE_WORKGROUP: memqual = "workgroupcoherent"; break;
449 case SCOPE_SUBGROUP: memqual = "subgroupcoherent"; break;
450 }
451 }
452 }
453 else
454 {
455 DE_ASSERT(!m_data.core11);
456 memqual = "nonprivate";
457 }
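// For example, a coherent workgroup-scope variant declares the payload
// "workgroupcoherent", while non-coherent variants fall back to "nonprivate"
// and rely on the MakeAvailable/MakeVisible semantics added above.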
458
459 stringstream pushConstMembers;
460
461 // Declare payload, guard, and fail resources
462 switch (m_data.payloadSC)
463 {
464 default: DE_ASSERT(0); // fall through
465 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
466 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break;
467 case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
468 case SC_IMAGE:
469 if (intType)
470 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
471 else
472 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
473 break;
474 case SC_WORKGROUP: css << "shared S payload;\n"; break;
475 }
476 if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
477 {
478 // The guard variable is only accessed with atomics and need not be declared coherent.
479 switch (m_data.guardSC)
480 {
481 default: DE_ASSERT(0); // fall through
482 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
483 pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
484 case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
485 case SC_IMAGE:
486 if (intType)
487 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
488 else
489 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
490 break;
491 case SC_WORKGROUP: css << "shared S guard;\n"; break;
492 }
493 }
494
495 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
496
497 if (pushConstMembers.str().size() != 0) {
498 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
499 }
500
501 css <<
502 "void main()\n"
503 "{\n"
504 " bool pass = true;\n"
505 " bool skip = false;\n";
506
507 if (m_data.payloadSC == SC_PHYSBUFFER)
508 css << " " << memqual << " PayloadRef payload = payloadref;\n";
509
510 if (m_data.stage == STAGE_FRAGMENT)
511 {
512 // Kill helper invocations so they don't load outside the bounds of the SSBO.
513 // Helper pixels are also initially "active" and if a thread gets one as its
514 // partner in SCOPE_SUBGROUP mode, it can't run the test.
515 css << " if (gl_HelperInvocation) { return; }\n";
516 }
517
518 // Compute coordinates based on the storage class and scope.
519 // For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
520 // For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
521 // For subgroup scope, we pair up LocalInvocationID and LocalInvocationID from subgroupId^(subgroupSize-1)
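// Worked example (illustrative) for workgroup scope with DIM == 8: invocation
// (1,2) pairs with (6,5), giving sharedCoord = 2*8+1 = 17 and
// partnerSharedCoord = 5*8+6 = 46.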
522 switch (invocationMapping)
523 {
524 default: DE_ASSERT(0); // fall through
525 case SCOPE_SUBGROUP:
526 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
527 css << " uvec4 ballot = subgroupBallot(true);\n"
528 " if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
529
530 switch (m_data.stage)
531 {
532 default: DE_ASSERT(0); // fall through
533 case STAGE_COMPUTE:
534 css <<
535 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
536 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
537 " uint sharedCoord = localId.y * DIM + localId.x;\n"
538 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
539 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
540 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
541 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
542 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
543 break;
544 case STAGE_VERTEX:
545 css <<
546 " uint bufferCoord = gl_VertexIndex;\n"
547 " uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
548 " ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
549 " ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
550 " gl_PointSize = 1.0f;\n"
551 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
552 break;
553 case STAGE_FRAGMENT:
554 css <<
555 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
556 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
557 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
558 " ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
559 " uint sharedCoord = localId.y * DIM + localId.x;\n"
560 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
561 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
562 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
563 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
564 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
565 break;
566 }
567 break;
568 case SCOPE_WORKGROUP:
569 css <<
570 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
571 " ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
572 " uint sharedCoord = localId.y * DIM + localId.x;\n"
573 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
574 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
575 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
576 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
577 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
578 break;
579 case SCOPE_QUEUEFAMILY:
580 case SCOPE_DEVICE:
581 switch (m_data.stage)
582 {
583 default: DE_ASSERT(0); // fall through
584 case STAGE_COMPUTE:
585 css <<
586 " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
587 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
588 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
589 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
590 " ivec2 imageCoord = globalId;\n"
591 " ivec2 partnerImageCoord = partnerGlobalId;\n";
592 break;
593 case STAGE_VERTEX:
594 css <<
595 " ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
596 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
597 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
598 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
599 " ivec2 imageCoord = globalId;\n"
600 " ivec2 partnerImageCoord = partnerGlobalId;\n"
601 " gl_PointSize = 1.0f;\n"
602 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
603 break;
604 case STAGE_FRAGMENT:
605 css <<
606 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
607 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
608 " ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
609 " ivec2 partnerGroupId = groupId;\n"
610 " uint sharedCoord = localId.y * DIM + localId.x;\n"
611 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
612 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
613 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
614 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
615 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
616 break;
617 }
618 break;
619 }
620
621 // Initialize shared memory, followed by a barrier
622 if (m_data.payloadSC == SC_WORKGROUP)
623 {
624 css << " payload.x[sharedCoord] = 0;\n";
625 }
626 if (m_data.guardSC == SC_WORKGROUP)
627 {
628 css << " guard.x[sharedCoord] = 0;\n";
629 }
630 if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
631 {
632 switch (invocationMapping)
633 {
634 default: DE_ASSERT(0); // fall through
635 case SCOPE_SUBGROUP: css << " subgroupBarrier();\n"; break;
636 case SCOPE_WORKGROUP: css << " barrier();\n"; break;
637 }
638 }
639
640 if (m_data.testType == TT_MP)
641 {
642 if (intType)
643 {
644 // Store payload
645 switch (m_data.payloadSC)
646 {
647 default: DE_ASSERT(0); // fall through
648 case SC_PHYSBUFFER: // fall through
649 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
650 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
651 case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
652 }
653 }
654 else
655 {
656 // Store payload
657 switch (m_data.payloadSC)
658 {
659 default: DE_ASSERT(0); // fall through
660 case SC_PHYSBUFFER: // fall through
661 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
662 case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break;
663 case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n"; break;
664 }
665 }
666 }
667 else
668 {
669 DE_ASSERT(m_data.testType == TT_WAR);
670 // Load payload
671 switch (m_data.payloadSC)
672 {
673 default: DE_ASSERT(0); // fall through
674 case SC_PHYSBUFFER: // fall through
675 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
676 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
677 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
678 }
679 }
680 if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
681 {
682 // Acquire and release separate from control barrier
683 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
684 " controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
685 " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
686 }
687 else if (m_data.syncType == ST_CONTROL_BARRIER)
688 {
689 // Control barrier performs both acquire and release
690 css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", "
691 << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
692 << semanticsAcquireRelease.str() << ");\n";
693 }
694 else
695 {
696 // Don't type cast for 64-bit image atomics
697 const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
698 // Release barrier
699 std::stringstream atomicReleaseSemantics;
700 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
701 {
702 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
703 atomicReleaseSemantics << ", 0, 0";
704 }
705 else
706 {
707 atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
708 }
709 // Atomic store guard
710 if (m_data.atomicRMW)
711 {
712 switch (m_data.guardSC)
713 {
714 default: DE_ASSERT(0); // fall through
715 case SC_PHYSBUFFER: // fall through
716 case SC_BUFFER: css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
717 case SC_IMAGE: css << " imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
718 case SC_WORKGROUP: css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
719 }
720 }
721 else
722 {
723 switch (m_data.guardSC)
724 {
725 default: DE_ASSERT(0); // fall through
726 case SC_PHYSBUFFER: // fall through
727 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
728 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
729 case SC_WORKGROUP: css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
730 }
731 }
732
733 std::stringstream atomicAcquireSemantics;
734 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
735 {
736 atomicAcquireSemantics << ", 0, 0";
737 }
738 else
739 {
740 atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
741 }
742 // Atomic load guard
743 if (m_data.atomicRMW)
744 {
745 switch (m_data.guardSC)
746 {
747 default: DE_ASSERT(0); // fall through
748 case SC_PHYSBUFFER: // fall through
749 case SC_BUFFER: css << " skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
750 case SC_IMAGE: css << " skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
751 case SC_WORKGROUP: css << " skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
752 }
753 } else
754 {
755 switch (m_data.guardSC)
756 {
757 default: DE_ASSERT(0); // fall through
758 case SC_PHYSBUFFER: // fall through
759 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
760 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
761 case SC_WORKGROUP: css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
762 }
763 }
764 // Acquire barrier
765 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
766 {
767 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
768 }
769 }
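// Illustrative generated GLSL (exact text depends on the case flags): for
// ST_ATOMIC_ATOMIC with a uint buffer payload and buffer guard at device scope,
// coherent and without atomicRMW, the guard operations read roughly:
//   atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsRelease);
//   skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice, gl_StorageSemanticsBuffer, gl_SemanticsAcquire) == 0;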
770 if (m_data.testType == TT_MP)
771 {
772 // Load payload
773 switch (m_data.payloadSC)
774 {
775 default: DE_ASSERT(0); // fall through
776 case SC_PHYSBUFFER: // fall through
777 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
778 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
779 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
780 }
781 css <<
782 " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
783 "}\n";
784 }
785 else
786 {
787 DE_ASSERT(m_data.testType == TT_WAR);
788 // Store payload, only if the partner invocation has already done its read
789 css << " if (!skip) {\n ";
790 switch (m_data.payloadSC)
791 {
792 default: DE_ASSERT(0); // fall through
793 case SC_PHYSBUFFER: // fall through
794 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n"; break;
795 case SC_IMAGE:
796 if (intType) {
797 css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
798 }
799 else {
800 css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
801 }
802 break;
803 case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n"; break;
804 }
805 css <<
806 " }\n"
807 " if (r != 0) { fail.x[bufferCoord] = 1; }\n"
808 "}\n";
809 }
810
811 // Draw a fullscreen triangle strip based on gl_VertexIndex
812 std::stringstream vss;
813 vss <<
814 "#version 450 core\n"
815 "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
816 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
817
818 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
819
820 switch (m_data.stage)
821 {
822 default: DE_ASSERT(0); // fall through
823 case STAGE_COMPUTE:
824 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
825 break;
826 case STAGE_VERTEX:
827 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
828 break;
829 case STAGE_FRAGMENT:
830 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
831 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
832 break;
833 }
834 }
835
836
837 void MemoryModelTestCase::initProgramsTransitive (SourceCollections& programCollection) const
838 {
839 Scope invocationMapping = m_data.scope;
840
841 const char* typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
842 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
843 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
844
845 // Construct storageSemantics strings for the payload and guard storage
846 // classes. storageSemanticsAll combines both of them and is used by the
847 // device-scope release/acquire fences in the fence-based variants of the
848 // sync.
849 std::stringstream storageSemanticsPayload;
850 switch (m_data.payloadSC)
851 {
852 default: DE_ASSERT(0); // fall through
853 case SC_PHYSBUFFER: // fall through
854 case SC_BUFFER: storageSemanticsPayload << "gl_StorageSemanticsBuffer"; break;
855 case SC_IMAGE: storageSemanticsPayload << "gl_StorageSemanticsImage"; break;
856 }
857 std::stringstream storageSemanticsGuard;
858 switch (m_data.guardSC)
859 {
860 default: DE_ASSERT(0); // fall through
861 case SC_PHYSBUFFER: // fall through
862 case SC_BUFFER: storageSemanticsGuard << "gl_StorageSemanticsBuffer"; break;
863 case SC_IMAGE: storageSemanticsGuard << "gl_StorageSemanticsImage"; break;
864 }
865 std::stringstream storageSemanticsAll;
866 storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();
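// e.g. a buffer payload with an image guard yields
// "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage".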
867
868 std::stringstream css;
869 css << "#version 450 core\n";
870 css << "#pragma use_vulkan_memory_model\n";
871 if (!intType)
872 {
873 css <<
874 "#extension GL_EXT_shader_atomic_float : enable\n"
875 "#extension GL_KHR_memory_scope_semantics : enable\n";
876 }
877 css <<
878 "#extension GL_KHR_shader_subgroup_basic : enable\n"
879 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
880 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
881 "#extension GL_KHR_memory_scope_semantics : enable\n"
882 "#extension GL_ARB_gpu_shader_int64 : enable\n"
883 "#extension GL_EXT_buffer_reference : enable\n"
884 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
885 "layout(constant_id = 0) const int DIM = 1;\n"
886 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
887 "shared bool sharedSkip;\n";
888
889 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
890
891 const char *memqual = "";
892 const char *semAvail = "";
893 const char *semVis = "";
894 if (m_data.coherent)
895 {
896 memqual = "workgroupcoherent";
897 }
898 else
899 {
900 memqual = "nonprivate";
901 semAvail = " | gl_SemanticsMakeAvailable";
902 semVis = " | gl_SemanticsMakeVisible";
903 }
904
905 stringstream pushConstMembers;
906
907 // Declare payload, guard, and fail resources
908 switch (m_data.payloadSC)
909 {
910 default: DE_ASSERT(0); // fall through
911 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
912 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break;
913 case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
914 case SC_IMAGE:
915 if (intType)
916 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
917 else
918 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
919 break;
920 }
921 // The guard variable is only accessed with atomics and need not be declared coherent.
922 switch (m_data.guardSC)
923 {
924 default: DE_ASSERT(0); // fall through
925 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
926 pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
927 case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
928 case SC_IMAGE:
929 if (intType)
930 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
931 else
932 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
933 break;
934 }
935
936 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
937
938 if (pushConstMembers.str().size() != 0) {
939 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
940 }
941
942 css <<
943 "void main()\n"
944 "{\n"
945 " bool pass = true;\n"
946 " bool skip = false;\n"
947 " sharedSkip = false;\n";
948
949 if (m_data.payloadSC == SC_PHYSBUFFER)
950 css << " " << memqual << " PayloadRef payload = payloadref;\n";
951
952 // Compute coordinates based on the storage class and scope.
953 switch (invocationMapping)
954 {
955 default: DE_ASSERT(0); // fall through
956 case SCOPE_DEVICE:
957 css <<
958 " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
959 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
960 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
961 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
962 " ivec2 imageCoord = globalId;\n"
963 " ivec2 partnerImageCoord = partnerGlobalId;\n"
964 " ivec2 globalId00 = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
965 " ivec2 partnerGlobalId00 = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_WorkGroupID.xy));\n"
966 " uint bufferCoord00 = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
967 " uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId00.x;\n"
968 " ivec2 imageCoord00 = globalId00;\n"
969 " ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
970 break;
971 }
972
973 // Store payload
974 if (intType)
975 {
976 switch (m_data.payloadSC)
977 {
978 default: DE_ASSERT(0); // fall through
979 case SC_PHYSBUFFER: // fall through
980 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
981 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
982 }
983 }
984 else
985 {
986 switch (m_data.payloadSC)
987 {
988 default: DE_ASSERT(0); // fall through
989 case SC_PHYSBUFFER: // fall through
990 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
991 case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); \n"; break;
992 }
993 }
994
995 // Sync to other threads in the workgroup
996 css << " controlBarrier(gl_ScopeWorkgroup, "
997 "gl_ScopeWorkgroup, " <<
998 storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
999 "gl_SemanticsAcquireRelease" << semAvail << ");\n";
1000
1001 // Device-scope release/availability in invocation(0,0)
1002 css << " if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n";
1003 const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
1004 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE) {
1005 switch (m_data.guardSC)
1006 {
1007 default: DE_ASSERT(0); // fall through
1008 case SC_PHYSBUFFER: // fall through
1009 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1010 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1011 }
1012 } else {
1013 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1014 switch (m_data.guardSC)
1015 {
1016 default: DE_ASSERT(0); // fall through
1017 case SC_PHYSBUFFER: // fall through
1018 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1019 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1020 }
1021 }
1022
1023 // Device-scope acquire/visibility either in invocation(0,0) or in every invocation
1024 if (!m_data.transitiveVis) {
1025 css << " }\n";
1026 }
1027 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC) {
1028 switch (m_data.guardSC)
1029 {
1030 default: DE_ASSERT(0); // fall through
1031 case SC_PHYSBUFFER: // fall through
1032 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1033 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1034 }
1035 } else {
1036 switch (m_data.guardSC)
1037 {
1038 default: DE_ASSERT(0); // fall through
1039 case SC_PHYSBUFFER: // fall through
1040 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n"; break;
1041 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n"; break;
1042 }
1043 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
1044 }
1045
1046 // If invocation(0,0) did the acquire then store "skip" to shared memory and
1047 // synchronize with the workgroup
1048 if (m_data.transitiveVis) {
1049 css << " sharedSkip = skip;\n";
1050 css << " }\n";
1051
1052 css << " controlBarrier(gl_ScopeWorkgroup, "
1053 "gl_ScopeWorkgroup, " <<
1054 storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
1055 "gl_SemanticsAcquireRelease" << semVis << ");\n";
1056 css << " skip = sharedSkip;\n";
1057 }
1058
1059 // Load payload
1060 switch (m_data.payloadSC)
1061 {
1062 default: DE_ASSERT(0); // fall through
1063 case SC_PHYSBUFFER: // fall through
1064 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
1065 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
1066 }
1067 css <<
1068 " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
1069 "}\n";
1070
1071 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
1072
1073 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
1074 }
1075
1076 TestInstance* MemoryModelTestCase::createInstance (Context& context) const
1077 {
1078 return new MemoryModelTestInstance(context, m_data);
1079 }
1080
1081 tcu::TestStatus MemoryModelTestInstance::iterate (void)
1082 {
1083 const DeviceInterface& vk = m_context.getDeviceInterface();
1084 const VkDevice device = m_context.getDevice();
1085 Allocator& allocator = m_context.getDefaultAllocator();
1086
1087 VkPhysicalDeviceProperties2 properties;
1088 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1089 properties.pNext = NULL;
1090
1091 m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
1092
1093 deUint32 DIM = 31;
1094 deUint32 NUM_WORKGROUP_EACH_DIM = 8;
1095 // If necessary, shrink workgroup size to fit HW limits
1096 if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
1097 {
1098 DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
1099 }
1100 deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
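// With the defaults above this is 31*31*8*8 = 61504 invocations; on a device
// that only allows 256 invocations per workgroup, DIM shrinks to
// floor(sqrt(256)) = 16.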
1101
1102 VkDeviceSize bufferSizes[3];
1103 de::MovePtr<BufferWithMemory> buffers[3];
1104 vk::VkDescriptorBufferInfo bufferDescriptors[3];
1105 de::MovePtr<BufferWithMemory> copyBuffer;
1106
1107 for (deUint32 i = 0; i < 3; ++i)
1108 {
1109 size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64)? sizeof(deUint64) : sizeof(deUint32);
1110 // buffer2 is the "fail" buffer, and is always uint
1111 if (i == 2)
1112 elementSize = sizeof(deUint32);
1113 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
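// e.g. for DATA_TYPE_UINT64 the payload and guard buffers use 8 bytes per
// invocation while the fail buffer always uses 4.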
1114
1115 vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1116
1117 bool memoryDeviceAddress = false;
1118
1119 bool local;
1120 switch (i)
1121 {
1122 default: DE_ASSERT(0); // fall through
1123 case 0:
1124 if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
1125 continue;
1126 local = m_data.payloadMemLocal;
1127 if (m_data.payloadSC == SC_PHYSBUFFER)
1128 {
1129 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1130 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1131 memoryDeviceAddress = true;
1132 }
1133 break;
1134 case 1:
1135 if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
1136 continue;
1137 local = m_data.guardMemLocal;
1138 if (m_data.guardSC == SC_PHYSBUFFER)
1139 {
1140 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1141 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1142 memoryDeviceAddress = true;
1143 }
1144 break;
1145 case 2: local = true; break;
1146 }
1147
1148 try
1149 {
1150 buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1151 vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
1152 (memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
1153 (local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
1154 }
1155 catch (const tcu::NotSupportedError&)
1156 {
1157 if (!local)
1158 {
1159 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1160 }
1161 throw;
1162 }
1163 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
1164 }
1165
1166 // Try to use cached host memory for the buffer the CPU will read from, else fall back to plain host-visible memory.
1167 try
1168 {
1169 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1170 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1171 }
1172 catch (const tcu::NotSupportedError&)
1173 {
1174 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1175 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
1176 }
1177
1178 VkFormat imageFormat;
1179 switch (m_data.dataType)
1180 {
1181 case DATA_TYPE_UINT:
1182 case DATA_TYPE_UINT64:
1183 imageFormat = VK_FORMAT_R32_UINT;
1184 break;
1185 case DATA_TYPE_FLOAT32:
1186 case DATA_TYPE_FLOAT64:
1187 imageFormat = VK_FORMAT_R32_SFLOAT;
1188 break;
1189 default:
1190 TCU_FAIL("Invalid data type.");
1191 }
1192
1193 const VkImageCreateInfo imageCreateInfo =
1194 {
1195 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1196 DE_NULL, // const void* pNext;
1197 (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
1198 VK_IMAGE_TYPE_2D, // VkImageType imageType;
1199 imageFormat, // VkFormat format;
1200 {
1201 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 width;
1202 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 height;
1203 1u // deUint32 depth;
1204 }, // VkExtent3D extent;
1205 1u, // deUint32 mipLevels;
1206 1u, // deUint32 arrayLayers;
1207 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1208 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1209 VK_IMAGE_USAGE_STORAGE_BIT
1210 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
1211 | VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
1212 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1213 0u, // deUint32 queueFamilyIndexCount;
1214 DE_NULL, // const deUint32* pQueueFamilyIndices;
1215 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
1216 };
1217 VkImageViewCreateInfo imageViewCreateInfo =
1218 {
1219 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1220 DE_NULL, // const void* pNext;
1221 (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
1222 DE_NULL, // VkImage image;
1223 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1224 imageFormat, // VkFormat format;
1225 {
1226 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1227 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1228 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1229 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1230 }, // VkComponentMapping components;
1231 {
1232 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1233 0u, // deUint32 baseMipLevel;
1234 1u, // deUint32 levelCount;
1235 0u, // deUint32 baseArrayLayer;
1236 1u // deUint32 layerCount;
1237 } // VkImageSubresourceRange subresourceRange;
1238 };
1239
1240
1241 de::MovePtr<ImageWithMemory> images[2];
1242 Move<VkImageView> imageViews[2];
1243 vk::VkDescriptorImageInfo imageDescriptors[2];
1244
1245 for (deUint32 i = 0; i < 2; ++i)
1246 {
1247
1248 bool local;
1249 switch (i)
1250 {
1251 default: DE_ASSERT(0); // fall through
1252 case 0:
1253 if (m_data.payloadSC != SC_IMAGE)
1254 continue;
1255 local = m_data.payloadMemLocal;
1256 break;
1257 case 1:
1258 if (m_data.guardSC != SC_IMAGE)
1259 continue;
1260 local = m_data.guardMemLocal;
1261 break;
1262 }
1263
1264 try
1265 {
1266 images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
1267 vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1268 }
1269 catch (const tcu::NotSupportedError&)
1270 {
1271 if (!local)
1272 {
1273 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1274 }
1275 throw;
1276 }
1277 imageViewCreateInfo.image = **images[i];
1278 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
1279
1280 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1281 }
1282
1283 vk::DescriptorSetLayoutBuilder layoutBuilder;
1284
1285 switch (m_data.payloadSC)
1286 {
1287 default:
1288 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1289 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1290 }
1291 switch (m_data.guardSC)
1292 {
1293 default:
1294 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1295 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1296 }
1297 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1298
1299 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1300
1301 vk::Unique<vk::VkDescriptorPool> descriptorPool(vk::DescriptorPoolBuilder()
1302 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
1303 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
1304 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1305 vk::Unique<vk::VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1306
1307 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
1308 switch (m_data.payloadSC)
1309 {
1310 default: DE_ASSERT(0); // fall through
1311 case SC_PHYSBUFFER:
1312 case SC_WORKGROUP:
1313 break;
1314 case SC_BUFFER:
1315 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1316 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1317 break;
1318 case SC_IMAGE:
1319 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1320 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
1321 break;
1322 }
1323 switch (m_data.guardSC)
1324 {
1325 default: DE_ASSERT(0); // fall through
1326 case SC_PHYSBUFFER:
1327 case SC_WORKGROUP:
1328 break;
1329 case SC_BUFFER:
1330 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1331 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
1332 break;
1333 case SC_IMAGE:
1334 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1335 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
1336 break;
1337 }
1338 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
1339 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
1340
1341 setUpdateBuilder.update(vk, device);
1342
1343 const VkPushConstantRange pushConstRange =
1344 {
1345 allShaderStages, // VkShaderStageFlags stageFlags
1346 0, // deUint32 offset
1347 16 // deUint32 size
1348 };
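// 16 bytes covers the two 8-byte buffer_reference device addresses the shaders
// may declare (payloadref at offset 0, guard at offset 8); the range is declared
// even for variants that end up using no push constants.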
1349
1350 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1351 {
1352 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
1353 DE_NULL, // pNext
1354 (VkPipelineLayoutCreateFlags)0,
1355 1, // setLayoutCount
1356 &descriptorSetLayout.get(), // pSetLayouts
1357 1u, // pushConstantRangeCount
1358 &pushConstRange, // pPushConstantRanges
1359 };
1360
1361 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1362
1363 Move<VkPipeline> pipeline;
1364 Move<VkRenderPass> renderPass;
1365 Move<VkFramebuffer> framebuffer;
1366
1367 VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1368
1369 const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
1370
1371 const vk::VkSpecializationMapEntry entries[2] =
1372 {
1373 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
1374 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
1375 };
1376
1377 const vk::VkSpecializationInfo specInfo =
1378 {
1379 2, // mapEntryCount
1380 entries, // pMapEntries
1381 sizeof(specData), // dataSize
1382 specData // pData
1383 };
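// constant_id 0 doubles as DIM and as local_size_x/y in the compute shader
// (local_size_x_id = 0, local_size_y_id = 0); constant_id 1 is
// NUM_WORKGROUP_EACH_DIM.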
1384
1385 if (m_data.stage == STAGE_COMPUTE)
1386 {
1387 const Unique<VkShaderModule> shader (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1388
1389 const VkPipelineShaderStageCreateInfo shaderCreateInfo =
1390 {
1391 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1392 DE_NULL,
1393 (VkPipelineShaderStageCreateFlags)0,
1394 VK_SHADER_STAGE_COMPUTE_BIT, // stage
1395 *shader, // shader
1396 "main",
1397 &specInfo, // pSpecializationInfo
1398 };
1399
1400 const VkComputePipelineCreateInfo pipelineCreateInfo =
1401 {
1402 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1403 DE_NULL,
1404 0u, // flags
1405 shaderCreateInfo, // cs
1406 *pipelineLayout, // layout
1407 (vk::VkPipeline)0, // basePipelineHandle
1408 0u, // basePipelineIndex
1409 };
1410 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1411 }
1412 else
1413 {
1414
1415 const vk::VkSubpassDescription subpassDesc =
1416 {
1417 (vk::VkSubpassDescriptionFlags)0,
1418 vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1419 0u, // inputCount
1420 DE_NULL, // pInputAttachments
1421 0u, // colorCount
1422 DE_NULL, // pColorAttachments
1423 DE_NULL, // pResolveAttachments
1424 DE_NULL, // depthStencilAttachment
1425 0u, // preserveCount
1426 DE_NULL, // pPreserveAttachments
1427
1428 };
1429 const vk::VkRenderPassCreateInfo renderPassParams =
1430 {
1431 vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1432 DE_NULL, // pNext
1433 (vk::VkRenderPassCreateFlags)0,
1434 0u, // attachmentCount
1435 DE_NULL, // pAttachments
1436 1u, // subpassCount
1437 &subpassDesc, // pSubpasses
1438 0u, // dependencyCount
1439 DE_NULL, // pDependencies
1440 };
1441
1442 renderPass = createRenderPass(vk, device, &renderPassParams);
1443
1444 const vk::VkFramebufferCreateInfo framebufferParams =
1445 {
1446 vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1447 DE_NULL, // pNext
1448 (vk::VkFramebufferCreateFlags)0,
1449 *renderPass, // renderPass
1450 0u, // attachmentCount
1451 DE_NULL, // pAttachments
1452 DIM*NUM_WORKGROUP_EACH_DIM, // width
1453 DIM*NUM_WORKGROUP_EACH_DIM, // height
1454 1u, // layers
1455 };
1456
1457 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1458
1459 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
1460 {
1461 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1462 DE_NULL, // const void* pNext;
1463 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1464 0u, // deUint32 vertexBindingDescriptionCount;
1465 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1466 0u, // deUint32 vertexAttributeDescriptionCount;
1467 DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1468 };
1469
1470 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
1471 {
1472 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1473 DE_NULL, // const void* pNext;
1474 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1475 (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1476 VK_FALSE // VkBool32 primitiveRestartEnable;
1477 };
1478
1479 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
1480 {
1481 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1482 DE_NULL, // const void* pNext;
1483 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
1484 VK_FALSE, // VkBool32 depthClampEnable;
1485 (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1486 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
1487 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
1488 VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
1489 VK_FALSE, // VkBool32 depthBiasEnable;
1490 0.0f, // float depthBiasConstantFactor;
1491 0.0f, // float depthBiasClamp;
1492 0.0f, // float depthBiasSlopeFactor;
1493 1.0f // float lineWidth;
1494 };
1495
1496 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
1497 {
1498 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
1499 DE_NULL, // const void* pNext
1500 0u, // VkPipelineMultisampleStateCreateFlags flags
1501 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
1502 VK_FALSE, // VkBool32 sampleShadingEnable
1503 1.0f, // float minSampleShading
1504 DE_NULL, // const VkSampleMask* pSampleMask
1505 VK_FALSE, // VkBool32 alphaToCoverageEnable
1506 VK_FALSE // VkBool32 alphaToOneEnable
1507 };
1508
1509 VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1510 VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1511
1512 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
1513 {
1514 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
1515 DE_NULL, // const void* pNext
1516 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
1517 1u, // deUint32 viewportCount
1518 &viewport, // const VkViewport* pViewports
1519 1u, // deUint32 scissorCount
1520 &scissor // const VkRect2D* pScissors
1521 };
1522
1523 Move<VkShaderModule> fs;
1524 Move<VkShaderModule> vs;
1525
1526 deUint32 numStages;
1527 if (m_data.stage == STAGE_VERTEX)
1528 {
1529 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1530 			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);	// unused; only the vertex stage is bound below (numStages = 1)
1531 numStages = 1u;
1532 }
1533 else
1534 {
1535 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1536 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1537 numStages = 2u;
1538 }
1539
1540 const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1541 {
1542 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1543 DE_NULL,
1544 (VkPipelineShaderStageCreateFlags)0,
1545 VK_SHADER_STAGE_VERTEX_BIT, // stage
1546 *vs, // shader
1547 "main",
1548 &specInfo, // pSpecializationInfo
1549 },
1550 {
1551 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1552 DE_NULL,
1553 (VkPipelineShaderStageCreateFlags)0,
1554 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1555 *fs, // shader
1556 "main",
1557 &specInfo, // pSpecializationInfo
1558 }
1559 };
1560
1561 const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
1562 {
1563 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1564 DE_NULL, // const void* pNext;
1565 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
1566 numStages, // deUint32 stageCount;
1567 &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
1568 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1569 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1570 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1571 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
1572 &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1573 &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1574 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1575 DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1576 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1577 pipelineLayout.get(), // VkPipelineLayout layout;
1578 renderPass.get(), // VkRenderPass renderPass;
1579 0u, // deUint32 subpass;
1580 DE_NULL, // VkPipeline basePipelineHandle;
1581 0 // int basePipelineIndex;
1582 };
1583
1584 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1585 }
1586
1587 const VkQueue queue = m_context.getUniversalQueue();
1588 Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex());
1589 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1590
1591 VkBufferDeviceAddressInfo addrInfo =
1592 {
1593 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
1594 DE_NULL, // const void* pNext;
1595 0, // VkBuffer buffer
1596 };
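	// addrInfo.buffer is filled in below, per buffer, whenever the payload or
	// guard variable lives in physical storage buffer memory.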
1597
1598 VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1599 VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1600
1601 VkMemoryBarrier memBarrier =
1602 {
1603 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1604 DE_NULL, // pNext
1605 0u, // srcAccessMask
1606 0u, // dstAccessMask
1607 };
1608
1609 const VkBufferCopy copyParams =
1610 {
1611 (VkDeviceSize)0u, // srcOffset
1612 (VkDeviceSize)0u, // dstOffset
1613 bufferSizes[2] // size
1614 };
1615
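	// The workload is split across NUM_SUBMITS submissions; each one re-records
	// the command buffer and runs the clear + dispatch/draw loop 50 times.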
1616 	const deUint32 NUM_SUBMITS = 4;
1617
1618 for (deUint32 x = 0; x < NUM_SUBMITS; ++x)
1619 {
1620 beginCommandBuffer(vk, *cmdBuffer, 0u);
1621
1622 if (x == 0)
1623 vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1624
1625 for (deUint32 i = 0; i < 2; ++i)
1626 {
1627 if (!images[i])
1628 continue;
1629
1630 const VkImageMemoryBarrier imageBarrier =
1631 {
1632 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
1633 DE_NULL, // const void* pNext
1634 0u, // VkAccessFlags srcAccessMask
1635 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
1636 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
1637 VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
1638 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
1639 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
1640 **images[i], // VkImage image
1641 {
1642 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
1643 0u, // uint32_t baseMipLevel
1644 				1u,							// uint32_t				levelCount
1645 				0u,							// uint32_t				baseArrayLayer
1646 				1u,							// uint32_t				layerCount
1647 }
1648 };
1649
1650 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1651 (VkDependencyFlags)0,
1652 0, (const VkMemoryBarrier*)DE_NULL,
1653 0, (const VkBufferMemoryBarrier*)DE_NULL,
1654 1, &imageBarrier);
1655 }
1656
1657 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1658 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1659
1660 if (m_data.payloadSC == SC_PHYSBUFFER)
1661 {
1662 const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1663 addrInfo.buffer = **buffers[0];
1664 VkDeviceAddress addr;
1665 if (useKHR)
1666 addr = vk.getBufferDeviceAddress(device, &addrInfo);
1667 else
1668 addr = vk.getBufferDeviceAddressEXT(device, &addrInfo);
1669 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1670 0, sizeof(VkDeviceSize), &addr);
1671 }
1672 if (m_data.guardSC == SC_PHYSBUFFER)
1673 {
1674 const bool useKHR = m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1675 addrInfo.buffer = **buffers[1];
1676 VkDeviceAddress addr;
1677 if (useKHR)
1678 addr = vk.getBufferDeviceAddress(device, &addrInfo);
1679 else
1680 addr = vk.getBufferDeviceAddressEXT(device, &addrInfo);
1681 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1682 8, sizeof(VkDeviceSize), &addr);
1683 }
1684
1685 for (deUint32 iters = 0; iters < 50; ++iters)
1686 {
1687 for (deUint32 i = 0; i < 2; ++i)
1688 {
1689 if (buffers[i])
1690 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1691 if (images[i])
1692 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1693 }
1694
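			// Make the transfer writes (fills/clears) visible to all shader stages
			// before the dispatch/draw.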
1695 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1696 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1697 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1698 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1699
1700 if (m_data.stage == STAGE_COMPUTE)
1701 {
1702 vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1703 }
1704 else
1705 {
1706 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1707 makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1708 0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1709 // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1710 if (m_data.stage == STAGE_VERTEX)
1711 {
1712 vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1713 }
1714 else
1715 {
1716 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1717 }
1718 endRenderPass(vk, *cmdBuffer);
1719 }
1720
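			// Make the shader accesses available to the transfer stage before the
			// next iteration's fills/clears (and the final copy).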
1721 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1722 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1723 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1724 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1725 }
1726
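		// On the last submission, copy the result buffer to the host-visible
		// copy buffer and make the copy visible to host reads.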
1727 if (x == NUM_SUBMITS - 1)
1728 {
1729 			vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1730 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1731 memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
1732 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1733 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1734 }
1735
1736 endCommandBuffer(vk, *cmdBuffer);
1737
1738 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1739
1740 vk.resetCommandBuffer(*cmdBuffer, 0x00000000);
1741 }
1742
1743 tcu::TestLog& log = m_context.getTestContext().getLog();
1744
1745 deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1746 invalidateAlloc(vk, device, copyBuffer->getAllocation());
1747 qpTestResult res = QP_TEST_RESULT_PASS;
1748
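	// Each nonzero element in the copied-back buffer marks an invocation that
	// reported a failure; log the first 256 failing invocations.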
1749 deUint32 numErrors = 0;
1750 for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1751 {
1752 if (ptr[i] != 0)
1753 {
1754 if (numErrors < 256)
1755 {
1756 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1757 }
1758 numErrors++;
1759 res = QP_TEST_RESULT_FAIL;
1760 }
1761 }
1762
1763 if (numErrors)
1764 {
1765 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1766 }
1767
1768 return tcu::TestStatus(res, qpGetTestResultName(res));
1769 }
1770
1771 } // anonymous
1772
1773 tcu::TestCaseGroup* createTests (tcu::TestContext& testCtx)
1774 {
1775 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1776 testCtx, "memory_model", "Memory model tests"));
1777
1778 typedef struct
1779 {
1780 deUint32 value;
1781 const char* name;
1782 const char* description;
1783 } TestGroupCase;
1784
1785 TestGroupCase ttCases[] =
1786 {
1787 { TT_MP, "message_passing", "message passing" },
1788 { TT_WAR, "write_after_read", "write after read" },
1789 };
1790
1791 TestGroupCase core11Cases[] =
1792 {
1793 		{ 1,	"core11",	"Supported by Vulkan 1.1"						},
1794 { 0, "ext", "Requires VK_KHR_vulkan_memory_model extension" },
1795 };
1796
1797 TestGroupCase dtCases[] =
1798 {
1799 { DATA_TYPE_UINT, "u32", "uint32_t atomics" },
1800 { DATA_TYPE_UINT64, "u64", "uint64_t atomics" },
1801 { DATA_TYPE_FLOAT32, "f32", "float32 atomics" },
1802 { DATA_TYPE_FLOAT64, "f64", "float64 atomics" },
1803 };
1804
1805 TestGroupCase cohCases[] =
1806 {
1807 { 1, "coherent", "coherent payload variable" },
1808 { 0, "noncoherent", "noncoherent payload variable" },
1809 };
1810
1811 TestGroupCase stCases[] =
1812 {
1813 { ST_FENCE_FENCE, "fence_fence", "release fence, acquire fence" },
1814 { ST_FENCE_ATOMIC, "fence_atomic", "release fence, atomic acquire" },
1815 { ST_ATOMIC_FENCE, "atomic_fence", "atomic release, acquire fence" },
1816 { ST_ATOMIC_ATOMIC, "atomic_atomic", "atomic release, atomic acquire" },
1817 { ST_CONTROL_BARRIER, "control_barrier", "control barrier" },
1818 { ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier", "control barrier with release/acquire" },
1819 };
1820
1821 TestGroupCase rmwCases[] =
1822 {
1823 { 0, "atomicwrite", "atomic write" },
1824 { 1, "atomicrmw", "atomic rmw" },
1825 };
1826
1827 TestGroupCase scopeCases[] =
1828 {
1829 { SCOPE_DEVICE, "device", "device scope" },
1830 { SCOPE_QUEUEFAMILY, "queuefamily", "queuefamily scope" },
1831 { SCOPE_WORKGROUP, "workgroup", "workgroup scope" },
1832 { SCOPE_SUBGROUP, "subgroup", "subgroup scope" },
1833 };
1834
1835 TestGroupCase plCases[] =
1836 {
1837 { 0, "payload_nonlocal", "payload variable in non-local memory" },
1838 { 1, "payload_local", "payload variable in local memory" },
1839 };
1840
1841 TestGroupCase pscCases[] =
1842 {
1843 { SC_BUFFER, "buffer", "payload variable in buffer memory" },
1844 { SC_IMAGE, "image", "payload variable in image memory" },
1845 { SC_WORKGROUP, "workgroup", "payload variable in workgroup memory" },
1846 { SC_PHYSBUFFER,"physbuffer", "payload variable in physical storage buffer memory" },
1847 };
1848
1849 TestGroupCase glCases[] =
1850 {
1851 { 0, "guard_nonlocal", "guard variable in non-local memory" },
1852 { 1, "guard_local", "guard variable in local memory" },
1853 };
1854
1855 TestGroupCase gscCases[] =
1856 {
1857 { SC_BUFFER, "buffer", "guard variable in buffer memory" },
1858 { SC_IMAGE, "image", "guard variable in image memory" },
1859 { SC_WORKGROUP, "workgroup", "guard variable in workgroup memory" },
1860 { SC_PHYSBUFFER,"physbuffer", "guard variable in physical storage buffer memory" },
1861 };
1862
1863 TestGroupCase stageCases[] =
1864 {
1865 { STAGE_COMPUTE, "comp", "compute shader" },
1866 { STAGE_VERTEX, "vert", "vertex shader" },
1867 { STAGE_FRAGMENT, "frag", "fragment shader" },
1868 };
1869
1870
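	// The nested groups below yield test paths of the form:
	//   memory_model.<test type>.<core11|ext>.<data type>.<coherency>.<sync type>.
	//   <atomic op>.<scope>.<payload locality>.<payload SC>.<guard locality>.<guard SC>.<stage>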
1871 for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1872 {
1873 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name, ttCases[ttNdx].description));
1874 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1875 {
1876 de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name, core11Cases[core11Ndx].description));
1877 for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1878 {
1879 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name, dtCases[dtNdx].description));
1880 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1881 {
1882 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
1883 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1884 {
1885 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
1886 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1887 {
1888 de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name, rmwCases[rmwNdx].description));
1889 for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1890 {
1891 de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name, scopeCases[scopeNdx].description));
1892 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1893 {
1894 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
1895 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1896 {
1897 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
1898 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1899 {
1900 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
1901 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1902 {
1903 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
1904 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1905 {
1906 CaseDef c =
1907 {
1908 !!plCases[plNdx].value, // bool payloadMemLocal;
1909 !!glCases[glNdx].value, // bool guardMemLocal;
1910 !!cohCases[cohNdx].value, // bool coherent;
1911 !!core11Cases[core11Ndx].value, // bool core11;
1912 !!rmwCases[rmwNdx].value, // bool atomicRMW;
1913 (TestType)ttCases[ttNdx].value, // TestType testType;
1914 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
1915 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
1916 (Scope)scopeCases[scopeNdx].value, // Scope scope;
1917 (SyncType)stCases[stNdx].value, // SyncType syncType;
1918 (Stage)stageCases[stageNdx].value, // Stage stage;
1919 (DataType)dtCases[dtNdx].value, // DataType dataType;
1920 false, // bool transitive;
1921 false, // bool transitiveVis;
1922 };
1923
1924 // Mustpass11 tests should only exercise things we expect to work on
1925 // existing implementations. Exclude noncoherent tests which require
1926 // new extensions, and assume atomic synchronization wouldn't work
1927 // (i.e. atomics may be implemented as relaxed atomics). Exclude
1928 // queuefamily scope which doesn't exist in Vulkan 1.1. Exclude
1929 // physical storage buffer which doesn't support the legacy decorations.
1930 if (c.core11 &&
1931 (c.coherent == 0 ||
1932 c.syncType == ST_FENCE_ATOMIC ||
1933 c.syncType == ST_ATOMIC_FENCE ||
1934 c.syncType == ST_ATOMIC_ATOMIC ||
1935 c.dataType == DATA_TYPE_UINT64 ||
1936 c.dataType == DATA_TYPE_FLOAT64 ||
1937 c.scope == SCOPE_QUEUEFAMILY ||
1938 c.payloadSC == SC_PHYSBUFFER ||
1939 c.guardSC == SC_PHYSBUFFER))
1940 {
1941 continue;
1942 }
1943
1944 if (c.stage != STAGE_COMPUTE &&
1945 c.scope == SCOPE_WORKGROUP)
1946 {
1947 continue;
1948 }
1949
1950 // Don't exercise local and non-local for workgroup memory
1951 // Also don't exercise workgroup memory for non-compute stages
1952 if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
1953 {
1954 continue;
1955 }
1956 if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
1957 {
1958 continue;
1959 }
1960 // Can't do control barrier with larger than workgroup scope, or non-compute stages
1961 if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1962 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
1963 {
1964 continue;
1965 }
1966
1967 // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
1968 if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
1969 {
1970 continue;
1971 }
1972
1973 // uint64/float32/float64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
1974 const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 || c.dataType == DATA_TYPE_FLOAT32 || c.dataType == DATA_TYPE_FLOAT64);
1975 if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
1976 {
1977 continue;
1978 }
1979
1980 // No 64-bit image types, so skip tests with both payload and guard in image memory
1981 if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
1982 {
1983 continue;
1984 }
1985
1986 // No support for atomic operations on 64-bit floating point images
1987 if (c.dataType == DATA_TYPE_FLOAT64 && (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
1988 {
1989 continue;
1990 }
1991 									// Control barrier tests don't use a guard variable, so only generate them with guardSC == SC_BUFFER and guardMemLocal == false
1992 									if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
1993 										(c.guardSC != SC_BUFFER || c.guardMemLocal))
1994 {
1995 continue;
1996 }
1997
1998 gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, stageCases[stageNdx].description, c));
1999 }
2000 glGroup->addChild(gscGroup.release());
2001 }
2002 pscGroup->addChild(glGroup.release());
2003 }
2004 plGroup->addChild(pscGroup.release());
2005 }
2006 scopeGroup->addChild(plGroup.release());
2007 }
2008 rmwGroup->addChild(scopeGroup.release());
2009 }
2010 stGroup->addChild(rmwGroup.release());
2011 }
2012 cohGroup->addChild(stGroup.release());
2013 }
2014 dtGroup->addChild(cohGroup.release());
2015 }
2016 core11Group->addChild(dtGroup.release());
2017 }
2018 ttGroup->addChild(core11Group.release());
2019 }
2020 group->addChild(ttGroup.release());
2021 }
2022
2023 TestGroupCase transVisCases[] =
2024 {
2025 { 0, "nontransvis", "destination invocation acquires" },
2026 { 1, "transvis", "invocation 0,0 acquires" },
2027 };
2028
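	// The transitive visibility tests fix the test type (message passing), scope
	// (device), stage (compute) and data type (u32), and sweep the remaining axes;
	// workgroup storage and the control-barrier sync types are skipped below.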
2029 	de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive", "Transitive visibility tests"));
2030 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2031 {
2032 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name, cohCases[cohNdx].description));
2033 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2034 {
2035 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name, stCases[stNdx].description));
2036 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2037 {
2038 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name, plCases[plNdx].description));
2039 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2040 {
2041 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name, pscCases[pscNdx].description));
2042 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2043 {
2044 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name, glCases[glNdx].description));
2045 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2046 {
2047 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name, gscCases[gscNdx].description));
2048 for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
2049 {
2050 CaseDef c =
2051 {
2052 !!plCases[plNdx].value, // bool payloadMemLocal;
2053 !!glCases[glNdx].value, // bool guardMemLocal;
2054 !!cohCases[cohNdx].value, // bool coherent;
2055 false, // bool core11;
2056 false, // bool atomicRMW;
2057 TT_MP, // TestType testType;
2058 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2059 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2060 SCOPE_DEVICE, // Scope scope;
2061 (SyncType)stCases[stNdx].value, // SyncType syncType;
2062 STAGE_COMPUTE, // Stage stage;
2063 DATA_TYPE_UINT, // DataType dataType;
2064 true, // bool transitive;
2065 !!transVisCases[visNdx].value, // bool transitiveVis;
2066 };
2067 if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
2068 {
2069 continue;
2070 }
2071 if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
2072 {
2073 continue;
2074 }
2075 gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, transVisCases[visNdx].description, c));
2076 }
2077 glGroup->addChild(gscGroup.release());
2078 }
2079 pscGroup->addChild(glGroup.release());
2080 }
2081 plGroup->addChild(pscGroup.release());
2082 }
2083 stGroup->addChild(plGroup.release());
2084 }
2085 cohGroup->addChild(stGroup.release());
2086 }
2087 transGroup->addChild(cohGroup.release());
2088 }
2089 group->addChild(transGroup.release());
2090
2091 // Padding tests.
2092 group->addChild(createPaddingTests(testCtx));
2093
2094 return group.release();
2095 }
2096
2097 } // MemoryModel
2098 } // vkt
2099