1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2019 NVIDIA Corporation.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "glcSubgroupsBasicTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31 #include "tcuStringTemplate.hpp"
32
33 using namespace tcu;
34 using namespace std;
35
36 namespace glc
37 {
38 namespace subgroups
39 {
40
41 namespace
42 {
// Marker written by an invocation for which subgroupElect() returned true.
43 static const deUint32 ELECTED_VALUE = 42u;
// Marker written by every invocation for which subgroupElect() returned false.
44 static const deUint32 UNELECTED_VALUE = 13u;
// The framebuffer barrier shaders declare tempBuffer with SHADER_BUFFER_SIZE / 4 uint elements,
// so this is presumably the backing buffer size in bytes -- TODO confirm against the buffer setup.
45 static const deUint64 SHADER_BUFFER_SIZE = 4096ull;
46
checkFragmentSubgroupBarriersNoSSBO(std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32)47 static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void*> datas,
48 deUint32 width, deUint32 height, deUint32)
49 {
50 const float* const resultData = reinterpret_cast<const float*>(datas[0]);
51
52 for (deUint32 x = 0u; x < width; ++x)
53 {
54 for (deUint32 y = 0u; y < height; ++y)
55 {
56 const deUint32 ndx = (x * height + y) * 4u;
57 if (1.0f == resultData[ndx +2])
58 {
59 if(resultData[ndx] != resultData[ndx +1])
60 {
61 return false;
62 }
63 }
64 else if (resultData[ndx] != resultData[ndx +3])
65 {
66 return false;
67 }
68 }
69 }
70
71 return true;
72 }
73
checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void * > datas,deUint32 width,deUint32)74 static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void*> datas,
75 deUint32 width, deUint32)
76 {
77 const float* const resultData = reinterpret_cast<const float*>(datas[0]);
78 float poisonValuesFound = 0.0f;
79 float numSubgroupsUsed = 0.0f;
80
81 for (deUint32 x = 0; x < width; ++x)
82 {
83 deUint32 val = static_cast<deUint32>(resultData[x * 2]);
84 numSubgroupsUsed += resultData[x * 2 + 1];
85
86 switch (val)
87 {
88 default:
89 // some garbage value was found!
90 return false;
91 case UNELECTED_VALUE:
92 break;
93 case ELECTED_VALUE:
94 poisonValuesFound += 1.0f;
95 break;
96 }
97 }
98 return numSubgroupsUsed == poisonValuesFound;
99 }
100
checkVertexPipelineStagesSubgroupElect(std::vector<const void * > datas,deUint32 width,deUint32)101 static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void*> datas,
102 deUint32 width, deUint32)
103 {
104 const deUint32* const resultData =
105 reinterpret_cast<const deUint32*>(datas[0]);
106 deUint32 poisonValuesFound = 0;
107
108 for (deUint32 x = 0; x < width; ++x)
109 {
110 deUint32 val = resultData[x];
111
112 switch (val)
113 {
114 default:
115 // some garbage value was found!
116 return false;
117 case UNELECTED_VALUE:
118 break;
119 case ELECTED_VALUE:
120 poisonValuesFound++;
121 break;
122 }
123 }
124
125 // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
126 const deUint32 numSubgroupsUsed =
127 *reinterpret_cast<const deUint32*>(datas[1]);
128
129 return numSubgroupsUsed == poisonValuesFound;
130 }
131
checkVertexPipelineStagesSubgroupBarriers(std::vector<const void * > datas,deUint32 width,deUint32)132 static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void*> datas,
133 deUint32 width, deUint32)
134 {
135 const deUint32* const resultData = reinterpret_cast<const deUint32*>(datas[0]);
136
137 // We used this SSBO to generate our unique value!
138 const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[1]);
139
140 for (deUint32 x = 0; x < width; ++x)
141 {
142 deUint32 val = resultData[x];
143
144 if (val != ref)
145 return false;
146 }
147
148 return true;
149 }
150
checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void * > datas,deUint32 width,deUint32)151 static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void*> datas,
152 deUint32 width, deUint32)
153 {
154 const float* const resultData = reinterpret_cast<const float*>(datas[0]);
155
156 for (deUint32 x = 0u; x < width; ++x)
157 {
158 const deUint32 ndx = x*4u;
159 if (1.0f == resultData[ndx +2])
160 {
161 if(resultData[ndx] != resultData[ndx +1])
162 return false;
163 }
164 else if (resultData[ndx] != resultData[ndx +3])
165 {
166 return false;
167 }
168 }
169 return true;
170 }
171
checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void * > datas,deUint32 width,deUint32)172 static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void*> datas,
173 deUint32 width, deUint32)
174 {
175 const float* const resultData = reinterpret_cast<const float*>(datas[0]);
176
177 for (deUint32 x = 0u; x < width; ++x)
178 {
179 const deUint32 ndx = x*4u;
180 if (0.0f == resultData[ndx +2] && resultData[ndx] != resultData[ndx +3])
181 {
182 return false;
183 }
184 }
185 return true;
186 }
187
checkComputeSubgroupElect(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)188 static bool checkComputeSubgroupElect(std::vector<const void*> datas,
189 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
190 deUint32)
191 {
192 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
193 }
194
checkComputeSubgroupBarriers(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)195 static bool checkComputeSubgroupBarriers(std::vector<const void*> datas,
196 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
197 deUint32)
198 {
199 // We used this SSBO to generate our unique value!
200 const deUint32 ref = *reinterpret_cast<const deUint32*>(datas[1]);
201 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, ref);
202 }
203
// Subgroup operation exercised by a test case. Each enumerator except OPTYPE_LAST maps to
// the GLSL built-in of the matching name returned by getOpTypeName().
204 enum OpType
205 {
// subgroupElect()
206 OPTYPE_ELECT = 0,
// subgroupBarrier()
207 OPTYPE_SUBGROUP_BARRIER,
// subgroupMemoryBarrier()
208 OPTYPE_SUBGROUP_MEMORY_BARRIER,
// subgroupMemoryBarrierBuffer()
209 OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER,
// subgroupMemoryBarrierShared()
210 OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED,
// subgroupMemoryBarrierImage()
211 OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE,
// End marker; not an operation (getOpTypeName() treats it as unsupported).
212 OPTYPE_LAST
213 };
214
getOpTypeName(int opType)215 std::string getOpTypeName(int opType)
216 {
217 switch (opType)
218 {
219 default:
220 DE_FATAL("Unsupported op type");
221 return "";
222 case OPTYPE_ELECT:
223 return "subgroupElect";
224 case OPTYPE_SUBGROUP_BARRIER:
225 return "subgroupBarrier";
226 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
227 return "subgroupMemoryBarrier";
228 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
229 return "subgroupMemoryBarrierBuffer";
230 case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
231 return "subgroupMemoryBarrierShared";
232 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
233 return "subgroupMemoryBarrierImage";
234 }
235 }
236
// Parameters that select which shaders the program-initialisation functions generate.
237 struct CaseDefinition
238 {
// Operation under test; holds one of the OpType enumerators (stored as int).
239 int opType;
// Shader stage under test; compared against the subgroups::SHADER_STAGE_*_BIT values.
240 subgroups::ShaderStageFlags shaderStage;
241 };
242
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)243 void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
244 {
245 if(subgroups::SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
246 {
247 const string fragmentGLSL =
248 "${VERSION_DECL}\n"
249 "layout(location = 0) in highp vec4 in_color;\n"
250 "layout(location = 0) out highp vec4 out_color;\n"
251 "void main()\n"
252 "{\n"
253 " out_color = in_color;\n"
254 "}\n";
255
256 programCollection.add("fragment") << glu::FragmentSource(fragmentGLSL);
257 }
258 if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
259 {
260 const string vertexGLSL =
261 "${VERSION_DECL}\n"
262 "void main (void)\n"
263 "{\n"
264 " vec2 uv = vec2((gl_VertexID << 1) & 2, gl_VertexID & 2);\n"
265 " gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
266 " gl_PointSize = 1.0f;\n"
267 "}\n";
268
269 programCollection.add("vert") << glu::VertexSource(vertexGLSL);
270 }
271 else if (subgroups::SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
272 subgroups::setVertexShaderFrameBuffer(programCollection);
273
274 if (OPTYPE_ELECT == caseDef.opType)
275 {
276 std::ostringstream electedValue ;
277 std::ostringstream unelectedValue;
278 electedValue << ELECTED_VALUE;
279 unelectedValue << UNELECTED_VALUE;
280
281 if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
282 {
283 const string vertexGLSL =
284 "${VERSION_DECL}\n"
285 "#extension GL_KHR_shader_subgroup_basic: enable\n"
286 "layout(location = 0) out vec4 out_color;\n"
287 "layout(location = 0) in highp vec4 in_position;\n"
288 "\n"
289 "void main (void)\n"
290 "{\n"
291 " if (subgroupElect())\n"
292 " {\n"
293 " out_color.r = " + electedValue.str() + ".0f;\n"
294 " out_color.g = 1.0f;\n"
295 " }\n"
296 " else\n"
297 " {\n"
298 " out_color.r = " + unelectedValue.str() + ".0f;\n"
299 " out_color.g = 0.0f;\n"
300 " }\n"
301 " gl_Position = in_position;\n"
302 " gl_PointSize = 1.0f;\n"
303 "}\n";
304
305 programCollection.add("vert") << glu::VertexSource(vertexGLSL);
306 }
307 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
308 {
309 const string geometryGLSL =
310 "${VERSION_DECL}\n"
311 "#extension GL_KHR_shader_subgroup_basic: enable\n"
312 "layout(points) in;\n"
313 "layout(points, max_vertices = 1) out;\n"
314 "layout(location = 0) out vec4 out_color;\n"
315 "void main (void)\n"
316 "{\n"
317 " if (subgroupElect())\n"
318 " {\n"
319 " out_color.r = " + electedValue.str() + ".0f;\n"
320 " out_color.g = 1.0f;\n"
321 " }\n"
322 " else\n"
323 " {\n"
324 " out_color.r = " + unelectedValue.str() + ".0f;\n"
325 " out_color.g = 0.0f;\n"
326 " }\n"
327 " gl_Position = gl_in[0].gl_Position;\n"
328 " EmitVertex();\n"
329 " EndPrimitive();\n"
330 "}\n";
331
332 programCollection.add("geometry") << glu::GeometrySource(geometryGLSL);
333 }
334 else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
335 {
336 const string controlSourceGLSL =
337 "${VERSION_DECL}\n"
338 "#extension GL_KHR_shader_subgroup_basic: enable\n"
339 "#extension GL_EXT_tessellation_shader : require\n"
340 "layout(vertices = 2) out;\n"
341 "void main (void)\n"
342 "{\n"
343 " if (gl_InvocationID == 0)\n"
344 " {\n"
345 " gl_TessLevelOuter[0] = 1.0f;\n"
346 " gl_TessLevelOuter[1] = 1.0f;\n"
347 " }\n"
348 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
349 "}\n";
350
351 programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
352
353 const string evaluationSourceGLSL =
354 "${VERSION_DECL}\n"
355 "#extension GL_KHR_shader_subgroup_basic: enable\n"
356 "#extension GL_EXT_tessellation_shader : require\n"
357 "layout(isolines, equal_spacing, ccw ) in;\n"
358 "layout(location = 0) out vec4 out_color;\n"
359 "\n"
360 "void main (void)\n"
361 "{\n"
362 " if (subgroupElect())\n"
363 " {\n"
364 " out_color.r = 2.0f * " + electedValue.str() + ".0f - " + unelectedValue.str() + ".0f;\n"
365 " out_color.g = 2.0f;\n"
366 " }\n"
367 " else\n"
368 " {\n"
369 " out_color.r = " + unelectedValue.str() + ".0f;\n"
370 " out_color.g = 0.0f;\n"
371 " }\n"
372 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
373 "}\n";
374
375 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
376 }
377 else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
378 {
379 const string controlSourceGLSL =
380 "${VERSION_DECL}\n"
381 "#extension GL_KHR_shader_subgroup_basic: enable\n"
382 "#extension GL_EXT_tessellation_shader : require\n"
383 "layout(vertices = 2) out;\n"
384 "layout(location = 0) out vec4 out_color[];\n"
385 "void main (void)\n"
386 "{\n"
387 " if (gl_InvocationID == 0)\n"
388 " {\n"
389 " gl_TessLevelOuter[0] = 1.0f;\n"
390 " gl_TessLevelOuter[1] = 1.0f;\n"
391 " }\n"
392 " if (subgroupElect())\n"
393 " {\n"
394 " out_color[gl_InvocationID].r = " + electedValue.str() + ".0f;\n"
395 " out_color[gl_InvocationID].g = 1.0f;\n"
396 " }\n"
397 " else\n"
398 " {\n"
399 " out_color[gl_InvocationID].r = " + unelectedValue.str() + ".0f;\n"
400 " out_color[gl_InvocationID].g = 0.0f;\n"
401 " }\n"
402 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
403 "}\n";
404
405 programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
406
407 const string evaluationSourceGLSL =
408 "${VERSION_DECL}\n"
409 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
410 "#extension GL_EXT_tessellation_shader : require\n"
411 "layout(isolines, equal_spacing, ccw ) in;\n"
412 "layout(location = 0) in vec4 in_color[];\n"
413 "layout(location = 0) out vec4 out_color;\n"
414 "\n"
415 "void main (void)\n"
416 "{\n"
417 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
418 " out_color = in_color[0];\n"
419 "}\n";
420
421 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
422 }
423 else
424 {
425 DE_FATAL("Unsupported shader stage");
426 }
427 }
428 else
429 {
430 std::ostringstream bdy;
431 string color = (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n";
432 switch (caseDef.opType)
433 {
434 default:
435 DE_FATAL("Unhandled op type!");
436 break;
437 case OPTYPE_SUBGROUP_BARRIER:
438 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
439 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
440 {
441 bdy << " tempResult2 = tempBuffer[id];\n"
442 << " if (subgroupElect())\n"
443 << " {\n"
444 << " tempResult = value;\n"
445 << " " << color
446 << " }\n"
447 << " else\n"
448 << " {\n"
449 << " tempResult = tempBuffer[id];\n"
450 << " }\n"
451 << " " << getOpTypeName(caseDef.opType) << "();\n";
452 break;
453 }
454 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
455 bdy << " tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
456 << " if (subgroupElect())\n"
457 << " {\n"
458 << " tempResult = value;\n"
459 << " " << color
460 << " }\n"
461 << " else\n"
462 << " {\n"
463 << " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
464 << " }\n"
465 << " subgroupMemoryBarrierImage();\n";
466
467 break;
468 }
469
470 if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
471 {
472 std::ostringstream fragment;
473 fragment << "${VERSION_DECL}\n"
474 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
475 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
476 << "precision highp int;\n"
477 << "layout(location = 0) out highp vec4 out_color;\n"
478 << "\n"
479 << "layout(binding = 0, std140) uniform Buffer1\n"
480 << "{\n"
481 << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
482 << "};\n"
483 << "\n"
484 << "layout(binding = 1, std140) uniform Buffer2\n"
485 << "{\n"
486 << " uint value;\n"
487 << "};\n"
488 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
489 << "void main (void)\n"
490 << "{\n"
491 << " if (gl_HelperInvocation) return;\n"
492 << " uint id = 0u;\n"
493 << " if (subgroupElect())\n"
494 << " {\n"
495 << " id = uint(gl_FragCoord.x);\n"
496 << " }\n"
497 << " id = subgroupBroadcastFirst(id);\n"
498 << " uint localId = id;\n"
499 << " uint tempResult = 0u;\n"
500 << " uint tempResult2 = 0u;\n"
501 << " out_color.b = 0.0f;\n"
502 << bdy.str()
503 << " out_color.r = float(tempResult);\n"
504 << " out_color.g = float(value);\n"
505 << " out_color.a = float(tempResult2);\n"
506 << "}\n";
507 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
508 }
509 else if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
510 {
511 std::ostringstream vertex;
512 vertex << "${VERSION_DECL}\n"
513 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
514 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
515 <<"\n"
516 << "layout(location = 0) out vec4 out_color;\n"
517 << "layout(location = 0) in highp vec4 in_position;\n"
518 << "\n"
519 << "layout(binding = 0, std140) uniform Buffer1\n"
520 << "{\n"
521 << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
522 << "};\n"
523 << "\n"
524 << "layout(binding = 1, std140) uniform Buffer2\n"
525 << "{\n"
526 << " uint value;\n"
527 << "};\n"
528 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
529 << "void main (void)\n"
530 << "{\n"
531 << " uint id = 0u;\n"
532 << " if (subgroupElect())\n"
533 << " {\n"
534 << " id = uint(gl_VertexID);\n"
535 << " }\n"
536 << " id = subgroupBroadcastFirst(id);\n"
537 << " uint tempResult = 0u;\n"
538 << " uint tempResult2 = 0u;\n"
539 << " out_color.b = 0.0f;\n"
540 << bdy.str()
541 << " out_color.r = float(tempResult);\n"
542 << " out_color.g = float(value);\n"
543 << " out_color.a = float(tempResult2);\n"
544 << " gl_Position = in_position;\n"
545 << " gl_PointSize = 1.0f;\n"
546 << "}\n";
547 programCollection.add("vert") << glu::VertexSource(vertex.str());
548 }
549 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
550 {
551 std::ostringstream geometry;
552
553 geometry << "${VERSION_DECL}\n"
554 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
555 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
556 << "layout(points) in;\n"
557 << "layout(points, max_vertices = 1) out;\n"
558 << "layout(location = 0) out vec4 out_color;\n"
559 << "layout(binding = 0, std140) uniform Buffer1\n"
560 << "{\n"
561 << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
562 << "};\n"
563 << "\n"
564 << "layout(binding = 1, std140) uniform Buffer2\n"
565 << "{\n"
566 << " uint value;\n"
567 << "};\n"
568 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
569 << "void main (void)\n"
570 << "{\n"
571 << " uint id = 0u;\n"
572 << " if (subgroupElect())\n"
573 << " {\n"
574 << " id = uint(gl_InvocationID);\n"
575 << " }\n"
576 << " id = subgroupBroadcastFirst(id);\n"
577 << " uint tempResult = 0u;\n"
578 << " uint tempResult2 = 0u;\n"
579 << " out_color.b = 0.0f;\n"
580 << bdy.str()
581 << " out_color.r = float(tempResult);\n"
582 << " out_color.g = float(value);\n"
583 << " out_color.a = float(tempResult2);\n"
584 << " gl_Position = gl_in[0].gl_Position;\n"
585 << " EmitVertex();\n"
586 << " EndPrimitive();\n"
587 << "}\n";
588
589 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
590 }
591 else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
592 {
593 std::ostringstream controlSource;
594 std::ostringstream evaluationSource;
595
596 controlSource << "${VERSION_DECL}\n"
597 << "#extension GL_EXT_tessellation_shader : require\n"
598 << "layout(vertices = 2) out;\n"
599 << "void main (void)\n"
600 << "{\n"
601 << " if (gl_InvocationID == 0)\n"
602 <<" {\n"
603 << " gl_TessLevelOuter[0] = 1.0f;\n"
604 << " gl_TessLevelOuter[1] = 1.0f;\n"
605 << " }\n"
606 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
607 << "}\n";
608
609 evaluationSource << "${VERSION_DECL}\n"
610 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
611 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
612 << "#extension GL_EXT_tessellation_shader : require\n"
613 << "layout(isolines, equal_spacing, ccw ) in;\n"
614 << "layout(location = 0) out vec4 out_color;\n"
615 << "layout(binding = 0, std140) uniform Buffer1\n"
616 << "{\n"
617 << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
618 << "};\n"
619 << "\n"
620 << "layout(binding = 1, std140) uniform Buffer2\n"
621 << "{\n"
622 << " uint value;\n"
623 << "};\n"
624 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
625 << "void main (void)\n"
626 << "{\n"
627 << " uint id = 0u;\n"
628 << " if (subgroupElect())\n"
629 << " {\n"
630 << " id = uint(gl_PrimitiveID);\n"
631 << " }\n"
632 << " id = subgroupBroadcastFirst(id);\n"
633 << " uint tempResult = 0u;\n"
634 << " uint tempResult2 = 0u;\n"
635 << " out_color.b = 0.0f;\n"
636 << bdy.str()
637 << " out_color.r = float(tempResult);\n"
638 << " out_color.g = float(value);\n"
639 << " out_color.a = float(tempResult2);\n"
640 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
641 << "}\n";
642
643 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
644 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
645 }
646 else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
647 {
648 std::ostringstream controlSource;
649 std::ostringstream evaluationSource;
650
651 controlSource << "${VERSION_DECL}\n"
652 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
653 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
654 << "#extension GL_EXT_tessellation_shader : require\n"
655 << "layout(vertices = 2) out;\n"
656 << "layout(location = 0) out vec4 out_color[];\n"
657 << "layout(binding = 0, std140) uniform Buffer1\n"
658 << "{\n"
659 << " uint tempBuffer["<<SHADER_BUFFER_SIZE/4ull<<"];\n"
660 << "};\n"
661 << "\n"
662 << "layout(binding = 1, std140) uniform Buffer2\n"
663 << "{\n"
664 << " uint value;\n"
665 << "};\n"
666 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" : "\n")
667 << "void main (void)\n"
668 << "{\n"
669 << " uint id = 0u;\n"
670 << " if (gl_InvocationID == 0)\n"
671 <<" {\n"
672 << " gl_TessLevelOuter[0] = 1.0f;\n"
673 << " gl_TessLevelOuter[1] = 1.0f;\n"
674 << " }\n"
675 << " if (subgroupElect())\n"
676 << " {\n"
677 << " id = uint(gl_InvocationID);\n"
678 << " }\n"
679 << " id = subgroupBroadcastFirst(id);\n"
680 << " uint tempResult = 0u;\n"
681 << " uint tempResult2 = 0u;\n"
682 << " out_color[gl_InvocationID].b = 0.0f;\n"
683 << bdy.str()
684 << " out_color[gl_InvocationID].r = float(tempResult);\n"
685 << " out_color[gl_InvocationID].g = float(value);\n"
686 << " out_color[gl_InvocationID].a = float(tempResult2);\n"
687 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
688 << "}\n";
689
690 evaluationSource << "${VERSION_DECL}\n"
691 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 << "#extension GL_EXT_tessellation_shader : require\n"
693 << "layout(isolines, equal_spacing, ccw ) in;\n"
694 << "layout(location = 0) in vec4 in_color[];\n"
695 << "layout(location = 0) out vec4 out_color;\n"
696 << "\n"
697 << "void main (void)\n"
698 << "{\n"
699 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
700 << " out_color = in_color[0];\n"
701 << "}\n";
702
703 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
704 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
705 }
706 else
707 {
708 DE_FATAL("Unsupported shader stage");
709 }
710 }
711 }
712
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)713 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
714 {
715 if (OPTYPE_ELECT == caseDef.opType)
716 {
717 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
718 {
719 std::ostringstream src;
720
721 src << "${VERSION_DECL}\n"
722 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
723 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
724 << "layout(binding = 0, std430) buffer Buffer1\n"
725 << "{\n"
726 << " uint result[];\n"
727 << "};\n"
728 << "\n"
729 << subgroups::getSharedMemoryBallotHelper()
730 << "void main (void)\n"
731 << "{\n"
732 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
733 << " highp uint offset = globalSize.x * ((globalSize.y * "
734 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
735 "gl_GlobalInvocationID.x;\n"
736 << " uint value = " << UNELECTED_VALUE << "u;\n"
737 << " if (subgroupElect())\n"
738 << " {\n"
739 << " value = " << ELECTED_VALUE << "u;\n"
740 << " }\n"
741 << " uvec4 bits = uvec4(bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "u)));\n"
742 << " result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
743 << "}\n";
744
745 programCollection.add("comp") << glu::ComputeSource(src.str());
746 }
747 else
748 {
749 {
750 std::ostringstream vertex;
751 vertex << "${VERSION_DECL}\n"
752 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
753 << "layout(binding = 0, std430) buffer Buffer0\n"
754 << "{\n"
755 << " uint result[];\n"
756 << "} b0;\n"
757 << "layout(binding = 4, std430) buffer Buffer4\n"
758 << "{\n"
759 << " uint numSubgroupsExecuted;\n"
760 << "} b4;\n"
761 << "\n"
762 << "void main (void)\n"
763 << "{\n"
764 << " if (subgroupElect())\n"
765 << " {\n"
766 << " b0.result[gl_VertexID] = " << ELECTED_VALUE << "u;\n"
767 << " atomicAdd(b4.numSubgroupsExecuted, 1u);\n"
768 << " }\n"
769 << " else\n"
770 << " {\n"
771 << " b0.result[gl_VertexID] = " << UNELECTED_VALUE << "u;\n"
772 << " }\n"
773 << " float pixelSize = 2.0f/1024.0f;\n"
774 << " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
775 << " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
776 << " gl_PointSize = 1.0f;\n"
777 << "}\n";
778 programCollection.add("vert") << glu::VertexSource(vertex.str());
779 }
780
781 {
782 std::ostringstream tesc;
783 tesc << "${VERSION_DECL}\n"
784 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
785 << "layout(vertices=1) out;\n"
786 << "layout(binding = 1, std430) buffer Buffer1\n"
787 << "{\n"
788 << " uint result[];\n"
789 << "} b1;\n"
790 << "layout(binding = 5, std430) buffer Buffer5\n"
791 << "{\n"
792 << " uint numSubgroupsExecuted;\n"
793 << "} b5;\n"
794 << "\n"
795 << "void main (void)\n"
796 << "{\n"
797 << " if (subgroupElect())\n"
798 << " {\n"
799 << " b1.result[gl_PrimitiveID] = " << ELECTED_VALUE << "u;\n"
800 << " atomicAdd(b5.numSubgroupsExecuted, 1u);\n"
801 << " }\n"
802 << " else\n"
803 << " {\n"
804 << " b1.result[gl_PrimitiveID] = " << UNELECTED_VALUE << "u;\n"
805 << " }\n"
806 << " if (gl_InvocationID == 0)\n"
807 << " {\n"
808 << " gl_TessLevelOuter[0] = 1.0f;\n"
809 << " gl_TessLevelOuter[1] = 1.0f;\n"
810 << " }\n"
811 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
812 << "}\n";
813 programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
814 }
815
816 {
817 std::ostringstream tese;
818 tese << "${VERSION_DECL}\n"
819 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
820 << "layout(isolines) in;\n"
821 << "layout(binding = 2, std430) buffer Buffer2\n"
822 << "{\n"
823 << " uint result[];\n"
824 << "} b2;\n"
825 << "layout(binding = 6, std430) buffer Buffer6\n"
826 << "{\n"
827 << " uint numSubgroupsExecuted;\n"
828 << "} b6;\n"
829 << "\n"
830 << "void main (void)\n"
831 << "{\n"
832 << " if (subgroupElect())\n"
833 << " {\n"
834 << " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << "u;\n"
835 << " atomicAdd(b6.numSubgroupsExecuted, 1u);\n"
836 << " }\n"
837 << " else\n"
838 << " {\n"
839 << " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << "u;\n"
840 << " }\n"
841 << " float pixelSize = 2.0f/1024.0f;\n"
842 << " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
843 << "}\n";
844 programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
845 }
846 {
847 std::ostringstream geometry;
848 geometry << "#extension GL_KHR_shader_subgroup_basic: enable\n"
849 << "layout(${TOPOLOGY}) in;\n"
850 << "layout(points, max_vertices = 1) out;\n"
851 << "layout(binding = 3, std430) buffer Buffer3\n"
852 << "{\n"
853 << " uint result[];\n"
854 << "} b3;\n"
855 << "layout(binding = 7, std430) buffer Buffer7\n"
856 << "{\n"
857 << " uint numSubgroupsExecuted;\n"
858 << "} b7;\n"
859 << "\n"
860 << "void main (void)\n"
861 << "{\n"
862 << " if (subgroupElect())\n"
863 << " {\n"
864 << " b3.result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << "u;\n"
865 << " atomicAdd(b7.numSubgroupsExecuted, 1u);\n"
866 << " }\n"
867 << " else\n"
868 << " {\n"
869 << " b3.result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << "u;\n"
870 << " }\n"
871 << " gl_Position = gl_in[0].gl_Position;\n"
872 << " EmitVertex();\n"
873 << " EndPrimitive();\n"
874 << "}\n";
875 subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
876 }
877
878 {
879 std::ostringstream fragment;
880 fragment << "${VERSION_DECL}\n"
881 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
882 << "precision highp int;\n"
883 << "layout(location = 0) out uint data;\n"
884 << "layout(binding = 8, std430) buffer Buffer8\n"
885 << "{\n"
886 << " uint numSubgroupsExecuted;\n"
887 << "} b8;\n"
888 << "void main (void)\n"
889 << "{\n"
890 << " if (gl_HelperInvocation) return;\n"
891 << " if (subgroupElect())\n"
892 << " {\n"
893 << " data = " << ELECTED_VALUE << "u;\n"
894 << " atomicAdd(b8.numSubgroupsExecuted, 1u);\n"
895 << " }\n"
896 << " else\n"
897 << " {\n"
898 << " data = " << UNELECTED_VALUE << "u;\n"
899 << " }\n"
900 << "}\n";
901 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
902 }
903 subgroups::addNoSubgroupShader(programCollection);
904 }
905 }
906 else
907 {
908 std::ostringstream bdy;
909
910 switch (caseDef.opType)
911 {
912 default:
913 DE_FATAL("Unhandled op type!");
914 break;
915 case OPTYPE_SUBGROUP_BARRIER:
916 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
917 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
918 bdy << " if (subgroupElect())\n"
919 << " {\n"
920 << " b${SSBO1}.tempBuffer[id] = b${SSBO1}.value;\n"
921 << " }\n"
922 << " " << getOpTypeName(caseDef.opType) << "();\n"
923 << " tempResult = b${SSBO1}.tempBuffer[id];\n";
924 break;
925 case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
926 bdy << " if (subgroupElect())\n"
927 << " {\n"
928 << " tempShared[localId] = b${SSBO1}.value;\n"
929 << " }\n"
930 << " subgroupMemoryBarrierShared();\n"
931 << " tempResult = tempShared[localId];\n";
932 break;
933 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
934 bdy << " if (subgroupElect())\n"
935 << " {\n"
936 << " imageStore(tempImage${IMG1}, ivec2(id, 0), uvec4(b${SSBO1}.value));\n"
937 << " }\n"
938 << " subgroupMemoryBarrierImage();\n"
939 << " tempResult = imageLoad(tempImage${IMG1}, ivec2(id, 0)).x;\n";
940 break;
941 }
942
943 tcu::StringTemplate bdyTemplate(bdy.str());
944
945 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
946 {
947 std::ostringstream src;
948 map<string, string> bufferNameMapping;
949 bufferNameMapping.insert(pair<string, string>("SSBO1", "1"));
950 bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
951
952 src << "${VERSION_DECL}\n"
953 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
954 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
955 << "layout(binding = 0, std430) buffer Buffer0\n"
956 << "{\n"
957 << " uint result[];\n"
958 << "} b0;\n"
959 << "layout(binding = 1, std430) buffer Buffer1\n"
960 << "{\n"
961 << " uint value;\n"
962 << " uint tempBuffer[];\n"
963 << "} b1;\n"
964 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "\n")
965 << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
966 << "\n"
967 << "void main (void)\n"
968 << "{\n"
969 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
970 << " highp uint offset = globalSize.x * ((globalSize.y * "
971 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
972 "gl_GlobalInvocationID.x;\n"
973 << " uint localId = gl_SubgroupID;\n"
974 << " uint id = globalSize.x * ((globalSize.y * "
975 "gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
976 "gl_WorkGroupID.x + localId;\n"
977 << " uint tempResult = 0u;\n"
978 << bdyTemplate.specialize(bufferNameMapping)
979 << " b0.result[offset] = tempResult;\n"
980 << "}\n";
981
982 programCollection.add("comp") << glu::ComputeSource(src.str());
983 }
984 else
985 {
986 {
987 map<string, string> bufferNameMapping;
988 bufferNameMapping.insert(pair<string, string>("SSBO1", "4"));
989 bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
990
991 std::ostringstream vertex;
992 vertex <<
993 "${VERSION_DECL}\n"
994 "#extension GL_KHR_shader_subgroup_basic: enable\n"
995 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
996 "layout(binding = 0, std430) buffer Buffer0\n"
997 "{\n"
998 " uint result[];\n"
999 "} b0;\n"
1000 "layout(binding = 4, std430) buffer Buffer4\n"
1001 "{\n"
1002 " uint value;\n"
1003 " uint tempBuffer[];\n"
1004 "} b4;\n"
1005 "layout(binding = 5, std430) buffer Buffer5\n"
1006 "{\n"
1007 " uint subgroupID;\n"
1008 "} b5;\n"
1009 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "")
1010 << "void main (void)\n"
1011 "{\n"
1012 " uint id = 0u;\n"
1013 " if (subgroupElect())\n"
1014 " {\n"
1015 " id = atomicAdd(b5.subgroupID, 1u);\n"
1016 " }\n"
1017 " id = subgroupBroadcastFirst(id);\n"
1018 " uint localId = id;\n"
1019 " uint tempResult = 0u;\n"
1020 + bdyTemplate.specialize(bufferNameMapping) +
1021 " b0.result[gl_VertexID] = tempResult;\n"
1022 " float pixelSize = 2.0f/1024.0f;\n"
1023 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1024 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1025 " gl_PointSize = 1.0f;\n"
1026 "}\n";
1027 programCollection.add("vert") << glu::VertexSource(vertex.str());
1028 }
1029
1030 {
1031 map<string, string> bufferNameMapping;
1032 bufferNameMapping.insert(pair<string, string>("SSBO1", "6"));
1033 bufferNameMapping.insert(pair<string, string>("IMG1", "1"));
1034
1035 std::ostringstream tesc;
1036 tesc <<
1037 "${VERSION_DECL}\n"
1038 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1039 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1040 "layout(vertices=1) out;\n"
1041 "layout(binding = 1, std430) buffer Buffer1\n"
1042 "{\n"
1043 " uint result[];\n"
1044 "} b1;\n"
1045 "layout(binding = 6, std430) buffer Buffer6\n"
1046 "{\n"
1047 " uint value;\n"
1048 " uint tempBuffer[];\n"
1049 "} b6;\n"
1050 "layout(binding = 7, std430) buffer Buffer7\n"
1051 "{\n"
1052 " uint subgroupID;\n"
1053 "} b7;\n"
1054 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 1, r32ui) uniform highp uimage2D tempImage1;\n" : "")
1055 << "void main (void)\n"
1056 "{\n"
1057 " uint id = 0u;\n"
1058 " if (subgroupElect())\n"
1059 " {\n"
1060 " id = atomicAdd(b7.subgroupID, 1u);\n"
1061 " }\n"
1062 " id = subgroupBroadcastFirst(id);\n"
1063 " uint localId = id;\n"
1064 " uint tempResult = 0u;\n"
1065 + bdyTemplate.specialize(bufferNameMapping) +
1066 " b1.result[gl_PrimitiveID] = tempResult;\n"
1067 " if (gl_InvocationID == 0)\n"
1068 " {\n"
1069 " gl_TessLevelOuter[0] = 1.0f;\n"
1070 " gl_TessLevelOuter[1] = 1.0f;\n"
1071 " }\n"
1072 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1073 "}\n";
1074 programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
1075 }
1076
1077 {
1078 map<string, string> bufferNameMapping;
1079 bufferNameMapping.insert(pair<string, string>("SSBO1", "8"));
1080 bufferNameMapping.insert(pair<string, string>("IMG1", "2"));
1081
1082 std::ostringstream tese;
1083 tese <<
1084 "${VERSION_DECL}\n"
1085 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1086 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1087 "layout(isolines) in;\n"
1088 "layout(binding = 2, std430) buffer Buffer2\n"
1089 "{\n"
1090 " uint result[];\n"
1091 "} b2;\n"
1092 "layout(binding = 8, std430) buffer Buffer8\n"
1093 "{\n"
1094 " uint value;\n"
1095 " uint tempBuffer[];\n"
1096 "} b8;\n"
1097 "layout(binding = 9, std430) buffer Buffer9\n"
1098 "{\n"
1099 " uint subgroupID;\n"
1100 "} b9;\n"
1101 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 2, r32ui) uniform highp uimage2D tempImage2;\n" : "")
1102 << "void main (void)\n"
1103 "{\n"
1104 " uint id = 0u;\n"
1105 " if (subgroupElect())\n"
1106 " {\n"
1107 " id = atomicAdd(b9.subgroupID, 1u);\n"
1108 " }\n"
1109 " id = subgroupBroadcastFirst(id);\n"
1110 " uint localId = id;\n"
1111 " uint tempResult = 0u;\n"
1112 + bdyTemplate.specialize(bufferNameMapping) +
1113 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult;\n"
1114 " float pixelSize = 2.0f/1024.0f;\n"" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1115 "}\n";
1116 programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
1117 }
1118 {
1119 map<string, string> bufferNameMapping;
1120 bufferNameMapping.insert(pair<string, string>("SSBO1", "10"));
1121 bufferNameMapping.insert(pair<string, string>("IMG1", "3"));
1122
1123 std::ostringstream geometry;
1124 geometry <<
1125 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1126 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1127 "layout(${TOPOLOGY}) in;\n"
1128 "layout(points, max_vertices = 1) out;\n"
1129 "layout(binding = 3, std430) buffer Buffer3\n"
1130 "{\n"
1131 " uint result[];\n"
1132 "} b3;\n"
1133 "layout(binding = 10, std430) buffer Buffer10\n"
1134 "{\n"
1135 " uint value;\n"
1136 " uint tempBuffer[];\n"
1137 "} b10;\n"
1138 "layout(binding = 11, std430) buffer Buffer11\n"
1139 "{\n"
1140 " uint subgroupID;\n"
1141 "} b11;\n"
1142 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 3, r32ui) uniform highp uimage2D tempImage3;\n" : "")
1143 << "void main (void)\n"
1144 "{\n"
1145 " uint id = 0u;\n"
1146 " if (subgroupElect())\n"
1147 " {\n"
1148 " id = atomicAdd(b11.subgroupID, 1u);\n"
1149 " }\n"
1150 " id = subgroupBroadcastFirst(id);\n"
1151 " uint localId = id;\n"
1152 " uint tempResult = 0u;\n"
1153 + bdyTemplate.specialize(bufferNameMapping) +
1154 " b3.result[gl_PrimitiveIDIn] = tempResult;\n"
1155 " gl_Position = gl_in[0].gl_Position;\n"
1156 " EmitVertex();\n"
1157 " EndPrimitive();\n"
1158 "}\n";
1159 subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
1160 }
1161
1162 {
1163 map<string, string> bufferNameMapping;
1164 bufferNameMapping.insert(pair<string, string>("SSBO1", "12"));
1165 bufferNameMapping.insert(pair<string, string>("IMG1", "4"));
1166
1167 std::ostringstream fragment;
1168 fragment <<
1169 "${VERSION_DECL}\n"
1170 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1171 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1172 "precision highp int;\n"
1173 "layout(location = 0) out uint result;\n"
1174 "layout(binding = 12, std430) buffer Buffer12\n"
1175 "{\n"
1176 " uint value;\n"
1177 " uint tempBuffer[];\n"
1178 "} b12;\n"
1179 "layout(binding = 13, std430) buffer Buffer13\n"
1180 "{\n"
1181 " uint subgroupID;\n"
1182 "} b13;\n"
1183 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 4, r32ui) uniform highp uimage2D tempImage4;\n" : "")
1184 << "void main (void)\n"
1185 "{\n"
1186 " if (gl_HelperInvocation) return;\n"
1187 " uint id = 0u;\n"
1188 " if (subgroupElect())\n"
1189 " {\n"
1190 " id = atomicAdd(b13.subgroupID, 1u);\n"
1191 " }\n"
1192 " id = subgroupBroadcastFirst(id);\n"
1193 " uint localId = id;\n"
1194 " uint tempResult = 0u;\n"
1195 + bdyTemplate.specialize(bufferNameMapping) +
1196 " result = tempResult;\n"
1197 "}\n";
1198 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
1199 }
1200
1201 subgroups::addNoSubgroupShader(programCollection);
1202 }
1203 }
1204 }
1205
supportedCheck(Context & context,CaseDefinition caseDef)1206 void supportedCheck (Context& context, CaseDefinition caseDef)
1207 {
1208 DE_UNREF(caseDef);
1209 if (!subgroups::isSubgroupSupported(context))
1210 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1211 }
1212
noSSBOtest(Context & context,const CaseDefinition caseDef)1213 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
1214 {
1215 if (!subgroups::areSubgroupOperationsSupportedForStage(
1216 context, caseDef.shaderStage))
1217 {
1218 if (subgroups::areSubgroupOperationsRequiredForStage(
1219 caseDef.shaderStage))
1220 {
1221 return tcu::TestStatus::fail(
1222 "Shader stage " +
1223 subgroups::getShaderStageName(caseDef.shaderStage) +
1224 " is required to support subgroup operations!");
1225 }
1226 else
1227 {
1228 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
1229 }
1230 }
1231
1232 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
1233 {
1234 return tcu::TestStatus::fail(
1235 "Subgroup feature " +
1236 subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
1237 " is a required capability!");
1238 }
1239
1240 if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1241 {
1242 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
1243 {
1244 TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1245 }
1246 }
1247
1248 if (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType)
1249 {
1250 if (!subgroups::isImageSupportedForStageOnDevice(context, caseDef.shaderStage))
1251 {
1252 TCU_THROW(NotSupportedError, "Subgroup basic memory barrier image test for " +
1253 subgroups::getShaderStageName(caseDef.shaderStage) +
1254 " stage requires that image uniforms be supported on this stage");
1255 }
1256 }
1257
1258 const deUint32 inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1259 std::vector<subgroups::SSBOData> inputDatas (inputDatasCount);
1260
1261 inputDatas[0].format = subgroups::FORMAT_R32_UINT;
1262 inputDatas[0].layout = subgroups::SSBOData::LayoutStd140;
1263 inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull;
1264 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1265 inputDatas[0].binding = 0u;
1266
1267 inputDatas[1].format = subgroups::FORMAT_R32_UINT;
1268 inputDatas[1].layout = subgroups::SSBOData::LayoutStd140;
1269 inputDatas[1].numElements = 1ull;
1270 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1271 inputDatas[1].binding = 1u;
1272
1273 if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType )
1274 {
1275 inputDatas[2].format = subgroups::FORMAT_R32_UINT;
1276 inputDatas[2].layout = subgroups::SSBOData::LayoutPacked;
1277 inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1278 inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1279 inputDatas[2].isImage = true;
1280 inputDatas[2].binding = 0u;
1281 }
1282
1283 if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1284 {
1285 if (OPTYPE_ELECT == caseDef.opType)
1286 return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
1287 else
1288 return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1289 }
1290 else if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1291 {
1292 return subgroups::makeFragmentFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkFragmentSubgroupBarriersNoSSBO);
1293 }
1294 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1295 {
1296 if (OPTYPE_ELECT == caseDef.opType)
1297 return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO);
1298 else
1299 return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1300 }
1301
1302 if (OPTYPE_ELECT == caseDef.opType)
1303 return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage);
1304
1305 return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
1306 (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)? checkVertexPipelineStagesSubgroupBarriersNoSSBO : checkTessellationEvaluationSubgroupBarriersNoSSBO,
1307 caseDef.shaderStage);
1308 }
1309
test(Context & context,const CaseDefinition caseDef)1310 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
1311 {
1312 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
1313 {
1314 return tcu::TestStatus::fail(
1315 "Subgroup feature " +
1316 subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
1317 " is a required capability!");
1318 }
1319
1320 if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1321 {
1322 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
1323 {
1324 TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1325 }
1326 }
1327
1328 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
1329 {
1330 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
1331 {
1332 return tcu::TestStatus::fail("Shader stage " +
1333 subgroups::getShaderStageName(caseDef.shaderStage) +
1334 " is required to support subgroup operations!");
1335 }
1336
1337 if (OPTYPE_ELECT == caseDef.opType)
1338 {
1339 return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect);
1340 }
1341 else
1342 {
1343 const deUint32 inputDatasCount = 2;
1344 subgroups::SSBOData inputDatas[inputDatasCount];
1345 inputDatas[0].format = subgroups::FORMAT_R32_UINT;
1346 inputDatas[0].layout = subgroups::SSBOData::LayoutStd430;
1347 inputDatas[0].numElements = 1 + SHADER_BUFFER_SIZE;
1348 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1349 inputDatas[0].binding = 1u;
1350
1351 inputDatas[1].format = subgroups::FORMAT_R32_UINT;
1352 inputDatas[1].layout = subgroups::SSBOData::LayoutPacked;
1353 inputDatas[1].numElements = SHADER_BUFFER_SIZE;
1354 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNone;
1355 inputDatas[1].isImage = true;
1356 inputDatas[1].binding = 0u;
1357
1358 return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers);
1359 }
1360 }
1361 else
1362 {
1363 if (!subgroups::isFragmentSSBOSupportedForDevice(context))
1364 {
1365 TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
1366 }
1367
1368 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
1369 int combinedSSBOs = context.getDeqpContext().getContextInfo().getInt(GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS);
1370
1371 subgroups::ShaderStageFlags stages = (subgroups::ShaderStageFlags)(caseDef.shaderStage & supportedStages);
1372
1373 if ( subgroups::SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
1374 {
1375 if ( (stages & subgroups::SHADER_STAGE_FRAGMENT_BIT) == 0)
1376 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
1377 else
1378 stages = subgroups::SHADER_STAGE_FRAGMENT_BIT;
1379 }
1380
1381 if ((subgroups::ShaderStageFlags)0u == stages)
1382 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
1383
1384 // with sufficient effort we could dynamically assign the binding points
1385 // based on the number of stages actually supported, etc, but we already
1386 // have the framebuffer tests which cover those cases, so there doesn't seem
1387 // to be much benefit in doing that right now.
1388 if (combinedSSBOs < 14)
1389 TCU_THROW(NotSupportedError, "Device does not support enough combined SSBOs for this test (14)");
1390
1391 if (OPTYPE_ELECT == caseDef.opType)
1392 {
1393 const deUint32 inputCount = 5u;
1394 subgroups::SSBOData inputData[inputCount];
1395
1396 inputData[0].format = subgroups::FORMAT_R32_UINT;
1397 inputData[0].layout = subgroups::SSBOData::LayoutStd430;
1398 inputData[0].numElements = 1;
1399 inputData[0].initializeType = subgroups::SSBOData::InitializeZero;
1400 inputData[0].binding = 4u;
1401 inputData[0].stages = subgroups::SHADER_STAGE_VERTEX_BIT;
1402
1403 inputData[1].format = subgroups::FORMAT_R32_UINT;
1404 inputData[1].layout = subgroups::SSBOData::LayoutStd430;
1405 inputData[1].numElements = 1;
1406 inputData[1].initializeType = subgroups::SSBOData::InitializeZero;
1407 inputData[1].binding = 5u;
1408 inputData[1].stages = subgroups::SHADER_STAGE_TESS_CONTROL_BIT;
1409
1410 inputData[2].format = subgroups::FORMAT_R32_UINT;
1411 inputData[2].layout = subgroups::SSBOData::LayoutStd430;
1412 inputData[2].numElements = 1;
1413 inputData[2].initializeType = subgroups::SSBOData::InitializeZero;
1414 inputData[2].binding = 6u;
1415 inputData[2].stages = subgroups::SHADER_STAGE_TESS_EVALUATION_BIT;
1416
1417 inputData[3].format = subgroups::FORMAT_R32_UINT;
1418 inputData[3].layout = subgroups::SSBOData::LayoutStd430;
1419 inputData[3].numElements = 1;
1420 inputData[3].initializeType = subgroups::SSBOData::InitializeZero;
1421 inputData[3].binding = 7u;
1422 inputData[3].stages = subgroups::SHADER_STAGE_GEOMETRY_BIT;
1423
1424 inputData[4].format = subgroups::FORMAT_R32_UINT;
1425 inputData[4].layout = subgroups::SSBOData::LayoutStd430;
1426 inputData[4].numElements = 1;
1427 inputData[4].initializeType = subgroups::SSBOData::InitializeZero;
1428 inputData[4].binding = 8u;
1429 inputData[4].stages = subgroups::SHADER_STAGE_FRAGMENT_BIT;
1430
1431 return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages);
1432 }
1433 else
1434 {
1435 const subgroups::ShaderStageFlags stagesBits[] =
1436 {
1437 subgroups::SHADER_STAGE_VERTEX_BIT,
1438 subgroups::SHADER_STAGE_TESS_CONTROL_BIT,
1439 subgroups::SHADER_STAGE_TESS_EVALUATION_BIT,
1440 subgroups::SHADER_STAGE_GEOMETRY_BIT,
1441 subgroups::SHADER_STAGE_FRAGMENT_BIT,
1442 };
1443
1444 const deUint32 inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 3u;
1445 subgroups::SSBOData inputDatas[inputDatasCount];
1446
1447 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
1448 {
1449 const deUint32 index = ndx*3;
1450 const deUint32 ssboIndex = ndx*2;
1451 const deUint32 imgIndex = ndx;
1452 inputDatas[index].format = subgroups::FORMAT_R32_UINT;
1453 inputDatas[index].layout = subgroups::SSBOData::LayoutStd430;
1454 inputDatas[index].numElements = 1 + SHADER_BUFFER_SIZE;
1455 inputDatas[index].initializeType = subgroups::SSBOData::InitializeNonZero;
1456 inputDatas[index].binding = ssboIndex + 4u;
1457 inputDatas[index].stages = stagesBits[ndx];
1458
1459 inputDatas[index + 1].format = subgroups::FORMAT_R32_UINT;
1460 inputDatas[index + 1].layout = subgroups::SSBOData::LayoutStd430;
1461 inputDatas[index + 1].numElements = 1;
1462 inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero;
1463 inputDatas[index + 1].binding = ssboIndex + 5u;
1464 inputDatas[index + 1].stages = stagesBits[ndx];
1465
1466 inputDatas[index + 2].format = subgroups::FORMAT_R32_UINT;
1467 inputDatas[index + 2].layout = subgroups::SSBOData::LayoutPacked;
1468 inputDatas[index + 2].numElements = SHADER_BUFFER_SIZE;
1469 inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNone;
1470 inputDatas[index + 2].isImage = true;
1471 inputDatas[index + 2].binding = imgIndex;
1472 inputDatas[index + 2].stages = stagesBits[ndx];
1473 }
1474
1475 return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages);
1476 }
1477 }
1478 }
1479 }
1480
createSubgroupsBasicTests(deqp::Context & testCtx)1481 deqp::TestCaseGroup* createSubgroupsBasicTests(deqp::Context& testCtx)
1482 {
1483 de::MovePtr<deqp::TestCaseGroup> graphicGroup(new deqp::TestCaseGroup(
1484 testCtx, "graphics", "Subgroup basic category tests: graphics"));
1485 de::MovePtr<deqp::TestCaseGroup> computeGroup(new deqp::TestCaseGroup(
1486 testCtx, "compute", "Subgroup basic category tests: compute"));
1487 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(new deqp::TestCaseGroup(
1488 testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));
1489
1490 const subgroups::ShaderStageFlags stages[] =
1491 {
1492 SHADER_STAGE_FRAGMENT_BIT,
1493 SHADER_STAGE_VERTEX_BIT,
1494 SHADER_STAGE_TESS_EVALUATION_BIT,
1495 SHADER_STAGE_TESS_CONTROL_BIT,
1496 SHADER_STAGE_GEOMETRY_BIT,
1497 };
1498
1499 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
1500 {
1501 const std::string op = de::toLower(getOpTypeName(opTypeIndex));
1502
1503 {
1504 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT};
1505 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "",
1506 supportedCheck, initPrograms, test, caseDef);
1507 }
1508
1509 if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opTypeIndex)
1510 {
1511 // Shared isn't available in non compute shaders.
1512 continue;
1513 }
1514
1515 {
1516 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS};
1517 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(),
1518 op, "",
1519 supportedCheck, initPrograms, test, caseDef);
1520 }
1521
1522 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1523 {
1524 if (opTypeIndex == OPTYPE_ELECT && stageIndex == 0)
1525 continue; // This is not tested. I don't know why.
1526
1527 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
1528 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferGroup.get(),
1529 op + "_" + getShaderStageName(caseDef.shaderStage), "",
1530 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
1531 }
1532 }
1533
1534 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(
1535 testCtx, "basic", "Subgroup basic category tests"));
1536
1537 group->addChild(graphicGroup.release());
1538 group->addChild(computeGroup.release());
1539 group->addChild(framebufferGroup.release());
1540
1541 return group.release();
1542 }
1543
1544 } // subgroups
1545 } // glc
1546