1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 The Khronos Group Inc.
6 * Copyright (c) 2019 Google Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Concurrent draw tests
23 * Tests that create queue for rendering as well as queue for
24 * compute, and trigger work on both pipelines at the same time,
25 * and finally verify that the results are as expected.
26 *//*--------------------------------------------------------------------*/
27
28 #include "vktDrawConcurrentTests.hpp"
29
30 #include "vktCustomInstancesDevices.hpp"
31 #include "vktTestCaseUtil.hpp"
32 #include "vktDrawTestCaseUtil.hpp"
33 #include "../compute/vktComputeTestsUtil.hpp"
34
35 #include "vktDrawBaseClass.hpp"
36
37 #include "tcuTestLog.hpp"
38 #include "tcuResource.hpp"
39 #include "tcuImageCompare.hpp"
40 #include "tcuTextureUtil.hpp"
41 #include "tcuRGBA.hpp"
42
43 #include "vkDefs.hpp"
44 #include "vkCmdUtil.hpp"
45 #include "vkQueryUtil.hpp"
46 #include "vkBuilderUtil.hpp"
47 #include "vkBarrierUtil.hpp"
48
49 #include "deRandom.hpp"
50
51 using namespace vk;
52
53 namespace vkt
54 {
55 namespace Draw
56 {
57 namespace
58 {
59
// Test instance that records a triangle-list draw on the universal queue while
// a compute dispatch (on a queue from a separately created device) runs
// concurrently; both results are verified after waiting on both fences.
class ConcurrentDraw : public DrawTestsBaseClass
{
public:
	typedef TestSpecBase TestSpec; // shader map + topology + dynamic-rendering flag
	ConcurrentDraw (Context &context, TestSpec testSpec);
	virtual tcu::TestStatus iterate (void);
};
67
ConcurrentDraw(Context & context,TestSpec testSpec)68 ConcurrentDraw::ConcurrentDraw (Context &context, TestSpec testSpec)
69 : DrawTestsBaseClass(context, testSpec.shaders[glu::SHADERTYPE_VERTEX], testSpec.shaders[glu::SHADERTYPE_FRAGMENT], testSpec.useDynamicRendering, testSpec.topology)
70 {
71 m_data.push_back(VertexElementData(tcu::Vec4(1.0f, -1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
72 m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
73
74 int refVertexIndex = 2;
75
76 for (int i = 0; i < 1000; i++)
77 {
78 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
79 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
80 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
81 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, -0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
82 m_data.push_back(VertexElementData(tcu::Vec4(0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
83 m_data.push_back(VertexElementData(tcu::Vec4(-0.3f, 0.3f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), refVertexIndex++));
84 }
85 m_data.push_back(VertexElementData(tcu::Vec4(-1.0f, 1.0f, 1.0f, 1.0f), tcu::RGBA::blue().toVec(), -1));
86
87 initialize();
88 }
89
iterate(void)90 tcu::TestStatus ConcurrentDraw::iterate (void)
91 {
92 enum
93 {
94 NO_MATCH_FOUND = ~((deUint32)0),
95 ERROR_NONE = 0,
96 ERROR_WAIT_COMPUTE = 1,
97 ERROR_WAIT_DRAW = 2
98 };
99
100 struct Queue
101 {
102 VkQueue queue;
103 deUint32 queueFamilyIndex;
104 };
105
106 const DeviceInterface& vk = m_context.getDeviceInterface();
107 const deUint32 numValues = 1024;
108 const InstanceInterface& instance = m_context.getInstanceInterface();
109 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
110 const auto validation = m_context.getTestContext().getCommandLine().isValidationEnabled();
111 tcu::TestLog& log = m_context.getTestContext().getLog();
112 Move<VkDevice> computeDevice;
113 std::vector<VkQueueFamilyProperties> queueFamilyProperties;
114 VkDeviceCreateInfo deviceInfo;
115 VkPhysicalDeviceFeatures deviceFeatures;
116 const float queuePriority = 1.0f;
117 VkDeviceQueueCreateInfo queueInfos;
118 Queue computeQueue = { DE_NULL, (deUint32)NO_MATCH_FOUND };
119
120 // Set up compute
121
122 queueFamilyProperties = getPhysicalDeviceQueueFamilyProperties(instance, physicalDevice);
123
124 for (deUint32 queueNdx = 0; queueNdx < queueFamilyProperties.size(); ++queueNdx)
125 {
126 if (queueFamilyProperties[queueNdx].queueFlags & VK_QUEUE_COMPUTE_BIT)
127 {
128 if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
129 computeQueue.queueFamilyIndex = queueNdx;
130 }
131 }
132
133 if (computeQueue.queueFamilyIndex == NO_MATCH_FOUND)
134 TCU_THROW(NotSupportedError, "Compute queue couldn't be created");
135
136 VkDeviceQueueCreateInfo queueInfo;
137 deMemset(&queueInfo, 0, sizeof(queueInfo));
138
139 queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
140 queueInfo.pNext = DE_NULL;
141 queueInfo.flags = (VkDeviceQueueCreateFlags)0u;
142 queueInfo.queueFamilyIndex = computeQueue.queueFamilyIndex;
143 queueInfo.queueCount = 1;
144 queueInfo.pQueuePriorities = &queuePriority;
145
146 queueInfos = queueInfo;
147
148 deMemset(&deviceInfo, 0, sizeof(deviceInfo));
149 instance.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
150
151 deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
152 deviceInfo.pNext = DE_NULL;
153 deviceInfo.enabledExtensionCount = 0u;
154 deviceInfo.ppEnabledExtensionNames = DE_NULL;
155 deviceInfo.enabledLayerCount = 0u;
156 deviceInfo.ppEnabledLayerNames = DE_NULL;
157 deviceInfo.pEnabledFeatures = &deviceFeatures;
158 deviceInfo.queueCreateInfoCount = 1;
159 deviceInfo.pQueueCreateInfos = &queueInfos;
160
161 computeDevice = createCustomDevice(validation, m_context.getPlatformInterface(), m_context.getInstance(), instance, physicalDevice, &deviceInfo);
162
163 vk.getDeviceQueue(*computeDevice, computeQueue.queueFamilyIndex, 0, &computeQueue.queue);
164
165 // Create an input/output buffer
166 const VkPhysicalDeviceMemoryProperties memoryProperties = getPhysicalDeviceMemoryProperties(instance, physicalDevice);
167
168 SimpleAllocator * allocator = new SimpleAllocator(vk, *computeDevice, memoryProperties);
169 const VkDeviceSize bufferSizeBytes = sizeof(deUint32) * numValues;
170 const vkt::compute::Buffer buffer(vk, *computeDevice, *allocator, makeBufferCreateInfo(bufferSizeBytes, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT), MemoryRequirement::HostVisible);
171
172 // Fill the buffer with data
173
174 typedef std::vector<deUint32> data_vector_t;
175 data_vector_t inputData(numValues);
176
177 {
178 de::Random rnd(0x82ce7f);
179 const Allocation& bufferAllocation = buffer.getAllocation();
180 deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
181
182 for (deUint32 i = 0; i < numValues; ++i)
183 {
184 deUint32 val = rnd.getUint32();
185 inputData[i] = val;
186 *bufferPtr++ = val;
187 }
188
189 flushAlloc(vk, *computeDevice, bufferAllocation);
190 }
191
192 // Create descriptor set
193
194 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
195 DescriptorSetLayoutBuilder()
196 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
197 .build(vk, *computeDevice));
198
199 const Unique<VkDescriptorPool> descriptorPool(
200 DescriptorPoolBuilder()
201 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
202 .build(vk, *computeDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
203
204 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, *computeDevice, *descriptorPool, *descriptorSetLayout));
205
206 const VkDescriptorBufferInfo bufferDescriptorInfo = makeDescriptorBufferInfo(*buffer, 0ull, bufferSizeBytes);
207 DescriptorSetUpdateBuilder()
208 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptorInfo)
209 .update(vk, *computeDevice);
210
211 // Perform the computation
212
213 const Unique<VkShaderModule> shaderModule(createShaderModule(vk, *computeDevice, m_context.getBinaryCollection().get("vulkan/draw/ConcurrentPayload.comp"), 0u));
214
215 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(vk, *computeDevice, *descriptorSetLayout));
216 const Unique<VkPipeline> pipeline(vkt::compute::makeComputePipeline(vk, *computeDevice, *pipelineLayout, *shaderModule));
217 const VkBufferMemoryBarrier hostWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT, *buffer, 0ull, bufferSizeBytes);
218 const VkBufferMemoryBarrier shaderWriteBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *buffer, 0ull, bufferSizeBytes);
219 const Unique<VkCommandPool> cmdPool(makeCommandPool(vk, *computeDevice, computeQueue.queueFamilyIndex));
220 const Unique<VkCommandBuffer> computeCommandBuffer(allocateCommandBuffer(vk, *computeDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
221
222 // Compute command buffer
223
224 beginCommandBuffer(vk, *computeCommandBuffer);
225 vk.cmdBindPipeline(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
226 vk.cmdBindDescriptorSets(*computeCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
227 vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &hostWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
228 vk.cmdDispatch(*computeCommandBuffer, 1, 1, 1);
229 vk.cmdPipelineBarrier(*computeCommandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &shaderWriteBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
230 endCommandBuffer(vk, *computeCommandBuffer);
231
232 const VkSubmitInfo submitInfo =
233 {
234 VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
235 DE_NULL, // pNext
236 0u, // waitSemaphoreCount
237 DE_NULL, // pWaitSemaphores
238 (const VkPipelineStageFlags*)DE_NULL, // pWaitDstStageMask
239 1u, // commandBufferCount
240 &computeCommandBuffer.get(), // pCommandBuffers
241 0u, // signalSemaphoreCount
242 DE_NULL // pSignalSemaphores
243 };
244
245 // Set up draw
246
247 const VkQueue drawQueue = m_context.getUniversalQueue();
248 const VkDevice drawDevice = m_context.getDevice();
249
250 beginRender();
251
252 const VkDeviceSize vertexBufferOffset = 0;
253 const VkBuffer vertexBuffer = m_vertexBuffer->object();
254
255 m_vk.cmdBindVertexBuffers(*m_cmdBuffer, 0, 1, &vertexBuffer, &vertexBufferOffset);
256 m_vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
257
258 m_vk.cmdDraw(*m_cmdBuffer, 6, 1, 2, 0);
259
260 endRender();
261 endCommandBuffer(m_vk, *m_cmdBuffer);
262
263 const VkCommandBuffer drawCommandBuffer = m_cmdBuffer.get();
264 const bool useDeviceGroups = false;
265 const deUint32 deviceMask = 1u;
266 const Unique<VkFence> drawFence(createFence(vk, drawDevice));
267
268 VkDeviceGroupSubmitInfo deviceGroupSubmitInfo =
269 {
270 VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHR, // VkStructureType sType;
271 DE_NULL, // const void* pNext;
272 0u, // deUint32 waitSemaphoreCount;
273 DE_NULL, // const deUint32* pWaitSemaphoreDeviceIndices;
274 1u, // deUint32 commandBufferCount;
275 &deviceMask, // const deUint32* pCommandBufferDeviceMasks;
276 0u, // deUint32 signalSemaphoreCount;
277 DE_NULL, // const deUint32* pSignalSemaphoreDeviceIndices;
278 };
279
280 const VkSubmitInfo drawSubmitInfo =
281 {
282 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
283 useDeviceGroups ? &deviceGroupSubmitInfo : DE_NULL, // const void* pNext;
284 0u, // deUint32 waitSemaphoreCount;
285 DE_NULL, // const VkSemaphore* pWaitSemaphores;
286 (const VkPipelineStageFlags*)DE_NULL, // const VkPipelineStageFlags* pWaitDstStageMask;
287 1u, // deUint32 commandBufferCount;
288 &drawCommandBuffer, // const VkCommandBuffer* pCommandBuffers;
289 0u, // deUint32 signalSemaphoreCount;
290 DE_NULL, // const VkSemaphore* pSignalSemaphores;
291 };
292
293 const Unique<VkFence> computeFence(createFence(vk, *computeDevice));
294
295 // Submit both compute and draw queues
296 VK_CHECK(vk.queueSubmit(computeQueue.queue, 1u, &submitInfo, *computeFence));
297 VK_CHECK(vk.queueSubmit(drawQueue, 1u, &drawSubmitInfo, *drawFence));
298
299 int err = ERROR_NONE;
300
301 if (VK_SUCCESS != vk.waitForFences(*computeDevice, 1u, &computeFence.get(), DE_TRUE, ~0ull))
302 err = ERROR_WAIT_COMPUTE;
303
304 if (VK_SUCCESS != vk.waitForFences(drawDevice, 1u, &drawFence.get(), DE_TRUE, ~0ull))
305 err = ERROR_WAIT_DRAW;
306
307 // Have to wait for all fences before calling fail, or some fence may be left hanging.
308
309 if (err == ERROR_WAIT_COMPUTE)
310 return tcu::TestStatus::fail("Failed waiting for compute queue fence.");
311
312 if (err == ERROR_WAIT_DRAW)
313 return tcu::TestStatus::fail("Failed waiting for draw queue fence.");
314
315 // Validation - compute
316
317 const Allocation& bufferAllocation = buffer.getAllocation();
318 invalidateAlloc(vk, *computeDevice, bufferAllocation);
319 const deUint32* bufferPtr = static_cast<deUint32*>(bufferAllocation.getHostPtr());
320
321 for (deUint32 ndx = 0; ndx < numValues; ++ndx)
322 {
323 const deUint32 res = bufferPtr[ndx];
324 const deUint32 inp = inputData[ndx];
325 const deUint32 ref = ~inp;
326
327 if (res != ref)
328 {
329 std::ostringstream msg;
330 msg << "Comparison failed (compute) for InOut.values[" << ndx << "] ref:" << ref << " res:" << res << " inp:" << inp;
331 return tcu::TestStatus::fail(msg.str());
332 }
333 }
334
335 // Validation - draw
336
337 tcu::Texture2D referenceFrame(mapVkFormat(m_colorAttachmentFormat), (int)(0.5f + static_cast<float>(WIDTH)), (int)(0.5f + static_cast<float>(HEIGHT)));
338
339 referenceFrame.allocLevel(0);
340
341 const deInt32 frameWidth = referenceFrame.getWidth();
342 const deInt32 frameHeight = referenceFrame.getHeight();
343
344 tcu::clear(referenceFrame.getLevel(0), tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f));
345
346 ReferenceImageCoordinates refCoords;
347
348 for (int y = 0; y < frameHeight; y++)
349 {
350 const float yCoord = (float)(y / (0.5 * frameHeight)) - 1.0f;
351
352 for (int x = 0; x < frameWidth; x++)
353 {
354 const float xCoord = (float)(x / (0.5 * frameWidth)) - 1.0f;
355
356 if ((yCoord >= refCoords.bottom &&
357 yCoord <= refCoords.top &&
358 xCoord >= refCoords.left &&
359 xCoord <= refCoords.right))
360 referenceFrame.getLevel(0).setPixel(tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f), x, y);
361 }
362 }
363
364 const VkOffset3D zeroOffset = { 0, 0, 0 };
365 const tcu::ConstPixelBufferAccess renderedFrame = m_colorTargetImage->readSurface(
366 drawQueue, m_context.getDefaultAllocator(), VK_IMAGE_LAYOUT_GENERAL, zeroOffset, WIDTH, HEIGHT, VK_IMAGE_ASPECT_COLOR_BIT);
367
368 qpTestResult res = QP_TEST_RESULT_PASS;
369
370 if (!tcu::fuzzyCompare(log, "Result", "Image comparison result",
371 referenceFrame.getLevel(0), renderedFrame, 0.05f,
372 tcu::COMPARE_LOG_RESULT))
373 {
374 res = QP_TEST_RESULT_FAIL;
375 }
376
377 return tcu::TestStatus(res, qpGetTestResultName(res));
378 }
379
checkSupport(Context & context,ConcurrentDraw::TestSpec testSpec)380 void checkSupport(Context& context, ConcurrentDraw::TestSpec testSpec)
381 {
382 if (testSpec.useDynamicRendering)
383 context.requireDeviceFunctionality("VK_KHR_dynamic_rendering");
384 }
385
386 } // anonymous
387
// Group constructor: only stores the dynamic-rendering flag; the actual test
// cases are created in init().
ConcurrentDrawTests::ConcurrentDrawTests (tcu::TestContext &testCtx, bool useDynamicRendering)
	: TestCaseGroup		(testCtx, "concurrent", "concurrent drawing")
	, m_useDynamicRendering	(useDynamicRendering)
{
	/* Left blank on purpose */
}
394
init(void)395 void ConcurrentDrawTests::init (void)
396 {
397 ConcurrentDraw::TestSpec testSpec
398 {
399 {
400 { glu::SHADERTYPE_VERTEX, "vulkan/draw/VertexFetch.vert" },
401 { glu::SHADERTYPE_FRAGMENT, "vulkan/draw/VertexFetch.frag" },
402 { glu::SHADERTYPE_COMPUTE, "vulkan/draw/ConcurrentPayload.comp" }
403 },
404 VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
405 m_useDynamicRendering
406 };
407
408 addChild(new InstanceFactory<ConcurrentDraw, FunctionSupport1<ConcurrentDraw::TestSpec>>(m_testCtx, "compute_and_triangle_list", "Draws triangle list while running a compute shader", testSpec, FunctionSupport1<ConcurrentDraw::TestSpec>::Args(checkSupport, testSpec)));
409 }
410
411 } // DrawTests
412 } // vkt
413