1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #define NOMINMAX
18 #include <vulkan_interop_common.hpp>
19 #include <string>
20 #include "harness/errorHelpers.h"
21
#define MAX_2D_IMAGES 5
#define MAX_2D_IMAGE_WIDTH 1024
#define MAX_2D_IMAGE_HEIGHT 1024
#define MAX_2D_IMAGE_ELEMENT_SIZE 16
#define MAX_2D_IMAGE_MIP_LEVELS 11
// Total storage-image descriptor slots needed (one per image per mip level).
// Parenthesized so the expansion stays a single term inside larger
// expressions (the previous "MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS"
// mis-associates under division or surrounding multiplication).
#define MAX_2D_IMAGE_DESCRIPTORS (MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS)
#define NUM_THREADS_PER_GROUP_X 32
#define NUM_THREADS_PER_GROUP_Y 32
// Number of work groups needed to cover `size` items in groups of
// `blockSize` (ROUND_UP comes from vulkan_interop_common.hpp).
#define NUM_BLOCKS(size, blockSize) \
    (ROUND_UP((size), (blockSize)) / (blockSize))

// Abort the test binary with a diagnostic when the condition is false.
// Wrapped in do/while(0) so the macro behaves as one statement and is safe
// in unbraced if/else bodies.
#define ASSERT(x) \
    do \
    { \
        if (!(x)) \
        { \
            fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, \
                    __FILE__, __LINE__); \
            exit(1); \
        } \
    } while (0)

// Assert x <= y. Arguments are parenthesized so expression operands keep
// their intended precedence.
#define ASSERT_LEQ(x, y) \
    do \
    { \
        if ((x) > (y)) \
        { \
            ASSERT(0); \
        } \
    } while (0)
46
namespace {
// Parameters passed to the Vulkan compute shader through the storage buffer
// bound at descriptor slot 0.
struct Params
{
    uint32_t numImage2DDescriptors; // number of image descriptors in use
};
}
// NOTE(review): uuid is not referenced in this portion of the file —
// presumably filled during device selection elsewhere; confirm before use.
static cl_uchar uuid[CL_UUID_SIZE_KHR];
// OpenCL device under test; passed when wrapping Vulkan memory/semaphores.
static cl_device_id deviceId = NULL;
// Largest image dimensions exercised; width/height candidates above these
// are skipped by the test loops.
size_t max_width = MAX_2D_IMAGE_WIDTH;
size_t max_height = MAX_2D_IMAGE_HEIGHT;
57
// OpenCL C kernel templates operating on 1, 2 or 4 image pairs. The "%s"
// placeholders are substituted with the vector data type and the
// read_image/write_image suffix matching the image channel type before the
// program is built. Each kernel swaps texel rows y and (baseHeight-1-y),
// i.e. flips every input image vertically into the corresponding output
// image.
const char *kernel_text_numImage_1 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0); \n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\
\n\
}";

// Two-image-pair variant of the flip kernel above.
const char *kernel_text_numImage_2 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
\n\
}";

// Four-image-pair variant of the flip kernel above.
const char *kernel_text_numImage_4 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\
{\n\
    int threadIdxX = get_global_id(0);\n\
    int threadIdxY = get_global_id(1);\n\
    int numThreadsX = get_global_size(0);\n\
    int numThreadsY = get_global_size(1);\n\
    if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
    {\n\
        return;\n\
    }\n\
    %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataE = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataF = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    %s dataG = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
    %s dataH = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
    write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
    write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
    write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\
    write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\
    write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\
\n\
}";

// Test sweep parameters: image counts, widths and heights exercised by the
// test loops (entries larger than max_width/max_height are skipped).
const uint32_t num2DImagesList[] = { 1, 2, 4 };
const uint32_t widthList[] = { 4, 64, 183, 1024 };
const uint32_t heightList[] = { 4, 64, 365 };
134
getKernelType(VulkanFormat format,cl_kernel kernel_float,cl_kernel kernel_signed,cl_kernel kernel_unsigned)135 const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float,
136 cl_kernel kernel_signed,
137 cl_kernel kernel_unsigned)
138 {
139 cl_kernel kernel;
140 switch (format)
141 {
142 case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break;
143
144 case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break;
145
146 case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break;
147
148 case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break;
149
150 case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break;
151
152 case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break;
153
154 case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break;
155
156 case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break;
157
158 case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break;
159
160 case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break;
161
162 case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break;
163
164 case VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break;
165
166 case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break;
167
168 case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break;
169
170 case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break;
171
172 case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break;
173
174 case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break;
175
176 case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break;
177
178 case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break;
179
180 case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break;
181
182 case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break;
183
184 default:
185 log_error(" Unsupported format");
186 ASSERT(0);
187 break;
188 }
189 return kernel;
190 }
191
// Runs the 2D-image Vulkan/OpenCL interop test using TWO OpenCL command
// queues. For every supported Vulkan format / image size / image count /
// external-memory handle type / memory type combination it:
//   1. fills a host-visible staging buffer with a deterministic pattern,
//   2. copies the pattern into Vulkan images and runs the flip compute
//      shader on them,
//   3. shares the images with OpenCL via external memory handles and runs
//      flip kernels on cmd_queue1 and cmd_queue2 (the cmd_queue2 launch
//      waits on the cmd_queue1 launch through an OpenCL event),
//   4. reads the output images back and compares them with the source
//      pattern.
// Vulkan <-> OpenCL ordering is enforced with two shared external
// semaphores (vkCl2VkSemaphore / vkVk2CLSemaphore). Returns CL_SUCCESS on
// pass, a CL error code on API failure, or -1 on a data mismatch.
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
                            cl_command_queue &cmd_queue2,
                            cl_kernel *kernel_unsigned,
                            cl_kernel *kernel_signed, cl_kernel *kernel_float,
                            VulkanDevice &vkDevice)
{
    cl_int err = CL_SUCCESS;
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { 1, 1, 1 };

    cl_kernel updateKernelCQ1, updateKernelCQ2;
    std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
    const std::vector<VulkanExternalMemoryHandleType>
        vkExternalMemoryHandleTypeList =
            getSupportedVulkanExternalMemoryHandleTypeList();
    // Pattern seed, bumped once per (width, height) combination so each run
    // fills the images with distinct contents.
    char magicValue = 0;

    // Host-visible buffer backing the shader's Params struct.
    VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
    VulkanDeviceMemory vkParamsDeviceMemory(
        vkDevice, vkParamsBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);

    // Staging buffer sized for the largest image tested (the extra factor
    // of 2 provides headroom over width * height * element size).
    uint64_t maxImage2DSize =
        max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
    VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
    VulkanDeviceMemory vkSrcBufferDeviceMemory(
        vkDevice, vkSrcBuffer.getSize(),
        getVulkanMemoryType(vkDevice,
                            VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
    vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);

    // Host-side reference (src) and read-back (dst) copies of the data.
    char *srcBufferPtr, *dstBufferPtr;
    srcBufferPtr = (char *)malloc(maxImage2DSize);
    dstBufferPtr = (char *)malloc(maxImage2DSize);

    // Descriptor layout: slot 0 = Params storage buffer, then one storage
    // image slot per (image, mip level) up to MAX_2D_IMAGE_DESCRIPTORS.
    VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
        VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
        VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
    VulkanDescriptorSetLayout vkDescriptorSetLayout(
        vkDevice, vkDescriptorSetLayoutBindingList);
    VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);

    VulkanDescriptorPool vkDescriptorPool(vkDevice,
                                          vkDescriptorSetLayoutBindingList);
    VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
                                        vkDescriptorSetLayout);

    VulkanCommandPool vkCommandPool(vkDevice);
    VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
    VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
    VulkanQueue &vkQueue = vkDevice.getQueue();

    // Shared semaphores: vkVk2CLSemaphore tells OpenCL that Vulkan work is
    // done; vkCl2VkSemaphore tells Vulkan that OpenCL work is done.
    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
    clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
    clExternalSemaphore *clCl2VkExternalSemaphore = NULL;

    clVk2CLExternalSemaphore = new clExternalSemaphore(
        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
    clCl2VkExternalSemaphore = new clExternalSemaphore(
        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);

    // Owning vectors of per-pass device memory and CL image wrappers; the
    // first num2DImages entries are deleted and erased after every
    // memory-type pass below.
    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
    std::vector<char> vkImage2DShader;

    for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
    {
        VulkanFormat vkFormat = vkFormatList[fIdx];
        log_info("Format: %d\n", vkFormat);
        uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
        ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
        log_info("elementSize= %d\n", elementSize);

        // Load the precompiled SPIR-V compute shader for this format.
        std::string fileName = "image2D_"
            + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
        log_info("Load %s file", fileName.c_str());
        vkImage2DShader = readFile(fileName);
        VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);

        VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
                                                vkImage2DShaderModule);

        for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
        {
            uint32_t width = widthList[wIdx];
            log_info("Width: %d\n", width);
            if (width > max_width) continue;
            region[0] = width;
            for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
            {
                uint32_t height = heightList[hIdx];
                log_info("Height: %d", height);
                if (height > max_height) continue;
                region[1] = height;

                uint32_t numMipLevels = 1;
                log_info("Number of mipmap levels: %d\n", numMipLevels);

                // Fill the Vulkan staging buffer and the host reference copy
                // with a value derived from the seed and texel position.
                magicValue++;
                char *vkSrcBufferDeviceMemoryPtr =
                    (char *)vkSrcBufferDeviceMemory.map();
                uint64_t srcBufSize = 0;
                memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
                memset(srcBufferPtr, 0, maxImage2DSize);
                uint32_t mipLevel = 0;
                for (uint32_t row = 0;
                     row < std::max(height >> mipLevel, uint32_t(1)); row++)
                {
                    for (uint32_t col = 0;
                         col < std::max(width >> mipLevel, uint32_t(1)); col++)
                    {
                        for (uint32_t elementByte = 0;
                             elementByte < elementSize; elementByte++)
                        {
                            vkSrcBufferDeviceMemoryPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufferPtr[srcBufSize] =
                                (char)(magicValue + mipLevel + row + col);
                            srcBufSize++;
                        }
                    }
                }
                // Round the payload size up to the copy alignment.
                srcBufSize = ROUND_UP(
                    srcBufSize,
                    std::max(
                        elementSize,
                        (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
                // NOTE(review): vkSrcBufferDeviceMemoryPtr is dereferenced
                // again in the mismatch dump further below, after this
                // unmap() — confirm the mapping remains valid.
                vkSrcBufferDeviceMemory.unmap();

                for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
                     niIdx++)
                {
                    uint32_t num2DImages = num2DImagesList[niIdx] + 1;
                    // added one image for cross-cq case for updateKernelCQ2
                    log_info("Number of images: %d\n", num2DImages);
                    ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
                    uint32_t num_2D_image;
                    if (useSingleImageKernel)
                    {
                        num_2D_image = 1;
                    }
                    else
                    {
                        num_2D_image = num2DImages;
                    }
                    // Tell the shader how many image descriptors are bound.
                    Params *params = (Params *)vkParamsDeviceMemory.map();
                    params->numImage2DDescriptors = num_2D_image * numMipLevels;
                    vkParamsDeviceMemory.unmap();
                    vkDescriptorSet.update(0, vkParamsBuffer);
                    for (size_t emhtIdx = 0;
                         emhtIdx < vkExternalMemoryHandleTypeList.size();
                         emhtIdx++)
                    {
                        VulkanExternalMemoryHandleType
                            vkExternalMemoryHandleType =
                                vkExternalMemoryHandleTypeList[emhtIdx];
                        log_info("External memory handle type: %d \n",
                                 vkExternalMemoryHandleType);
                        if ((true == disableNTHandleType)
                            && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
                                == vkExternalMemoryHandleType))
                        {
                            // Skip running for WIN32 NT handle.
                            continue;
                        }
                        // Dummy image used only to query which memory types
                        // are compatible with this handle type.
                        VulkanImage2D vkDummyImage2D(
                            vkDevice, vkFormatList[0], widthList[0],
                            heightList[0], 1, vkExternalMemoryHandleType);
                        const VulkanMemoryTypeList &memoryTypeList =
                            vkDummyImage2D.getMemoryTypeList();

                        for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
                             mtIdx++)
                        {
                            const VulkanMemoryType &memoryType =
                                memoryTypeList[mtIdx];
                            log_info("Memory type index: %d\n",
                                     (uint32_t)memoryType);
                            log_info("Memory type property: %d\n",
                                     memoryType.getMemoryTypeProperty());
                            if (!useDeviceLocal)
                            {
                                if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
                                    == memoryType.getMemoryTypeProperty())
                                {
                                    continue;
                                }
                            }

                            // Measure one image's aligned size to size the
                            // non-dedicated allocations below (temporary
                            // image is destroyed at the end of this scope).
                            size_t totalImageMemSize = 0;
                            uint64_t interImageOffset = 0;
                            {
                                VulkanImage2D vkImage2D(
                                    vkDevice, vkFormat, width, height,
                                    numMipLevels, vkExternalMemoryHandleType);
                                ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
                                totalImageMemSize =
                                    ROUND_UP(vkImage2D.getSize(),
                                             vkImage2D.getAlignment());
                            }
                            // First image set (inputs), with device memory
                            // and OpenCL external-memory wrappers per image.
                            VulkanImage2DList vkNonDedicatedImage2DList(
                                num2DImages, vkDevice, vkFormat, width, height,
                                numMipLevels, vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                if (non_dedicated)
                                {
                                    vkNonDedicatedImage2DListDeviceMemory1
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice, totalImageMemSize,
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                else
                                {
                                    // Dedicated allocation tied to the image.
                                    vkNonDedicatedImage2DListDeviceMemory1
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice,
                                            vkNonDedicatedImage2DList[bIdx],
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                vkNonDedicatedImage2DListDeviceMemory1[bIdx]
                                    ->bindImage(vkNonDedicatedImage2DList[bIdx],
                                                0);
                                nonDedicatedExternalMemory1.push_back(
                                    new clExternalMemoryImage(
                                        *vkNonDedicatedImage2DListDeviceMemory1
                                            [bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkNonDedicatedImage2DList[bIdx],
                                        deviceId));
                            }
                            VulkanImageViewList vkNonDedicatedImage2DViewList(
                                vkDevice, vkNonDedicatedImage2DList);
                            // Second image set (outputs), built the same way.
                            VulkanImage2DList vkNonDedicatedImage2DList2(
                                num2DImages, vkDevice, vkFormat, width, height,
                                numMipLevels, vkExternalMemoryHandleType);
                            for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
                            {
                                if (non_dedicated)
                                {
                                    vkNonDedicatedImage2DListDeviceMemory2
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice, totalImageMemSize,
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                else
                                {
                                    vkNonDedicatedImage2DListDeviceMemory2
                                        .push_back(new VulkanDeviceMemory(
                                            vkDevice,
                                            vkNonDedicatedImage2DList2[bIdx],
                                            memoryType,
                                            vkExternalMemoryHandleType));
                                }
                                vkNonDedicatedImage2DListDeviceMemory2[bIdx]
                                    ->bindImage(
                                        vkNonDedicatedImage2DList2[bIdx], 0);
                                nonDedicatedExternalMemory2.push_back(
                                    new clExternalMemoryImage(
                                        *vkNonDedicatedImage2DListDeviceMemory2
                                            [bIdx],
                                        vkExternalMemoryHandleType, context,
                                        totalImageMemSize, width, height, 0,
                                        vkNonDedicatedImage2DList2[bIdx],
                                        deviceId));
                            }
                            VulkanImageViewList vkDedicatedImage2DViewList(
                                vkDevice, vkNonDedicatedImage2DList2);

                            // Raw cl_mem handles used as kernel arguments.
                            cl_mem external_mem_image1[5];
                            cl_mem external_mem_image2[5];
                            for (int i = 0; i < num2DImages; i++)
                            {
                                external_mem_image1[i] =
                                    nonDedicatedExternalMemory1[i]
                                        ->getExternalMemoryImage();
                                external_mem_image2[i] =
                                    nonDedicatedExternalMemory2[i]
                                        ->getExternalMemoryImage();
                            }
                            VulkanImage2DList &vkImage2DList =
                                vkNonDedicatedImage2DList;
                            VulkanImageViewList &vkImage2DViewList =
                                vkNonDedicatedImage2DViewList;

                            // Kick off the Vulkan side: signal CL -> VK.
                            clCl2VkExternalSemaphore->signal(cmd_queue1);
                            if (!useSingleImageKernel)
                            {
                                // Bind every image/mip view, then record the
                                // copy and shader command buffers once; they
                                // are (re)submitted inside the iteration loop.
                                for (size_t i2DIdx = 0;
                                     i2DIdx < vkImage2DList.size(); i2DIdx++)
                                {
                                    for (uint32_t mipLevel = 0;
                                         mipLevel < numMipLevels; mipLevel++)
                                    {
                                        uint32_t i2DvIdx =
                                            (uint32_t)(i2DIdx * numMipLevels)
                                            + mipLevel;
                                        vkDescriptorSet.update(
                                            1 + i2DvIdx,
                                            vkImage2DViewList[i2DvIdx]);
                                    }
                                }
                                vkCopyCommandBuffer.begin();
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                for (size_t i2DIdx = 0;
                                     i2DIdx < vkImage2DList.size(); i2DIdx++)
                                {
                                    vkCopyCommandBuffer.copyBufferToImage(
                                        vkSrcBuffer, vkImage2DList[i2DIdx],
                                        VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                }
                                vkCopyCommandBuffer.pipelineBarrier(
                                    vkImage2DList,
                                    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                    VULKAN_IMAGE_LAYOUT_GENERAL);
                                vkCopyCommandBuffer.end();
                                memset(dstBufferPtr, 0, srcBufSize);
                                vkQueue.submit(vkCopyCommandBuffer);
                                vkShaderCommandBuffer.begin();
                                vkShaderCommandBuffer.bindPipeline(
                                    vkComputePipeline);
                                vkShaderCommandBuffer.bindDescriptorSets(
                                    vkComputePipeline, vkPipelineLayout,
                                    vkDescriptorSet);
                                // NOTE(review): Y block count uses half the
                                // group height — presumably each shader
                                // invocation processes a row pair; confirm
                                // against the SPIR-V shader's local size.
                                vkShaderCommandBuffer.dispatch(
                                    NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
                                    NUM_BLOCKS(height,
                                               NUM_THREADS_PER_GROUP_Y / 2),
                                    1);
                                vkShaderCommandBuffer.end();
                            }
                            for (uint32_t iter = 0; iter < innerIterations;
                                 iter++)
                            {
                                if (useSingleImageKernel)
                                {
                                    // Re-record and run per image; every
                                    // submit except the last one goes
                                    // without semaphores.
                                    for (size_t i2DIdx = 0;
                                         i2DIdx < vkImage2DList.size();
                                         i2DIdx++)
                                    {
                                        vkDescriptorSet.update(
                                            1, vkImage2DViewList[i2DIdx]);
                                        vkCopyCommandBuffer.begin();
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_UNDEFINED,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

                                        vkCopyCommandBuffer.copyBufferToImage(
                                            vkSrcBuffer, vkImage2DList[i2DIdx],
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
                                        vkCopyCommandBuffer.pipelineBarrier(
                                            vkImage2DList,
                                            VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                            VULKAN_IMAGE_LAYOUT_GENERAL);
                                        vkCopyCommandBuffer.end();
                                        memset(dstBufferPtr, 0, srcBufSize);
                                        vkQueue.submit(vkCopyCommandBuffer);
                                        vkShaderCommandBuffer.begin();
                                        vkShaderCommandBuffer.bindPipeline(
                                            vkComputePipeline);
                                        vkShaderCommandBuffer
                                            .bindDescriptorSets(
                                                vkComputePipeline,
                                                vkPipelineLayout,
                                                vkDescriptorSet);
                                        vkShaderCommandBuffer.dispatch(
                                            NUM_BLOCKS(width,
                                                       NUM_THREADS_PER_GROUP_X),
                                            NUM_BLOCKS(height,
                                                       NUM_THREADS_PER_GROUP_Y
                                                           / 2),
                                            1);
                                        vkShaderCommandBuffer.end();
                                        if (i2DIdx < vkImage2DList.size() - 1)
                                        {
                                            vkQueue.submit(
                                                vkShaderCommandBuffer);
                                        }
                                    }
                                }
                                // Final Vulkan submit waits on CL -> VK and
                                // signals VK -> CL; OpenCL then waits on it.
                                vkQueue.submit(vkCl2VkSemaphore,
                                               vkShaderCommandBuffer,
                                               vkVk2CLSemaphore);
                                clVk2CLExternalSemaphore->wait(cmd_queue1);
                                // Pick the CQ1 kernel variant matching the
                                // image-pair count (num2DImages is one of
                                // 2, 3 or 5: num2DImagesList entry + 1).
                                switch (num2DImages)
                                {
                                    case 2:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[0],
                                            kernel_signed[0],
                                            kernel_unsigned[0]);
                                        break;
                                    case 3:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[1],
                                            kernel_signed[1],
                                            kernel_unsigned[1]);
                                        break;
                                    case 5:
                                        updateKernelCQ1 = getKernelType(
                                            vkFormat, kernel_float[2],
                                            kernel_signed[2],
                                            kernel_unsigned[2]);
                                        break;
                                }
                                updateKernelCQ2 = getKernelType(
                                    vkFormat, kernel_float[3], kernel_signed[3],
                                    kernel_unsigned[3]);
                                // similar kernel-type based on vkFormat
                                int j = 0;
                                // Setting arguments of updateKernelCQ2:
                                // first and last image pair, then scalars.

                                err = clSetKernelArg(updateKernelCQ2, 0,
                                                     sizeof(cl_mem),
                                                     &external_mem_image1[0]);
                                err |= clSetKernelArg(updateKernelCQ2, 1,
                                                      sizeof(cl_mem),
                                                      &external_mem_image2[0]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 2, sizeof(cl_mem),
                                    &external_mem_image1[num2DImages - 1]);
                                err |= clSetKernelArg(
                                    updateKernelCQ2, 3, sizeof(cl_mem),
                                    &external_mem_image2[num2DImages - 1]);
                                err |= clSetKernelArg(updateKernelCQ2, 4,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ2, 5,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ2, 6,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ2, 7,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);
                                // CQ1 kernel gets the first num2DImages - 1
                                // image pairs, then the scalar arguments.
                                for (int i = 0; i < num2DImages - 1; i++, ++j)
                                {
                                    err = clSetKernelArg(
                                        updateKernelCQ1, j, sizeof(cl_mem),
                                        &external_mem_image1[i]);
                                    err |= clSetKernelArg(
                                        updateKernelCQ1, ++j, sizeof(cl_mem),
                                        &external_mem_image2[i]);
                                }
                                err |= clSetKernelArg(updateKernelCQ1, j,
                                                      sizeof(unsigned int),
                                                      &num2DImages);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &width);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &height);
                                err |= clSetKernelArg(updateKernelCQ1, ++j,
                                                      sizeof(unsigned int),
                                                      &numMipLevels);

                                if (err != CL_SUCCESS)
                                {
                                    print_error(
                                        err,
                                        "Error: Failed to set arg values \n");
                                    goto CLEANUP;
                                }
                                // clVk2CLExternalSemaphore->wait(cmd_queue1);
                                size_t global_work_size[3] = { width, height,
                                                               1 };
                                // Launch on CQ1; the CQ2 launch is gated on
                                // the CQ1 launch via the first_launch event.
                                cl_event first_launch;
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue1, updateKernelCQ1, 2, NULL,
                                    global_work_size, NULL, 0, NULL,
                                    &first_launch);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }
                                err = clEnqueueNDRangeKernel(
                                    cmd_queue2, updateKernelCQ2, 2, NULL,
                                    global_work_size, NULL, 1, &first_launch,
                                    NULL);
                                if (err != CL_SUCCESS)
                                {
                                    goto CLEANUP;
                                }

                                clFinish(cmd_queue2);
                                clCl2VkExternalSemaphore->signal(cmd_queue2);
                            }

                            // Read every output image back (blocking) and
                            // compare against the host reference pattern.
                            unsigned int flags = 0;
                            size_t mipmapLevelOffset = 0;
                            cl_event eventReadImage = NULL;
                            clFinish(cmd_queue2);
                            for (int i = 0; i < num2DImages; i++)
                            {
                                err = clEnqueueReadImage(
                                    cmd_queue1, external_mem_image2[i], CL_TRUE,
                                    origin, region, 0, 0, dstBufferPtr, 0, NULL,
                                    &eventReadImage);

                                if (err != CL_SUCCESS)
                                {
                                    print_error(err,
                                                "clEnqueueReadImage failed with"
                                                "error\n");
                                }

                                if (memcmp(srcBufferPtr, dstBufferPtr,
                                           srcBufSize))
                                {
                                    log_info("Source and destination buffers "
                                             "don't match\n");
                                    if (debug_trace)
                                    {
                                        // Dump both buffers to aid debugging.
                                        log_info("Source buffer contents: \n");
                                        for (uint64_t sIdx = 0;
                                             sIdx < srcBufSize; sIdx++)
                                        {
                                            log_info(
                                                "%d ",
                                                (int)vkSrcBufferDeviceMemoryPtr
                                                    [sIdx]);
                                        }
                                        log_info("Destination buffer contents:"
                                                 "\n");
                                        for (uint64_t dIdx = 0;
                                             dIdx < srcBufSize; dIdx++)
                                        {
                                            log_info("%d ",
                                                     (int)dstBufferPtr[dIdx]);
                                        }
                                    }
                                    err = -1;
                                    break;
                                }
                            }
                            // Release this pass's per-image resources and
                            // drop them from the owning vectors.
                            for (int i = 0; i < num2DImages; i++)
                            {
                                delete vkNonDedicatedImage2DListDeviceMemory1
                                    [i];
                                delete vkNonDedicatedImage2DListDeviceMemory2
                                    [i];
                                delete nonDedicatedExternalMemory1[i];
                                delete nonDedicatedExternalMemory2[i];
                            }
                            vkNonDedicatedImage2DListDeviceMemory1.erase(
                                vkNonDedicatedImage2DListDeviceMemory1.begin(),
                                vkNonDedicatedImage2DListDeviceMemory1.begin()
                                    + num2DImages);
                            vkNonDedicatedImage2DListDeviceMemory2.erase(
                                vkNonDedicatedImage2DListDeviceMemory2.begin(),
                                vkNonDedicatedImage2DListDeviceMemory2.begin()
                                    + num2DImages);
                            nonDedicatedExternalMemory1.erase(
                                nonDedicatedExternalMemory1.begin(),
                                nonDedicatedExternalMemory1.begin()
                                    + num2DImages);
                            nonDedicatedExternalMemory2.erase(
                                nonDedicatedExternalMemory2.begin(),
                                nonDedicatedExternalMemory2.begin()
                                    + num2DImages);
                            if (CL_SUCCESS != err)
                            {
                                goto CLEANUP;
                            }
                        }
                    }
                }
            }
        }

        vkImage2DShader.clear();
    }
CLEANUP:
    if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
    if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;

    if (srcBufferPtr) free(srcBufferPtr);
    if (dstBufferPtr) free(dstBufferPtr);
    return err;
}
789
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)790 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
791 cl_kernel *kernel_unsigned,
792 cl_kernel *kernel_signed, cl_kernel *kernel_float,
793 VulkanDevice &vkDevice)
794 {
795 cl_int err = CL_SUCCESS;
796 size_t origin[3] = { 0, 0, 0 };
797 size_t region[3] = { 1, 1, 1 };
798 cl_kernel updateKernelCQ1;
799 std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
800 const std::vector<VulkanExternalMemoryHandleType>
801 vkExternalMemoryHandleTypeList =
802 getSupportedVulkanExternalMemoryHandleTypeList();
803 char magicValue = 0;
804
805 VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
806 VulkanDeviceMemory vkParamsDeviceMemory(
807 vkDevice, vkParamsBuffer.getSize(),
808 getVulkanMemoryType(vkDevice,
809 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
810 vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
811
812 uint64_t maxImage2DSize =
813 max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
814 VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
815 VulkanDeviceMemory vkSrcBufferDeviceMemory(
816 vkDevice, vkSrcBuffer.getSize(),
817 getVulkanMemoryType(vkDevice,
818 VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
819 vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
820
821 char *srcBufferPtr, *dstBufferPtr;
822 srcBufferPtr = (char *)malloc(maxImage2DSize);
823 dstBufferPtr = (char *)malloc(maxImage2DSize);
824
825 VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
826 VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
827 VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
828 VulkanDescriptorSetLayout vkDescriptorSetLayout(
829 vkDevice, vkDescriptorSetLayoutBindingList);
830 VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
831
832 VulkanDescriptorPool vkDescriptorPool(vkDevice,
833 vkDescriptorSetLayoutBindingList);
834 VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
835 vkDescriptorSetLayout);
836
837 VulkanCommandPool vkCommandPool(vkDevice);
838 VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
839 VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
840 VulkanQueue &vkQueue = vkDevice.getQueue();
841
842 VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
843 getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
844 VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
845 VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
846 clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
847 clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
848
849 clVk2CLExternalSemaphore = new clExternalSemaphore(
850 vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
851 clCl2VkExternalSemaphore = new clExternalSemaphore(
852 vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
853
854 std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
855 std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
856 std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
857 std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
858 std::vector<char> vkImage2DShader;
859
860 for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
861 {
862 VulkanFormat vkFormat = vkFormatList[fIdx];
863 log_info("Format: %d\n", vkFormat);
864 uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
865 ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
866 log_info("elementSize= %d\n", elementSize);
867
868 std::string fileName = "image2D_"
869 + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
870 log_info("Load %s file", fileName.c_str());
871 vkImage2DShader = readFile(fileName);
872 VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
873
874 VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
875 vkImage2DShaderModule);
876
877 for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
878 {
879 uint32_t width = widthList[wIdx];
880 log_info("Width: %d\n", width);
881 if (width > max_width) continue;
882 region[0] = width;
883 for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
884 {
885 uint32_t height = heightList[hIdx];
886 log_info("Height: %d\n", height);
887 if (height > max_height) continue;
888 region[1] = height;
889
890 uint32_t numMipLevels = 1;
891 log_info("Number of mipmap levels: %d\n", numMipLevels);
892
893 magicValue++;
894 char *vkSrcBufferDeviceMemoryPtr =
895 (char *)vkSrcBufferDeviceMemory.map();
896 uint64_t srcBufSize = 0;
897 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
898 memset(srcBufferPtr, 0, maxImage2DSize);
899 uint32_t mipLevel = 0;
900 for (uint32_t row = 0;
901 row < std::max(height >> mipLevel, uint32_t(1)); row++)
902 {
903 for (uint32_t col = 0;
904 col < std::max(width >> mipLevel, uint32_t(1)); col++)
905 {
906 for (uint32_t elementByte = 0;
907 elementByte < elementSize; elementByte++)
908 {
909 vkSrcBufferDeviceMemoryPtr[srcBufSize] =
910 (char)(magicValue + mipLevel + row + col);
911 srcBufferPtr[srcBufSize] =
912 (char)(magicValue + mipLevel + row + col);
913 srcBufSize++;
914 }
915 }
916 }
917 srcBufSize = ROUND_UP(
918 srcBufSize,
919 std::max(
920 elementSize,
921 (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
922 vkSrcBufferDeviceMemory.unmap();
923
924 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
925 niIdx++)
926 {
927 uint32_t num2DImages = num2DImagesList[niIdx];
928 log_info("Number of images: %d\n", num2DImages);
929 ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
930
931 Params *params = (Params *)vkParamsDeviceMemory.map();
932 uint32_t num_2D_image;
933 if (useSingleImageKernel)
934 {
935 num_2D_image = 1;
936 }
937 else
938 {
939 num_2D_image = num2DImages;
940 }
941 params->numImage2DDescriptors = num_2D_image * numMipLevels;
942 vkParamsDeviceMemory.unmap();
943 vkDescriptorSet.update(0, vkParamsBuffer);
944 for (size_t emhtIdx = 0;
945 emhtIdx < vkExternalMemoryHandleTypeList.size();
946 emhtIdx++)
947 {
948 VulkanExternalMemoryHandleType
949 vkExternalMemoryHandleType =
950 vkExternalMemoryHandleTypeList[emhtIdx];
951 log_info("External memory handle type: %d \n",
952 vkExternalMemoryHandleType);
953 if ((true == disableNTHandleType)
954 && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
955 == vkExternalMemoryHandleType))
956 {
957 // Skip running for WIN32 NT handle.
958 continue;
959 }
960 VulkanImage2D vkDummyImage2D(
961 vkDevice, vkFormatList[0], widthList[0],
962 heightList[0], 1, vkExternalMemoryHandleType);
963 const VulkanMemoryTypeList &memoryTypeList =
964 vkDummyImage2D.getMemoryTypeList();
965
966 for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
967 mtIdx++)
968 {
969 const VulkanMemoryType &memoryType =
970 memoryTypeList[mtIdx];
971 log_info("Memory type index: %d\n",
972 (uint32_t)memoryType);
973 log_info("Memory type property: %d\n",
974 memoryType.getMemoryTypeProperty());
975 if (!useDeviceLocal)
976 {
977 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
978 == memoryType.getMemoryTypeProperty())
979 {
980 continue;
981 }
982 }
983 size_t totalImageMemSize = 0;
984 uint64_t interImageOffset = 0;
985 {
986 VulkanImage2D vkImage2D(
987 vkDevice, vkFormat, width, height,
988 numMipLevels, vkExternalMemoryHandleType);
989 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
990 totalImageMemSize =
991 ROUND_UP(vkImage2D.getSize(),
992 vkImage2D.getAlignment());
993 }
994 VulkanImage2DList vkNonDedicatedImage2DList(
995 num2DImages, vkDevice, vkFormat, width, height,
996 numMipLevels, vkExternalMemoryHandleType);
997 for (size_t bIdx = 0;
998 bIdx < vkNonDedicatedImage2DList.size();
999 bIdx++)
1000 {
1001 // Create list of Vulkan device memories and
1002 // bind the list of Vulkan images.
1003 vkNonDedicatedImage2DListDeviceMemory1
1004 .push_back(new VulkanDeviceMemory(
1005 vkDevice, totalImageMemSize, memoryType,
1006 vkExternalMemoryHandleType));
1007 vkNonDedicatedImage2DListDeviceMemory1[bIdx]
1008 ->bindImage(vkNonDedicatedImage2DList[bIdx],
1009 0);
1010 nonDedicatedExternalMemory1.push_back(
1011 new clExternalMemoryImage(
1012 *vkNonDedicatedImage2DListDeviceMemory1
1013 [bIdx],
1014 vkExternalMemoryHandleType, context,
1015 totalImageMemSize, width, height, 0,
1016 vkNonDedicatedImage2DList[bIdx],
1017 deviceId));
1018 }
1019 VulkanImageViewList vkNonDedicatedImage2DViewList(
1020 vkDevice, vkNonDedicatedImage2DList);
1021
1022 VulkanImage2DList vkNonDedicatedImage2DList2(
1023 num2DImages, vkDevice, vkFormat, width, height,
1024 numMipLevels, vkExternalMemoryHandleType);
1025 for (size_t bIdx = 0;
1026 bIdx < vkNonDedicatedImage2DList2.size();
1027 bIdx++)
1028 {
1029 vkNonDedicatedImage2DListDeviceMemory2
1030 .push_back(new VulkanDeviceMemory(
1031 vkDevice, totalImageMemSize, memoryType,
1032 vkExternalMemoryHandleType));
1033 vkNonDedicatedImage2DListDeviceMemory2[bIdx]
1034 ->bindImage(
1035 vkNonDedicatedImage2DList2[bIdx], 0);
1036 nonDedicatedExternalMemory2.push_back(
1037 new clExternalMemoryImage(
1038 *vkNonDedicatedImage2DListDeviceMemory2
1039 [bIdx],
1040 vkExternalMemoryHandleType, context,
1041 totalImageMemSize, width, height, 0,
1042 vkNonDedicatedImage2DList2[bIdx],
1043 deviceId));
1044 }
1045 VulkanImageViewList vkDedicatedImage2DViewList(
1046 vkDevice, vkNonDedicatedImage2DList2);
1047 cl_mem external_mem_image1[4];
1048 cl_mem external_mem_image2[4];
1049 for (int i = 0; i < num2DImages; i++)
1050 {
1051 external_mem_image1[i] =
1052 nonDedicatedExternalMemory1[i]
1053 ->getExternalMemoryImage();
1054 external_mem_image2[i] =
1055 nonDedicatedExternalMemory2[i]
1056 ->getExternalMemoryImage();
1057 }
1058 VulkanImage2DList &vkImage2DList =
1059 vkNonDedicatedImage2DList;
1060 VulkanImageViewList &vkImage2DViewList =
1061 vkNonDedicatedImage2DViewList;
1062
1063 clCl2VkExternalSemaphore->signal(cmd_queue1);
1064 if (!useSingleImageKernel)
1065 {
1066 for (size_t i2DIdx = 0;
1067 i2DIdx < vkImage2DList.size(); i2DIdx++)
1068 {
1069 for (uint32_t mipLevel = 0;
1070 mipLevel < numMipLevels; mipLevel++)
1071 {
1072 uint32_t i2DvIdx =
1073 (uint32_t)(i2DIdx * numMipLevels)
1074 + mipLevel;
1075 vkDescriptorSet.update(
1076 1 + i2DvIdx,
1077 vkImage2DViewList[i2DvIdx]);
1078 }
1079 }
1080 vkCopyCommandBuffer.begin();
1081 vkCopyCommandBuffer.pipelineBarrier(
1082 vkImage2DList,
1083 VULKAN_IMAGE_LAYOUT_UNDEFINED,
1084 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1085 for (size_t i2DIdx = 0;
1086 i2DIdx < vkImage2DList.size(); i2DIdx++)
1087 {
1088 vkCopyCommandBuffer.copyBufferToImage(
1089 vkSrcBuffer, vkImage2DList[i2DIdx],
1090 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1091 }
1092 vkCopyCommandBuffer.pipelineBarrier(
1093 vkImage2DList,
1094 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1095 VULKAN_IMAGE_LAYOUT_GENERAL);
1096 vkCopyCommandBuffer.end();
1097 memset(dstBufferPtr, 0, srcBufSize);
1098 vkQueue.submit(vkCopyCommandBuffer);
1099 vkShaderCommandBuffer.begin();
1100 vkShaderCommandBuffer.bindPipeline(
1101 vkComputePipeline);
1102 vkShaderCommandBuffer.bindDescriptorSets(
1103 vkComputePipeline, vkPipelineLayout,
1104 vkDescriptorSet);
1105 vkShaderCommandBuffer.dispatch(
1106 NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
1107 NUM_BLOCKS(height,
1108 NUM_THREADS_PER_GROUP_Y / 2),
1109 1);
1110 vkShaderCommandBuffer.end();
1111 }
1112 for (uint32_t iter = 0; iter < innerIterations;
1113 iter++)
1114 {
1115 if (useSingleImageKernel)
1116 {
1117 for (size_t i2DIdx = 0;
1118 i2DIdx < vkImage2DList.size();
1119 i2DIdx++)
1120 {
1121 vkDescriptorSet.update(
1122 1, vkImage2DViewList[i2DIdx]);
1123 vkCopyCommandBuffer.begin();
1124 vkCopyCommandBuffer.pipelineBarrier(
1125 vkImage2DList,
1126 VULKAN_IMAGE_LAYOUT_UNDEFINED,
1127 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1128
1129 vkCopyCommandBuffer.copyBufferToImage(
1130 vkSrcBuffer, vkImage2DList[i2DIdx],
1131 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1132 vkCopyCommandBuffer.pipelineBarrier(
1133 vkImage2DList,
1134 VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1135 VULKAN_IMAGE_LAYOUT_GENERAL);
1136 vkCopyCommandBuffer.end();
1137 memset(dstBufferPtr, 0, srcBufSize);
1138 vkQueue.submit(vkCopyCommandBuffer);
1139 vkShaderCommandBuffer.begin();
1140 vkShaderCommandBuffer.bindPipeline(
1141 vkComputePipeline);
1142 vkShaderCommandBuffer
1143 .bindDescriptorSets(
1144 vkComputePipeline,
1145 vkPipelineLayout,
1146 vkDescriptorSet);
1147 vkShaderCommandBuffer.dispatch(
1148 NUM_BLOCKS(width,
1149 NUM_THREADS_PER_GROUP_X),
1150 NUM_BLOCKS(height,
1151 NUM_THREADS_PER_GROUP_Y
1152 / 2),
1153 1);
1154 vkShaderCommandBuffer.end();
1155 if (i2DIdx < vkImage2DList.size() - 1)
1156 {
1157 vkQueue.submit(
1158 vkShaderCommandBuffer);
1159 }
1160 }
1161 }
1162 vkQueue.submit(vkCl2VkSemaphore,
1163 vkShaderCommandBuffer,
1164 vkVk2CLSemaphore);
1165 clVk2CLExternalSemaphore->wait(cmd_queue1);
1166 switch (num2DImages)
1167 {
1168 case 1:
1169 updateKernelCQ1 = getKernelType(
1170 vkFormat, kernel_float[0],
1171 kernel_signed[0],
1172 kernel_unsigned[0]);
1173 break;
1174 case 2:
1175 updateKernelCQ1 = getKernelType(
1176 vkFormat, kernel_float[1],
1177 kernel_signed[1],
1178 kernel_unsigned[1]);
1179 break;
1180 case 4:
1181 updateKernelCQ1 = getKernelType(
1182 vkFormat, kernel_float[2],
1183 kernel_signed[2],
1184 kernel_unsigned[2]);
1185 break;
1186 }
1187 int j = 0;
1188 for (int i = 0; i < num2DImages; i++, ++j)
1189 {
1190 err = clSetKernelArg(
1191 updateKernelCQ1, j, sizeof(cl_mem),
1192 &external_mem_image1[i]);
1193 err |= clSetKernelArg(
1194 updateKernelCQ1, ++j, sizeof(cl_mem),
1195 &external_mem_image2[i]);
1196 }
1197 err |= clSetKernelArg(updateKernelCQ1, j,
1198 sizeof(unsigned int),
1199 &num2DImages);
1200 err |= clSetKernelArg(updateKernelCQ1, ++j,
1201 sizeof(unsigned int),
1202 &width);
1203 err |= clSetKernelArg(updateKernelCQ1, ++j,
1204 sizeof(unsigned int),
1205 &height);
1206 err |= clSetKernelArg(updateKernelCQ1, ++j,
1207 sizeof(unsigned int),
1208 &numMipLevels);
1209
1210 if (err != CL_SUCCESS)
1211 {
1212 print_error(err,
1213 "Error: Failed to set arg "
1214 "values for kernel-1\n");
1215 goto CLEANUP;
1216 }
1217
1218 size_t global_work_size[3] = { width, height,
1219 1 };
1220 err = clEnqueueNDRangeKernel(
1221 cmd_queue1, updateKernelCQ1, 2, NULL,
1222 global_work_size, NULL, 0, NULL, NULL);
1223 if (err != CL_SUCCESS)
1224 {
1225 goto CLEANUP;
1226 }
1227 clCl2VkExternalSemaphore->signal(cmd_queue1);
1228 }
1229
1230 unsigned int flags = 0;
1231 size_t mipmapLevelOffset = 0;
1232 cl_event eventReadImage = NULL;
1233 for (int i = 0; i < num2DImages; i++)
1234 {
1235 err = clEnqueueReadImage(
1236 cmd_queue1, external_mem_image2[i], CL_TRUE,
1237 origin, region, 0, 0, dstBufferPtr, 0, NULL,
1238 &eventReadImage);
1239
1240 if (err != CL_SUCCESS)
1241 {
1242 print_error(err,
1243 "clEnqueueReadImage failed with"
1244 "error\n");
1245 }
1246
1247 if (memcmp(srcBufferPtr, dstBufferPtr,
1248 srcBufSize))
1249 {
1250 log_info("Source and destination buffers "
1251 "don't match\n");
1252 if (debug_trace)
1253 {
1254 log_info("Source buffer contents: \n");
1255 for (uint64_t sIdx = 0;
1256 sIdx < srcBufSize; sIdx++)
1257 {
1258 log_info(
1259 "%d",
1260 (int)vkSrcBufferDeviceMemoryPtr
1261 [sIdx]);
1262 }
1263 log_info(
1264 "Destination buffer contents:");
1265 for (uint64_t dIdx = 0;
1266 dIdx < srcBufSize; dIdx++)
1267 {
1268 log_info("%d",
1269 (int)dstBufferPtr[dIdx]);
1270 }
1271 }
1272 err = -1;
1273 break;
1274 }
1275 }
1276 for (int i = 0; i < num2DImages; i++)
1277 {
1278 delete vkNonDedicatedImage2DListDeviceMemory1
1279 [i];
1280 delete vkNonDedicatedImage2DListDeviceMemory2
1281 [i];
1282 delete nonDedicatedExternalMemory1[i];
1283 delete nonDedicatedExternalMemory2[i];
1284 }
1285 vkNonDedicatedImage2DListDeviceMemory1.erase(
1286 vkNonDedicatedImage2DListDeviceMemory1.begin(),
1287 vkNonDedicatedImage2DListDeviceMemory1.begin()
1288 + num2DImages);
1289 vkNonDedicatedImage2DListDeviceMemory2.erase(
1290 vkNonDedicatedImage2DListDeviceMemory2.begin(),
1291 vkNonDedicatedImage2DListDeviceMemory2.begin()
1292 + num2DImages);
1293 nonDedicatedExternalMemory1.erase(
1294 nonDedicatedExternalMemory1.begin(),
1295 nonDedicatedExternalMemory1.begin()
1296 + num2DImages);
1297 nonDedicatedExternalMemory2.erase(
1298 nonDedicatedExternalMemory2.begin(),
1299 nonDedicatedExternalMemory2.begin()
1300 + num2DImages);
1301 if (CL_SUCCESS != err)
1302 {
1303 goto CLEANUP;
1304 }
1305 }
1306 }
1307 }
1308 }
1309 }
1310 vkImage2DShader.clear();
1311 }
1312 CLEANUP:
1313 if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1314 if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1315
1316 if (srcBufferPtr) free(srcBufferPtr);
1317 if (dstBufferPtr) free(dstBufferPtr);
1318 return err;
1319 }
1320
test_image_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_)1321 int test_image_common(cl_device_id device_, cl_context context_,
1322 cl_command_queue queue_, int numElements_)
1323 {
1324 int current_device = 0;
1325 int device_count = 0;
1326 int devices_prohibited = 0;
1327 cl_int err = CL_SUCCESS;
1328 cl_platform_id platform = NULL;
1329 size_t extensionSize = 0;
1330 cl_uint num_devices = 0;
1331 cl_uint device_no = 0;
1332 cl_device_id *devices;
1333 char *extensions = NULL;
1334 const char *program_source_const;
1335 cl_command_queue cmd_queue1 = NULL;
1336 cl_command_queue cmd_queue2 = NULL;
1337 cl_context context = NULL;
1338 const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1;
1339 // One kernel for Cross-CQ case
1340 const uint32_t num_kernel_types = 3;
1341 const char *kernel_source[num_kernels] = { kernel_text_numImage_1,
1342 kernel_text_numImage_2,
1343 kernel_text_numImage_4 };
1344 char source_1[4096];
1345 char source_2[4096];
1346 char source_3[4096];
1347 size_t program_source_length;
1348 cl_program program[num_kernel_types];
1349 cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL };
1350 cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL };
1351 cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL };
1352 cl_mem external_mem_image1;
1353 cl_mem external_mem_image2;
1354
1355 VulkanDevice vkDevice;
1356
1357 cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1358 // get the platform ID
1359 err = clGetPlatformIDs(1, &platform, NULL);
1360 if (err != CL_SUCCESS)
1361 {
1362 print_error(err, "Error: Failed to get platform\n");
1363 goto CLEANUP;
1364 }
1365
1366 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1367 if (CL_SUCCESS != err)
1368 {
1369 print_error(err, "clGetDeviceIDs failed in returning no. of devices\n");
1370 goto CLEANUP;
1371 }
1372 devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1373 if (NULL == devices)
1374 {
1375 err = CL_OUT_OF_HOST_MEMORY;
1376 print_error(err, "Unable to allocate memory for devices\n");
1377 goto CLEANUP;
1378 }
1379 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1380 NULL);
1381 if (CL_SUCCESS != err)
1382 {
1383 print_error(err, "Failed to get deviceID.\n");
1384 goto CLEANUP;
1385 }
1386 contextProperties[1] = (cl_context_properties)platform;
1387 log_info("Assigned contextproperties for platform\n");
1388 for (device_no = 0; device_no < num_devices; device_no++)
1389 {
1390 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL,
1391 &extensionSize);
1392 if (CL_SUCCESS != err)
1393 {
1394 print_error(
1395 err,
1396 "Error in clGetDeviceInfo for getting device_extension size\n");
1397 goto CLEANUP;
1398 }
1399 extensions = (char *)malloc(extensionSize);
1400 if (NULL == extensions)
1401 {
1402 err = CL_OUT_OF_HOST_MEMORY;
1403 print_error(err, "Unable to allocate memory for extensions\n");
1404 goto CLEANUP;
1405 }
1406 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1407 extensionSize, extensions, NULL);
1408 if (CL_SUCCESS != err)
1409 {
1410 print_error(
1411 err, "Error in clGetDeviceInfo for getting device_extension\n");
1412 goto CLEANUP;
1413 }
1414 err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1415 CL_UUID_SIZE_KHR, uuid, &extensionSize);
1416 if (CL_SUCCESS != err)
1417 {
1418 print_error(err, "clGetDeviceInfo failed with error");
1419 goto CLEANUP;
1420 }
1421 err =
1422 memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1423 if (err == 0)
1424 {
1425 break;
1426 }
1427 }
1428 if (device_no >= num_devices)
1429 {
1430 err = EXIT_FAILURE;
1431 print_error(err,
1432 "OpenCL error:"
1433 "No Vulkan-OpenCL Interop capable GPU found.\n");
1434 goto CLEANUP;
1435 }
1436 deviceId = devices[device_no];
1437 err = setMaxImageDimensions(deviceId, max_width, max_height);
1438 if (CL_SUCCESS != err)
1439 {
1440 print_error(err, "error setting max image dimensions");
1441 goto CLEANUP;
1442 }
1443 log_info("Set max_width to %lu and max_height to %lu\n", max_width,
1444 max_height);
1445 context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1446 NULL, NULL, &err);
1447 if (CL_SUCCESS != err)
1448 {
1449 print_error(err, "error creating context");
1450 goto CLEANUP;
1451 }
1452 log_info("Successfully created context !!!\n");
1453
1454 cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1455 if (CL_SUCCESS != err)
1456 {
1457 err = CL_INVALID_COMMAND_QUEUE;
1458 print_error(err, "Error: Failed to create command queue!\n");
1459 goto CLEANUP;
1460 }
1461 log_info("clCreateCommandQueue successfull \n");
1462
1463 cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1464 if (CL_SUCCESS != err)
1465 {
1466 err = CL_INVALID_COMMAND_QUEUE;
1467 print_error(err, "Error: Failed to create command queue!\n");
1468 goto CLEANUP;
1469 }
1470 log_info("clCreateCommandQueue2 successful \n");
1471
1472 for (int i = 0; i < num_kernels; i++)
1473 {
1474 switch (i)
1475 {
1476 case 0:
1477 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1478 "f", "f", "f");
1479 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1480 "i", "i");
1481 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1482 "ui", "ui", "ui");
1483 break;
1484 case 1:
1485 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1486 "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1487 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1488 "int4", "i", "int4", "i", "i", "i", "i", "i");
1489 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1490 "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1491 "ui");
1492 break;
1493 case 2:
1494 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1495 "f", "float4", "f", "float4", "f", "float4", "f",
1496 "float4", "f", "float4", "f", "float4", "f", "f", "f",
1497 "f", "f", "f", "f", "f", "f");
1498 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1499 "int4", "i", "int4", "i", "int4", "i", "int4", "i",
1500 "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i",
1501 "i", "i");
1502 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1503 "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui",
1504 "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui",
1505 "ui", "ui", "ui", "ui", "ui", "ui");
1506 break;
1507 case 3:
1508 // Addtional case for creating updateKernelCQ2 which takes two
1509 // images
1510 sprintf(source_1, kernel_source[1], "float4", "f", "float4",
1511 "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1512 sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i",
1513 "int4", "i", "int4", "i", "i", "i", "i", "i");
1514 sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4",
1515 "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1516 "ui");
1517 break;
1518 }
1519 const char *sourceTexts[num_kernel_types] = { source_1, source_2,
1520 source_3 };
1521 for (int k = 0; k < num_kernel_types; k++)
1522 {
1523 program_source_length = strlen(sourceTexts[k]);
1524 program[k] = clCreateProgramWithSource(
1525 context, 1, &sourceTexts[k], &program_source_length, &err);
1526 err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL);
1527 }
1528
1529 if (err != CL_SUCCESS)
1530 {
1531 print_error(err, "Error: Failed to build program");
1532 goto CLEANUP;
1533 }
1534 // create the kernel
1535 kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err);
1536 if (err != CL_SUCCESS)
1537 {
1538 print_error(err, "clCreateKernel failed");
1539 goto CLEANUP;
1540 }
1541 kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err);
1542 if (err != CL_SUCCESS)
1543 {
1544 print_error(err, "clCreateKernel failed");
1545 goto CLEANUP;
1546 }
1547 kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err);
1548 if (err != CL_SUCCESS)
1549 {
1550 print_error(err, "clCreateKernel failed ");
1551 goto CLEANUP;
1552 }
1553 }
1554 if (numCQ == 2)
1555 {
1556 err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2,
1557 kernel_unsigned, kernel_signed,
1558 kernel_float, vkDevice);
1559 }
1560 else
1561 {
1562 err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned,
1563 kernel_signed, kernel_float, vkDevice);
1564 }
1565 CLEANUP:
1566 for (int i = 0; i < num_kernels; i++)
1567 {
1568 if (kernel_float[i])
1569 {
1570 clReleaseKernel(kernel_float[i]);
1571 }
1572 if (kernel_unsigned[i])
1573 {
1574 clReleaseKernel(kernel_unsigned[i]);
1575 }
1576 if (kernel_signed[i])
1577 {
1578 clReleaseKernel(kernel_signed[i]);
1579 }
1580 }
1581 for (int i = 0; i < num_kernel_types; i++)
1582 {
1583 if (program[i])
1584 {
1585 clReleaseProgram(program[i]);
1586 }
1587 }
1588 if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1589 if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1590 if (context) clReleaseContext(context);
1591
1592 if (extensions) free(extensions);
1593 if (devices) free(devices);
1594
1595 return err;
1596 }
1597