• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2022 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #define NOMINMAX
18 #include <vulkan_interop_common.hpp>
19 #include <string>
20 #include "harness/errorHelpers.h"
21 
22 #define MAX_2D_IMAGES 5
23 #define MAX_2D_IMAGE_WIDTH 1024
24 #define MAX_2D_IMAGE_HEIGHT 1024
25 #define MAX_2D_IMAGE_ELEMENT_SIZE 16
26 #define MAX_2D_IMAGE_MIP_LEVELS 11
27 #define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS
28 #define NUM_THREADS_PER_GROUP_X 32
29 #define NUM_THREADS_PER_GROUP_Y 32
30 #define NUM_BLOCKS(size, blockSize)                                            \
31     (ROUND_UP((size), (blockSize)) / (blockSize))
32 
33 #define ASSERT(x)                                                              \
34     if (!(x))                                                                  \
35     {                                                                          \
36         fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__,    \
37                 __LINE__);                                                     \
38         exit(1);                                                               \
39     }
40 
41 #define ASSERT_LEQ(x, y)                                                       \
42     if (x > y)                                                                 \
43     {                                                                          \
44         ASSERT(0);                                                             \
45     }
46 
47 namespace {
48 struct Params
49 {
50     uint32_t numImage2DDescriptors;
51 };
52 }
53 static cl_uchar uuid[CL_UUID_SIZE_KHR];
54 static cl_device_id deviceId = NULL;
55 size_t max_width = MAX_2D_IMAGE_WIDTH;
56 size_t max_height = MAX_2D_IMAGE_HEIGHT;
57 
58 const char *kernel_text_numImage_1 = " \
59 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
60 __kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\
61 {\n\
62     int threadIdxX = get_global_id(0);\n\
63     int threadIdxY = get_global_id(1);\n\
64     int numThreadsX = get_global_size(0);                                                                                                  \n\
65     int numThreadsY = get_global_size(1);\n\
66     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\
67     {\n\
68         return;\n\
69     }\n\
70     %s dataA =  read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
71     %s dataB =  read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
72     write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
73     write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\
74 \n\
75 }";
76 
77 const char *kernel_text_numImage_2 = " \
78 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
79 __kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels)    \n\
80 {\n\
81     int threadIdxX = get_global_id(0);\n\
82     int threadIdxY = get_global_id(1);\n\
83     int numThreadsX = get_global_size(0);\n\
84     int numThreadsY = get_global_size(1);\n\
85     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
86     {\n\
87         return;\n\
88     }\n\
89     %s dataA =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
90     %s dataB =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
91     %s dataC =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
92     %s dataD =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
93     write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
94     write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
95     write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
96     write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
97 \n\
98 }";
99 
100 const char *kernel_text_numImage_4 = " \
101 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
102 __kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)    \n\
103 {\n\
104     int threadIdxX = get_global_id(0);\n\
105     int threadIdxY = get_global_id(1);\n\
106     int numThreadsX = get_global_size(0);\n\
107     int numThreadsY = get_global_size(1);\n\
108     if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\
109     {\n\
110         return;\n\
111     }\n\
112     %s dataA =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
113     %s dataB =  read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
114     %s dataC =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
115     %s dataD =  read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
116     %s dataE =  read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
117     %s dataF =  read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
118     %s dataG =  read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\
119     %s dataH =  read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\
120     write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\
121     write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\
122     write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\
123     write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\
124     write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\
125     write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\
126     write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\
127     write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\
128 \n\
129 }";
130 
131 const uint32_t num2DImagesList[] = { 1, 2, 4 };
132 const uint32_t widthList[] = { 4, 64, 183, 1024 };
133 const uint32_t heightList[] = { 4, 64, 365 };
134 
getKernelType(VulkanFormat format,cl_kernel kernel_float,cl_kernel kernel_signed,cl_kernel kernel_unsigned)135 const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float,
136                               cl_kernel kernel_signed,
137                               cl_kernel kernel_unsigned)
138 {
139     cl_kernel kernel;
140     switch (format)
141     {
142         case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break;
143 
144         case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break;
145 
146         case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break;
147 
148         case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break;
149 
150         case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break;
151 
152         case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break;
153 
154         case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break;
155 
156         case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break;
157 
158         case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break;
159 
160         case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break;
161 
162         case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break;
163 
164         case VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break;
165 
166         case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break;
167 
168         case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break;
169 
170         case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break;
171 
172         case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break;
173 
174         case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break;
175 
176         case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break;
177 
178         case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break;
179 
180         case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break;
181 
182         case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break;
183 
184         default:
185             log_error(" Unsupported format");
186             ASSERT(0);
187             break;
188     }
189     return kernel;
190 }
191 
run_test_with_two_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_command_queue & cmd_queue2,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)192 int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
193                             cl_command_queue &cmd_queue2,
194                             cl_kernel *kernel_unsigned,
195                             cl_kernel *kernel_signed, cl_kernel *kernel_float,
196                             VulkanDevice &vkDevice)
197 {
198     cl_int err = CL_SUCCESS;
199     size_t origin[3] = { 0, 0, 0 };
200     size_t region[3] = { 1, 1, 1 };
201 
202     cl_kernel updateKernelCQ1, updateKernelCQ2;
203     std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
204     const std::vector<VulkanExternalMemoryHandleType>
205         vkExternalMemoryHandleTypeList =
206             getSupportedVulkanExternalMemoryHandleTypeList();
207     char magicValue = 0;
208 
209     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
210     VulkanDeviceMemory vkParamsDeviceMemory(
211         vkDevice, vkParamsBuffer.getSize(),
212         getVulkanMemoryType(vkDevice,
213                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
214     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
215 
216     uint64_t maxImage2DSize =
217         max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
218     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
219     VulkanDeviceMemory vkSrcBufferDeviceMemory(
220         vkDevice, vkSrcBuffer.getSize(),
221         getVulkanMemoryType(vkDevice,
222                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
223     vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
224 
225     char *srcBufferPtr, *dstBufferPtr;
226     srcBufferPtr = (char *)malloc(maxImage2DSize);
227     dstBufferPtr = (char *)malloc(maxImage2DSize);
228 
229     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
230         VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
231         VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
232     VulkanDescriptorSetLayout vkDescriptorSetLayout(
233         vkDevice, vkDescriptorSetLayoutBindingList);
234     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
235 
236     VulkanDescriptorPool vkDescriptorPool(vkDevice,
237                                           vkDescriptorSetLayoutBindingList);
238     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
239                                         vkDescriptorSetLayout);
240 
241     VulkanCommandPool vkCommandPool(vkDevice);
242     VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
243     VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
244     VulkanQueue &vkQueue = vkDevice.getQueue();
245 
246     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
247         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
248     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
249     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
250     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
251     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
252 
253     clVk2CLExternalSemaphore = new clExternalSemaphore(
254         vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
255     clCl2VkExternalSemaphore = new clExternalSemaphore(
256         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
257 
258     std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
259     std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
260     std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
261     std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
262     std::vector<char> vkImage2DShader;
263 
264     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
265     {
266         VulkanFormat vkFormat = vkFormatList[fIdx];
267         log_info("Format: %d\n", vkFormat);
268         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
269         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
270         log_info("elementSize= %d\n", elementSize);
271 
272         std::string fileName = "image2D_"
273             + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
274         log_info("Load %s file", fileName.c_str());
275         vkImage2DShader = readFile(fileName);
276         VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
277 
278         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
279                                                 vkImage2DShaderModule);
280 
281         for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
282         {
283             uint32_t width = widthList[wIdx];
284             log_info("Width: %d\n", width);
285             if (width > max_width) continue;
286             region[0] = width;
287             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
288             {
289                 uint32_t height = heightList[hIdx];
290                 log_info("Height: %d", height);
291                 if (height > max_height) continue;
292                 region[1] = height;
293 
294                 uint32_t numMipLevels = 1;
295                 log_info("Number of mipmap levels: %d\n", numMipLevels);
296 
297                 magicValue++;
298                 char *vkSrcBufferDeviceMemoryPtr =
299                     (char *)vkSrcBufferDeviceMemory.map();
300                 uint64_t srcBufSize = 0;
301                 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
302                 memset(srcBufferPtr, 0, maxImage2DSize);
303                 uint32_t mipLevel = 0;
304                 for (uint32_t row = 0;
305                      row < std::max(height >> mipLevel, uint32_t(1)); row++)
306                 {
307                     for (uint32_t col = 0;
308                          col < std::max(width >> mipLevel, uint32_t(1)); col++)
309                     {
310                         for (uint32_t elementByte = 0;
311                              elementByte < elementSize; elementByte++)
312                         {
313                             vkSrcBufferDeviceMemoryPtr[srcBufSize] =
314                                 (char)(magicValue + mipLevel + row + col);
315                             srcBufferPtr[srcBufSize] =
316                                 (char)(magicValue + mipLevel + row + col);
317                             srcBufSize++;
318                         }
319                     }
320                 }
321                 srcBufSize = ROUND_UP(
322                     srcBufSize,
323                     std::max(
324                         elementSize,
325                         (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
326                 vkSrcBufferDeviceMemory.unmap();
327 
328                 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
329                      niIdx++)
330                 {
331                     uint32_t num2DImages = num2DImagesList[niIdx] + 1;
332                     // added one image for cross-cq case for updateKernelCQ2
333                     log_info("Number of images: %d\n", num2DImages);
334                     ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
335                     uint32_t num_2D_image;
336                     if (useSingleImageKernel)
337                     {
338                         num_2D_image = 1;
339                     }
340                     else
341                     {
342                         num_2D_image = num2DImages;
343                     }
344                     Params *params = (Params *)vkParamsDeviceMemory.map();
345                     params->numImage2DDescriptors = num_2D_image * numMipLevels;
346                     vkParamsDeviceMemory.unmap();
347                     vkDescriptorSet.update(0, vkParamsBuffer);
348                     for (size_t emhtIdx = 0;
349                          emhtIdx < vkExternalMemoryHandleTypeList.size();
350                          emhtIdx++)
351                     {
352                         VulkanExternalMemoryHandleType
353                             vkExternalMemoryHandleType =
354                                 vkExternalMemoryHandleTypeList[emhtIdx];
355                         log_info("External memory handle type: %d \n",
356                                  vkExternalMemoryHandleType);
357                         if ((true == disableNTHandleType)
358                             && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
359                                 == vkExternalMemoryHandleType))
360                         {
361                             // Skip running for WIN32 NT handle.
362                             continue;
363                         }
364                         VulkanImage2D vkDummyImage2D(
365                             vkDevice, vkFormatList[0], widthList[0],
366                             heightList[0], 1, vkExternalMemoryHandleType);
367                         const VulkanMemoryTypeList &memoryTypeList =
368                             vkDummyImage2D.getMemoryTypeList();
369 
370                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
371                              mtIdx++)
372                         {
373                             const VulkanMemoryType &memoryType =
374                                 memoryTypeList[mtIdx];
375                             log_info("Memory type index: %d\n",
376                                      (uint32_t)memoryType);
377                             log_info("Memory type property: %d\n",
378                                      memoryType.getMemoryTypeProperty());
379                             if (!useDeviceLocal)
380                             {
381                                 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
382                                     == memoryType.getMemoryTypeProperty())
383                                 {
384                                     continue;
385                                 }
386                             }
387 
388                             size_t totalImageMemSize = 0;
389                             uint64_t interImageOffset = 0;
390                             {
391                                 VulkanImage2D vkImage2D(
392                                     vkDevice, vkFormat, width, height,
393                                     numMipLevels, vkExternalMemoryHandleType);
394                                 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
395                                 totalImageMemSize =
396                                     ROUND_UP(vkImage2D.getSize(),
397                                              vkImage2D.getAlignment());
398                             }
399                             VulkanImage2DList vkNonDedicatedImage2DList(
400                                 num2DImages, vkDevice, vkFormat, width, height,
401                                 numMipLevels, vkExternalMemoryHandleType);
402                             for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
403                             {
404                                 if (non_dedicated)
405                                 {
406                                     vkNonDedicatedImage2DListDeviceMemory1
407                                         .push_back(new VulkanDeviceMemory(
408                                             vkDevice, totalImageMemSize,
409                                             memoryType,
410                                             vkExternalMemoryHandleType));
411                                 }
412                                 else
413                                 {
414                                     vkNonDedicatedImage2DListDeviceMemory1
415                                         .push_back(new VulkanDeviceMemory(
416                                             vkDevice,
417                                             vkNonDedicatedImage2DList[bIdx],
418                                             memoryType,
419                                             vkExternalMemoryHandleType));
420                                 }
421                                 vkNonDedicatedImage2DListDeviceMemory1[bIdx]
422                                     ->bindImage(vkNonDedicatedImage2DList[bIdx],
423                                                 0);
424                                 nonDedicatedExternalMemory1.push_back(
425                                     new clExternalMemoryImage(
426                                         *vkNonDedicatedImage2DListDeviceMemory1
427                                             [bIdx],
428                                         vkExternalMemoryHandleType, context,
429                                         totalImageMemSize, width, height, 0,
430                                         vkNonDedicatedImage2DList[bIdx],
431                                         deviceId));
432                             }
433                             VulkanImageViewList vkNonDedicatedImage2DViewList(
434                                 vkDevice, vkNonDedicatedImage2DList);
435                             VulkanImage2DList vkNonDedicatedImage2DList2(
436                                 num2DImages, vkDevice, vkFormat, width, height,
437                                 numMipLevels, vkExternalMemoryHandleType);
438                             for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
439                             {
440                                 if (non_dedicated)
441                                 {
442                                     vkNonDedicatedImage2DListDeviceMemory2
443                                         .push_back(new VulkanDeviceMemory(
444                                             vkDevice, totalImageMemSize,
445                                             memoryType,
446                                             vkExternalMemoryHandleType));
447                                 }
448                                 else
449                                 {
450                                     vkNonDedicatedImage2DListDeviceMemory2
451                                         .push_back(new VulkanDeviceMemory(
452                                             vkDevice,
453                                             vkNonDedicatedImage2DList2[bIdx],
454                                             memoryType,
455                                             vkExternalMemoryHandleType));
456                                 }
457                                 vkNonDedicatedImage2DListDeviceMemory2[bIdx]
458                                     ->bindImage(
459                                         vkNonDedicatedImage2DList2[bIdx], 0);
460                                 nonDedicatedExternalMemory2.push_back(
461                                     new clExternalMemoryImage(
462                                         *vkNonDedicatedImage2DListDeviceMemory2
463                                             [bIdx],
464                                         vkExternalMemoryHandleType, context,
465                                         totalImageMemSize, width, height, 0,
466                                         vkNonDedicatedImage2DList2[bIdx],
467                                         deviceId));
468                             }
469                             VulkanImageViewList vkDedicatedImage2DViewList(
470                                 vkDevice, vkNonDedicatedImage2DList2);
471 
472                             cl_mem external_mem_image1[5];
473                             cl_mem external_mem_image2[5];
474                             for (int i = 0; i < num2DImages; i++)
475                             {
476                                 external_mem_image1[i] =
477                                     nonDedicatedExternalMemory1[i]
478                                         ->getExternalMemoryImage();
479                                 external_mem_image2[i] =
480                                     nonDedicatedExternalMemory2[i]
481                                         ->getExternalMemoryImage();
482                             }
483                             VulkanImage2DList &vkImage2DList =
484                                 vkNonDedicatedImage2DList;
485                             VulkanImageViewList &vkImage2DViewList =
486                                 vkNonDedicatedImage2DViewList;
487 
488                             clCl2VkExternalSemaphore->signal(cmd_queue1);
489                             if (!useSingleImageKernel)
490                             {
491                                 for (size_t i2DIdx = 0;
492                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
493                                 {
494                                     for (uint32_t mipLevel = 0;
495                                          mipLevel < numMipLevels; mipLevel++)
496                                     {
497                                         uint32_t i2DvIdx =
498                                             (uint32_t)(i2DIdx * numMipLevels)
499                                             + mipLevel;
500                                         vkDescriptorSet.update(
501                                             1 + i2DvIdx,
502                                             vkImage2DViewList[i2DvIdx]);
503                                     }
504                                 }
505                                 vkCopyCommandBuffer.begin();
506                                 vkCopyCommandBuffer.pipelineBarrier(
507                                     vkImage2DList,
508                                     VULKAN_IMAGE_LAYOUT_UNDEFINED,
509                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
510                                 for (size_t i2DIdx = 0;
511                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
512                                 {
513                                     vkCopyCommandBuffer.copyBufferToImage(
514                                         vkSrcBuffer, vkImage2DList[i2DIdx],
515                                         VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
516                                 }
517                                 vkCopyCommandBuffer.pipelineBarrier(
518                                     vkImage2DList,
519                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
520                                     VULKAN_IMAGE_LAYOUT_GENERAL);
521                                 vkCopyCommandBuffer.end();
522                                 memset(dstBufferPtr, 0, srcBufSize);
523                                 vkQueue.submit(vkCopyCommandBuffer);
524                                 vkShaderCommandBuffer.begin();
525                                 vkShaderCommandBuffer.bindPipeline(
526                                     vkComputePipeline);
527                                 vkShaderCommandBuffer.bindDescriptorSets(
528                                     vkComputePipeline, vkPipelineLayout,
529                                     vkDescriptorSet);
530                                 vkShaderCommandBuffer.dispatch(
531                                     NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
532                                     NUM_BLOCKS(height,
533                                                NUM_THREADS_PER_GROUP_Y / 2),
534                                     1);
535                                 vkShaderCommandBuffer.end();
536                             }
537                             for (uint32_t iter = 0; iter < innerIterations;
538                                  iter++)
539                             {
540                                 if (useSingleImageKernel)
541                                 {
542                                     for (size_t i2DIdx = 0;
543                                          i2DIdx < vkImage2DList.size();
544                                          i2DIdx++)
545                                     {
546                                         vkDescriptorSet.update(
547                                             1, vkImage2DViewList[i2DIdx]);
548                                         vkCopyCommandBuffer.begin();
549                                         vkCopyCommandBuffer.pipelineBarrier(
550                                             vkImage2DList,
551                                             VULKAN_IMAGE_LAYOUT_UNDEFINED,
552                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
553 
554                                         vkCopyCommandBuffer.copyBufferToImage(
555                                             vkSrcBuffer, vkImage2DList[i2DIdx],
556                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
557                                         vkCopyCommandBuffer.pipelineBarrier(
558                                             vkImage2DList,
559                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
560                                             VULKAN_IMAGE_LAYOUT_GENERAL);
561                                         vkCopyCommandBuffer.end();
562                                         memset(dstBufferPtr, 0, srcBufSize);
563                                         vkQueue.submit(vkCopyCommandBuffer);
564                                         vkShaderCommandBuffer.begin();
565                                         vkShaderCommandBuffer.bindPipeline(
566                                             vkComputePipeline);
567                                         vkShaderCommandBuffer
568                                             .bindDescriptorSets(
569                                                 vkComputePipeline,
570                                                 vkPipelineLayout,
571                                                 vkDescriptorSet);
572                                         vkShaderCommandBuffer.dispatch(
573                                             NUM_BLOCKS(width,
574                                                        NUM_THREADS_PER_GROUP_X),
575                                             NUM_BLOCKS(height,
576                                                        NUM_THREADS_PER_GROUP_Y
577                                                            / 2),
578                                             1);
579                                         vkShaderCommandBuffer.end();
580                                         if (i2DIdx < vkImage2DList.size() - 1)
581                                         {
582                                             vkQueue.submit(
583                                                 vkShaderCommandBuffer);
584                                         }
585                                     }
586                                 }
587                                 vkQueue.submit(vkCl2VkSemaphore,
588                                                vkShaderCommandBuffer,
589                                                vkVk2CLSemaphore);
590                                 clVk2CLExternalSemaphore->wait(cmd_queue1);
591                                 switch (num2DImages)
592                                 {
593                                     case 2:
594                                         updateKernelCQ1 = getKernelType(
595                                             vkFormat, kernel_float[0],
596                                             kernel_signed[0],
597                                             kernel_unsigned[0]);
598                                         break;
599                                     case 3:
600                                         updateKernelCQ1 = getKernelType(
601                                             vkFormat, kernel_float[1],
602                                             kernel_signed[1],
603                                             kernel_unsigned[1]);
604                                         break;
605                                     case 5:
606                                         updateKernelCQ1 = getKernelType(
607                                             vkFormat, kernel_float[2],
608                                             kernel_signed[2],
609                                             kernel_unsigned[2]);
610                                         break;
611                                 }
612                                 updateKernelCQ2 = getKernelType(
613                                     vkFormat, kernel_float[3], kernel_signed[3],
614                                     kernel_unsigned[3]);
615                                 // similar kernel-type based on vkFormat
616                                 int j = 0;
617                                 // Setting arguments of updateKernelCQ2
618 
619                                 err = clSetKernelArg(updateKernelCQ2, 0,
620                                                      sizeof(cl_mem),
621                                                      &external_mem_image1[0]);
622                                 err |= clSetKernelArg(updateKernelCQ2, 1,
623                                                       sizeof(cl_mem),
624                                                       &external_mem_image2[0]);
625                                 err |= clSetKernelArg(
626                                     updateKernelCQ2, 2, sizeof(cl_mem),
627                                     &external_mem_image1[num2DImages - 1]);
628                                 err |= clSetKernelArg(
629                                     updateKernelCQ2, 3, sizeof(cl_mem),
630                                     &external_mem_image2[num2DImages - 1]);
631                                 err |= clSetKernelArg(updateKernelCQ2, 4,
632                                                       sizeof(unsigned int),
633                                                       &num2DImages);
634                                 err |= clSetKernelArg(updateKernelCQ2, 5,
635                                                       sizeof(unsigned int),
636                                                       &width);
637                                 err |= clSetKernelArg(updateKernelCQ2, 6,
638                                                       sizeof(unsigned int),
639                                                       &height);
640                                 err |= clSetKernelArg(updateKernelCQ2, 7,
641                                                       sizeof(unsigned int),
642                                                       &numMipLevels);
643                                 for (int i = 0; i < num2DImages - 1; i++, ++j)
644                                 {
645                                     err = clSetKernelArg(
646                                         updateKernelCQ1, j, sizeof(cl_mem),
647                                         &external_mem_image1[i]);
648                                     err |= clSetKernelArg(
649                                         updateKernelCQ1, ++j, sizeof(cl_mem),
650                                         &external_mem_image2[i]);
651                                 }
652                                 err |= clSetKernelArg(updateKernelCQ1, j,
653                                                       sizeof(unsigned int),
654                                                       &num2DImages);
655                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
656                                                       sizeof(unsigned int),
657                                                       &width);
658                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
659                                                       sizeof(unsigned int),
660                                                       &height);
661                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
662                                                       sizeof(unsigned int),
663                                                       &numMipLevels);
664 
665                                 if (err != CL_SUCCESS)
666                                 {
667                                     print_error(
668                                         err,
669                                         "Error: Failed to set arg values \n");
670                                     goto CLEANUP;
671                                 }
672                                 // clVk2CLExternalSemaphore->wait(cmd_queue1);
673                                 size_t global_work_size[3] = { width, height,
674                                                                1 };
675                                 cl_event first_launch;
676                                 err = clEnqueueNDRangeKernel(
677                                     cmd_queue1, updateKernelCQ1, 2, NULL,
678                                     global_work_size, NULL, 0, NULL,
679                                     &first_launch);
680                                 if (err != CL_SUCCESS)
681                                 {
682                                     goto CLEANUP;
683                                 }
684                                 err = clEnqueueNDRangeKernel(
685                                     cmd_queue2, updateKernelCQ2, 2, NULL,
686                                     global_work_size, NULL, 1, &first_launch,
687                                     NULL);
688                                 if (err != CL_SUCCESS)
689                                 {
690                                     goto CLEANUP;
691                                 }
692 
693                                 clFinish(cmd_queue2);
694                                 clCl2VkExternalSemaphore->signal(cmd_queue2);
695                             }
696 
697                             unsigned int flags = 0;
698                             size_t mipmapLevelOffset = 0;
699                             cl_event eventReadImage = NULL;
700                             clFinish(cmd_queue2);
701                             for (int i = 0; i < num2DImages; i++)
702                             {
703                                 err = clEnqueueReadImage(
704                                     cmd_queue1, external_mem_image2[i], CL_TRUE,
705                                     origin, region, 0, 0, dstBufferPtr, 0, NULL,
706                                     &eventReadImage);
707 
708                                 if (err != CL_SUCCESS)
709                                 {
710                                     print_error(err,
711                                                 "clEnqueueReadImage failed with"
712                                                 "error\n");
713                                 }
714 
715                                 if (memcmp(srcBufferPtr, dstBufferPtr,
716                                            srcBufSize))
717                                 {
718                                     log_info("Source and destination buffers "
719                                              "don't match\n");
720                                     if (debug_trace)
721                                     {
722                                         log_info("Source buffer contents: \n");
723                                         for (uint64_t sIdx = 0;
724                                              sIdx < srcBufSize; sIdx++)
725                                         {
726                                             log_info(
727                                                 "%d ",
728                                                 (int)vkSrcBufferDeviceMemoryPtr
729                                                     [sIdx]);
730                                         }
731                                         log_info("Destination buffer contents:"
732                                                  "\n");
733                                         for (uint64_t dIdx = 0;
734                                              dIdx < srcBufSize; dIdx++)
735                                         {
736                                             log_info("%d ",
737                                                      (int)dstBufferPtr[dIdx]);
738                                         }
739                                     }
740                                     err = -1;
741                                     break;
742                                 }
743                             }
744                             for (int i = 0; i < num2DImages; i++)
745                             {
746                                 delete vkNonDedicatedImage2DListDeviceMemory1
747                                     [i];
748                                 delete vkNonDedicatedImage2DListDeviceMemory2
749                                     [i];
750                                 delete nonDedicatedExternalMemory1[i];
751                                 delete nonDedicatedExternalMemory2[i];
752                             }
753                             vkNonDedicatedImage2DListDeviceMemory1.erase(
754                                 vkNonDedicatedImage2DListDeviceMemory1.begin(),
755                                 vkNonDedicatedImage2DListDeviceMemory1.begin()
756                                     + num2DImages);
757                             vkNonDedicatedImage2DListDeviceMemory2.erase(
758                                 vkNonDedicatedImage2DListDeviceMemory2.begin(),
759                                 vkNonDedicatedImage2DListDeviceMemory2.begin()
760                                     + num2DImages);
761                             nonDedicatedExternalMemory1.erase(
762                                 nonDedicatedExternalMemory1.begin(),
763                                 nonDedicatedExternalMemory1.begin()
764                                     + num2DImages);
765                             nonDedicatedExternalMemory2.erase(
766                                 nonDedicatedExternalMemory2.begin(),
767                                 nonDedicatedExternalMemory2.begin()
768                                     + num2DImages);
769                             if (CL_SUCCESS != err)
770                             {
771                                 goto CLEANUP;
772                             }
773                         }
774                     }
775                 }
776             }
777         }
778 
779         vkImage2DShader.clear();
780     }
781 CLEANUP:
782     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
783     if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
784 
785     if (srcBufferPtr) free(srcBufferPtr);
786     if (dstBufferPtr) free(dstBufferPtr);
787     return err;
788 }
789 
run_test_with_one_queue(cl_context & context,cl_command_queue & cmd_queue1,cl_kernel * kernel_unsigned,cl_kernel * kernel_signed,cl_kernel * kernel_float,VulkanDevice & vkDevice)790 int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
791                             cl_kernel *kernel_unsigned,
792                             cl_kernel *kernel_signed, cl_kernel *kernel_float,
793                             VulkanDevice &vkDevice)
794 {
795     cl_int err = CL_SUCCESS;
796     size_t origin[3] = { 0, 0, 0 };
797     size_t region[3] = { 1, 1, 1 };
798     cl_kernel updateKernelCQ1;
799     std::vector<VulkanFormat> vkFormatList = getSupportedVulkanFormatList();
800     const std::vector<VulkanExternalMemoryHandleType>
801         vkExternalMemoryHandleTypeList =
802             getSupportedVulkanExternalMemoryHandleTypeList();
803     char magicValue = 0;
804 
805     VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params));
806     VulkanDeviceMemory vkParamsDeviceMemory(
807         vkDevice, vkParamsBuffer.getSize(),
808         getVulkanMemoryType(vkDevice,
809                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
810     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
811 
812     uint64_t maxImage2DSize =
813         max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
814     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
815     VulkanDeviceMemory vkSrcBufferDeviceMemory(
816         vkDevice, vkSrcBuffer.getSize(),
817         getVulkanMemoryType(vkDevice,
818                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
819     vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer);
820 
821     char *srcBufferPtr, *dstBufferPtr;
822     srcBufferPtr = (char *)malloc(maxImage2DSize);
823     dstBufferPtr = (char *)malloc(maxImage2DSize);
824 
825     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
826         VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
827         VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
828     VulkanDescriptorSetLayout vkDescriptorSetLayout(
829         vkDevice, vkDescriptorSetLayoutBindingList);
830     VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
831 
832     VulkanDescriptorPool vkDescriptorPool(vkDevice,
833                                           vkDescriptorSetLayoutBindingList);
834     VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
835                                         vkDescriptorSetLayout);
836 
837     VulkanCommandPool vkCommandPool(vkDevice);
838     VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool);
839     VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool);
840     VulkanQueue &vkQueue = vkDevice.getQueue();
841 
842     VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
843         getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
844     VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
845     VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
846     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
847     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
848 
849     clVk2CLExternalSemaphore = new clExternalSemaphore(
850         vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
851     clCl2VkExternalSemaphore = new clExternalSemaphore(
852         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
853 
854     std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
855     std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
856     std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
857     std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
858     std::vector<char> vkImage2DShader;
859 
860     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
861     {
862         VulkanFormat vkFormat = vkFormatList[fIdx];
863         log_info("Format: %d\n", vkFormat);
864         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
865         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
866         log_info("elementSize= %d\n", elementSize);
867 
868         std::string fileName = "image2D_"
869             + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
870         log_info("Load %s file", fileName.c_str());
871         vkImage2DShader = readFile(fileName);
872         VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
873 
874         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
875                                                 vkImage2DShaderModule);
876 
877         for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++)
878         {
879             uint32_t width = widthList[wIdx];
880             log_info("Width: %d\n", width);
881             if (width > max_width) continue;
882             region[0] = width;
883             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
884             {
885                 uint32_t height = heightList[hIdx];
886                 log_info("Height: %d\n", height);
887                 if (height > max_height) continue;
888                 region[1] = height;
889 
890                 uint32_t numMipLevels = 1;
891                 log_info("Number of mipmap levels: %d\n", numMipLevels);
892 
893                 magicValue++;
894                 char *vkSrcBufferDeviceMemoryPtr =
895                     (char *)vkSrcBufferDeviceMemory.map();
896                 uint64_t srcBufSize = 0;
897                 memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize);
898                 memset(srcBufferPtr, 0, maxImage2DSize);
899                 uint32_t mipLevel = 0;
900                 for (uint32_t row = 0;
901                      row < std::max(height >> mipLevel, uint32_t(1)); row++)
902                 {
903                     for (uint32_t col = 0;
904                          col < std::max(width >> mipLevel, uint32_t(1)); col++)
905                     {
906                         for (uint32_t elementByte = 0;
907                              elementByte < elementSize; elementByte++)
908                         {
909                             vkSrcBufferDeviceMemoryPtr[srcBufSize] =
910                                 (char)(magicValue + mipLevel + row + col);
911                             srcBufferPtr[srcBufSize] =
912                                 (char)(magicValue + mipLevel + row + col);
913                             srcBufSize++;
914                         }
915                     }
916                 }
917                 srcBufSize = ROUND_UP(
918                     srcBufSize,
919                     std::max(
920                         elementSize,
921                         (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT));
922                 vkSrcBufferDeviceMemory.unmap();
923 
924                 for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList);
925                      niIdx++)
926                 {
927                     uint32_t num2DImages = num2DImagesList[niIdx];
928                     log_info("Number of images: %d\n", num2DImages);
929                     ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES);
930 
931                     Params *params = (Params *)vkParamsDeviceMemory.map();
932                     uint32_t num_2D_image;
933                     if (useSingleImageKernel)
934                     {
935                         num_2D_image = 1;
936                     }
937                     else
938                     {
939                         num_2D_image = num2DImages;
940                     }
941                     params->numImage2DDescriptors = num_2D_image * numMipLevels;
942                     vkParamsDeviceMemory.unmap();
943                     vkDescriptorSet.update(0, vkParamsBuffer);
944                     for (size_t emhtIdx = 0;
945                          emhtIdx < vkExternalMemoryHandleTypeList.size();
946                          emhtIdx++)
947                     {
948                         VulkanExternalMemoryHandleType
949                             vkExternalMemoryHandleType =
950                                 vkExternalMemoryHandleTypeList[emhtIdx];
951                         log_info("External memory handle type: %d \n",
952                                  vkExternalMemoryHandleType);
953                         if ((true == disableNTHandleType)
954                             && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
955                                 == vkExternalMemoryHandleType))
956                         {
957                             // Skip running for WIN32 NT handle.
958                             continue;
959                         }
960                         VulkanImage2D vkDummyImage2D(
961                             vkDevice, vkFormatList[0], widthList[0],
962                             heightList[0], 1, vkExternalMemoryHandleType);
963                         const VulkanMemoryTypeList &memoryTypeList =
964                             vkDummyImage2D.getMemoryTypeList();
965 
966                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
967                              mtIdx++)
968                         {
969                             const VulkanMemoryType &memoryType =
970                                 memoryTypeList[mtIdx];
971                             log_info("Memory type index: %d\n",
972                                      (uint32_t)memoryType);
973                             log_info("Memory type property: %d\n",
974                                      memoryType.getMemoryTypeProperty());
975                             if (!useDeviceLocal)
976                             {
977                                 if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL
978                                     == memoryType.getMemoryTypeProperty())
979                                 {
980                                     continue;
981                                 }
982                             }
983                             size_t totalImageMemSize = 0;
984                             uint64_t interImageOffset = 0;
985                             {
986                                 VulkanImage2D vkImage2D(
987                                     vkDevice, vkFormat, width, height,
988                                     numMipLevels, vkExternalMemoryHandleType);
989                                 ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
990                                 totalImageMemSize =
991                                     ROUND_UP(vkImage2D.getSize(),
992                                              vkImage2D.getAlignment());
993                             }
994                             VulkanImage2DList vkNonDedicatedImage2DList(
995                                 num2DImages, vkDevice, vkFormat, width, height,
996                                 numMipLevels, vkExternalMemoryHandleType);
997                             for (size_t bIdx = 0;
998                                  bIdx < vkNonDedicatedImage2DList.size();
999                                  bIdx++)
1000                             {
1001                                 // Create list of Vulkan device memories and
1002                                 // bind the list of Vulkan images.
1003                                 vkNonDedicatedImage2DListDeviceMemory1
1004                                     .push_back(new VulkanDeviceMemory(
1005                                         vkDevice, totalImageMemSize, memoryType,
1006                                         vkExternalMemoryHandleType));
1007                                 vkNonDedicatedImage2DListDeviceMemory1[bIdx]
1008                                     ->bindImage(vkNonDedicatedImage2DList[bIdx],
1009                                                 0);
1010                                 nonDedicatedExternalMemory1.push_back(
1011                                     new clExternalMemoryImage(
1012                                         *vkNonDedicatedImage2DListDeviceMemory1
1013                                             [bIdx],
1014                                         vkExternalMemoryHandleType, context,
1015                                         totalImageMemSize, width, height, 0,
1016                                         vkNonDedicatedImage2DList[bIdx],
1017                                         deviceId));
1018                             }
1019                             VulkanImageViewList vkNonDedicatedImage2DViewList(
1020                                 vkDevice, vkNonDedicatedImage2DList);
1021 
1022                             VulkanImage2DList vkNonDedicatedImage2DList2(
1023                                 num2DImages, vkDevice, vkFormat, width, height,
1024                                 numMipLevels, vkExternalMemoryHandleType);
1025                             for (size_t bIdx = 0;
1026                                  bIdx < vkNonDedicatedImage2DList2.size();
1027                                  bIdx++)
1028                             {
1029                                 vkNonDedicatedImage2DListDeviceMemory2
1030                                     .push_back(new VulkanDeviceMemory(
1031                                         vkDevice, totalImageMemSize, memoryType,
1032                                         vkExternalMemoryHandleType));
1033                                 vkNonDedicatedImage2DListDeviceMemory2[bIdx]
1034                                     ->bindImage(
1035                                         vkNonDedicatedImage2DList2[bIdx], 0);
1036                                 nonDedicatedExternalMemory2.push_back(
1037                                     new clExternalMemoryImage(
1038                                         *vkNonDedicatedImage2DListDeviceMemory2
1039                                             [bIdx],
1040                                         vkExternalMemoryHandleType, context,
1041                                         totalImageMemSize, width, height, 0,
1042                                         vkNonDedicatedImage2DList2[bIdx],
1043                                         deviceId));
1044                             }
1045                             VulkanImageViewList vkDedicatedImage2DViewList(
1046                                 vkDevice, vkNonDedicatedImage2DList2);
1047                             cl_mem external_mem_image1[4];
1048                             cl_mem external_mem_image2[4];
1049                             for (int i = 0; i < num2DImages; i++)
1050                             {
1051                                 external_mem_image1[i] =
1052                                     nonDedicatedExternalMemory1[i]
1053                                         ->getExternalMemoryImage();
1054                                 external_mem_image2[i] =
1055                                     nonDedicatedExternalMemory2[i]
1056                                         ->getExternalMemoryImage();
1057                             }
1058                             VulkanImage2DList &vkImage2DList =
1059                                 vkNonDedicatedImage2DList;
1060                             VulkanImageViewList &vkImage2DViewList =
1061                                 vkNonDedicatedImage2DViewList;
1062 
1063                             clCl2VkExternalSemaphore->signal(cmd_queue1);
1064                             if (!useSingleImageKernel)
1065                             {
1066                                 for (size_t i2DIdx = 0;
1067                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
1068                                 {
1069                                     for (uint32_t mipLevel = 0;
1070                                          mipLevel < numMipLevels; mipLevel++)
1071                                     {
1072                                         uint32_t i2DvIdx =
1073                                             (uint32_t)(i2DIdx * numMipLevels)
1074                                             + mipLevel;
1075                                         vkDescriptorSet.update(
1076                                             1 + i2DvIdx,
1077                                             vkImage2DViewList[i2DvIdx]);
1078                                     }
1079                                 }
1080                                 vkCopyCommandBuffer.begin();
1081                                 vkCopyCommandBuffer.pipelineBarrier(
1082                                     vkImage2DList,
1083                                     VULKAN_IMAGE_LAYOUT_UNDEFINED,
1084                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1085                                 for (size_t i2DIdx = 0;
1086                                      i2DIdx < vkImage2DList.size(); i2DIdx++)
1087                                 {
1088                                     vkCopyCommandBuffer.copyBufferToImage(
1089                                         vkSrcBuffer, vkImage2DList[i2DIdx],
1090                                         VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1091                                 }
1092                                 vkCopyCommandBuffer.pipelineBarrier(
1093                                     vkImage2DList,
1094                                     VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1095                                     VULKAN_IMAGE_LAYOUT_GENERAL);
1096                                 vkCopyCommandBuffer.end();
1097                                 memset(dstBufferPtr, 0, srcBufSize);
1098                                 vkQueue.submit(vkCopyCommandBuffer);
1099                                 vkShaderCommandBuffer.begin();
1100                                 vkShaderCommandBuffer.bindPipeline(
1101                                     vkComputePipeline);
1102                                 vkShaderCommandBuffer.bindDescriptorSets(
1103                                     vkComputePipeline, vkPipelineLayout,
1104                                     vkDescriptorSet);
1105                                 vkShaderCommandBuffer.dispatch(
1106                                     NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X),
1107                                     NUM_BLOCKS(height,
1108                                                NUM_THREADS_PER_GROUP_Y / 2),
1109                                     1);
1110                                 vkShaderCommandBuffer.end();
1111                             }
1112                             for (uint32_t iter = 0; iter < innerIterations;
1113                                  iter++)
1114                             {
1115                                 if (useSingleImageKernel)
1116                                 {
1117                                     for (size_t i2DIdx = 0;
1118                                          i2DIdx < vkImage2DList.size();
1119                                          i2DIdx++)
1120                                     {
1121                                         vkDescriptorSet.update(
1122                                             1, vkImage2DViewList[i2DIdx]);
1123                                         vkCopyCommandBuffer.begin();
1124                                         vkCopyCommandBuffer.pipelineBarrier(
1125                                             vkImage2DList,
1126                                             VULKAN_IMAGE_LAYOUT_UNDEFINED,
1127                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1128 
1129                                         vkCopyCommandBuffer.copyBufferToImage(
1130                                             vkSrcBuffer, vkImage2DList[i2DIdx],
1131                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
1132                                         vkCopyCommandBuffer.pipelineBarrier(
1133                                             vkImage2DList,
1134                                             VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1135                                             VULKAN_IMAGE_LAYOUT_GENERAL);
1136                                         vkCopyCommandBuffer.end();
1137                                         memset(dstBufferPtr, 0, srcBufSize);
1138                                         vkQueue.submit(vkCopyCommandBuffer);
1139                                         vkShaderCommandBuffer.begin();
1140                                         vkShaderCommandBuffer.bindPipeline(
1141                                             vkComputePipeline);
1142                                         vkShaderCommandBuffer
1143                                             .bindDescriptorSets(
1144                                                 vkComputePipeline,
1145                                                 vkPipelineLayout,
1146                                                 vkDescriptorSet);
1147                                         vkShaderCommandBuffer.dispatch(
1148                                             NUM_BLOCKS(width,
1149                                                        NUM_THREADS_PER_GROUP_X),
1150                                             NUM_BLOCKS(height,
1151                                                        NUM_THREADS_PER_GROUP_Y
1152                                                            / 2),
1153                                             1);
1154                                         vkShaderCommandBuffer.end();
1155                                         if (i2DIdx < vkImage2DList.size() - 1)
1156                                         {
1157                                             vkQueue.submit(
1158                                                 vkShaderCommandBuffer);
1159                                         }
1160                                     }
1161                                 }
1162                                 vkQueue.submit(vkCl2VkSemaphore,
1163                                                vkShaderCommandBuffer,
1164                                                vkVk2CLSemaphore);
1165                                 clVk2CLExternalSemaphore->wait(cmd_queue1);
1166                                 switch (num2DImages)
1167                                 {
1168                                     case 1:
1169                                         updateKernelCQ1 = getKernelType(
1170                                             vkFormat, kernel_float[0],
1171                                             kernel_signed[0],
1172                                             kernel_unsigned[0]);
1173                                         break;
1174                                     case 2:
1175                                         updateKernelCQ1 = getKernelType(
1176                                             vkFormat, kernel_float[1],
1177                                             kernel_signed[1],
1178                                             kernel_unsigned[1]);
1179                                         break;
1180                                     case 4:
1181                                         updateKernelCQ1 = getKernelType(
1182                                             vkFormat, kernel_float[2],
1183                                             kernel_signed[2],
1184                                             kernel_unsigned[2]);
1185                                         break;
1186                                 }
1187                                 int j = 0;
1188                                 for (int i = 0; i < num2DImages; i++, ++j)
1189                                 {
1190                                     err = clSetKernelArg(
1191                                         updateKernelCQ1, j, sizeof(cl_mem),
1192                                         &external_mem_image1[i]);
1193                                     err |= clSetKernelArg(
1194                                         updateKernelCQ1, ++j, sizeof(cl_mem),
1195                                         &external_mem_image2[i]);
1196                                 }
1197                                 err |= clSetKernelArg(updateKernelCQ1, j,
1198                                                       sizeof(unsigned int),
1199                                                       &num2DImages);
1200                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1201                                                       sizeof(unsigned int),
1202                                                       &width);
1203                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1204                                                       sizeof(unsigned int),
1205                                                       &height);
1206                                 err |= clSetKernelArg(updateKernelCQ1, ++j,
1207                                                       sizeof(unsigned int),
1208                                                       &numMipLevels);
1209 
1210                                 if (err != CL_SUCCESS)
1211                                 {
1212                                     print_error(err,
1213                                                 "Error: Failed to set arg "
1214                                                 "values for kernel-1\n");
1215                                     goto CLEANUP;
1216                                 }
1217 
1218                                 size_t global_work_size[3] = { width, height,
1219                                                                1 };
1220                                 err = clEnqueueNDRangeKernel(
1221                                     cmd_queue1, updateKernelCQ1, 2, NULL,
1222                                     global_work_size, NULL, 0, NULL, NULL);
1223                                 if (err != CL_SUCCESS)
1224                                 {
1225                                     goto CLEANUP;
1226                                 }
1227                                 clCl2VkExternalSemaphore->signal(cmd_queue1);
1228                             }
1229 
1230                             unsigned int flags = 0;
1231                             size_t mipmapLevelOffset = 0;
1232                             cl_event eventReadImage = NULL;
1233                             for (int i = 0; i < num2DImages; i++)
1234                             {
1235                                 err = clEnqueueReadImage(
1236                                     cmd_queue1, external_mem_image2[i], CL_TRUE,
1237                                     origin, region, 0, 0, dstBufferPtr, 0, NULL,
1238                                     &eventReadImage);
1239 
1240                                 if (err != CL_SUCCESS)
1241                                 {
1242                                     print_error(err,
1243                                                 "clEnqueueReadImage failed with"
1244                                                 "error\n");
1245                                 }
1246 
1247                                 if (memcmp(srcBufferPtr, dstBufferPtr,
1248                                            srcBufSize))
1249                                 {
1250                                     log_info("Source and destination buffers "
1251                                              "don't match\n");
1252                                     if (debug_trace)
1253                                     {
1254                                         log_info("Source buffer contents: \n");
1255                                         for (uint64_t sIdx = 0;
1256                                              sIdx < srcBufSize; sIdx++)
1257                                         {
1258                                             log_info(
1259                                                 "%d",
1260                                                 (int)vkSrcBufferDeviceMemoryPtr
1261                                                     [sIdx]);
1262                                         }
1263                                         log_info(
1264                                             "Destination buffer contents:");
1265                                         for (uint64_t dIdx = 0;
1266                                              dIdx < srcBufSize; dIdx++)
1267                                         {
1268                                             log_info("%d",
1269                                                      (int)dstBufferPtr[dIdx]);
1270                                         }
1271                                     }
1272                                     err = -1;
1273                                     break;
1274                                 }
1275                             }
1276                             for (int i = 0; i < num2DImages; i++)
1277                             {
1278                                 delete vkNonDedicatedImage2DListDeviceMemory1
1279                                     [i];
1280                                 delete vkNonDedicatedImage2DListDeviceMemory2
1281                                     [i];
1282                                 delete nonDedicatedExternalMemory1[i];
1283                                 delete nonDedicatedExternalMemory2[i];
1284                             }
1285                             vkNonDedicatedImage2DListDeviceMemory1.erase(
1286                                 vkNonDedicatedImage2DListDeviceMemory1.begin(),
1287                                 vkNonDedicatedImage2DListDeviceMemory1.begin()
1288                                     + num2DImages);
1289                             vkNonDedicatedImage2DListDeviceMemory2.erase(
1290                                 vkNonDedicatedImage2DListDeviceMemory2.begin(),
1291                                 vkNonDedicatedImage2DListDeviceMemory2.begin()
1292                                     + num2DImages);
1293                             nonDedicatedExternalMemory1.erase(
1294                                 nonDedicatedExternalMemory1.begin(),
1295                                 nonDedicatedExternalMemory1.begin()
1296                                     + num2DImages);
1297                             nonDedicatedExternalMemory2.erase(
1298                                 nonDedicatedExternalMemory2.begin(),
1299                                 nonDedicatedExternalMemory2.begin()
1300                                     + num2DImages);
1301                             if (CL_SUCCESS != err)
1302                             {
1303                                 goto CLEANUP;
1304                             }
1305                         }
1306                     }
1307                 }
1308             }
1309         }
1310         vkImage2DShader.clear();
1311     }
1312 CLEANUP:
1313     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
1314     if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
1315 
1316     if (srcBufferPtr) free(srcBufferPtr);
1317     if (dstBufferPtr) free(dstBufferPtr);
1318     return err;
1319 }
1320 
test_image_common(cl_device_id device_,cl_context context_,cl_command_queue queue_,int numElements_)1321 int test_image_common(cl_device_id device_, cl_context context_,
1322                       cl_command_queue queue_, int numElements_)
1323 {
1324     int current_device = 0;
1325     int device_count = 0;
1326     int devices_prohibited = 0;
1327     cl_int err = CL_SUCCESS;
1328     cl_platform_id platform = NULL;
1329     size_t extensionSize = 0;
1330     cl_uint num_devices = 0;
1331     cl_uint device_no = 0;
1332     cl_device_id *devices;
1333     char *extensions = NULL;
1334     const char *program_source_const;
1335     cl_command_queue cmd_queue1 = NULL;
1336     cl_command_queue cmd_queue2 = NULL;
1337     cl_context context = NULL;
1338     const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1;
1339     // One kernel for Cross-CQ case
1340     const uint32_t num_kernel_types = 3;
1341     const char *kernel_source[num_kernels] = { kernel_text_numImage_1,
1342                                                kernel_text_numImage_2,
1343                                                kernel_text_numImage_4 };
1344     char source_1[4096];
1345     char source_2[4096];
1346     char source_3[4096];
1347     size_t program_source_length;
1348     cl_program program[num_kernel_types];
1349     cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL };
1350     cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL };
1351     cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL };
1352     cl_mem external_mem_image1;
1353     cl_mem external_mem_image2;
1354 
1355     VulkanDevice vkDevice;
1356 
1357     cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
1358     // get the platform ID
1359     err = clGetPlatformIDs(1, &platform, NULL);
1360     if (err != CL_SUCCESS)
1361     {
1362         print_error(err, "Error: Failed to get platform\n");
1363         goto CLEANUP;
1364     }
1365 
1366     err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
1367     if (CL_SUCCESS != err)
1368     {
1369         print_error(err, "clGetDeviceIDs failed in returning no. of devices\n");
1370         goto CLEANUP;
1371     }
1372     devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id));
1373     if (NULL == devices)
1374     {
1375         err = CL_OUT_OF_HOST_MEMORY;
1376         print_error(err, "Unable to allocate memory for devices\n");
1377         goto CLEANUP;
1378     }
1379     err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices,
1380                          NULL);
1381     if (CL_SUCCESS != err)
1382     {
1383         print_error(err, "Failed to get deviceID.\n");
1384         goto CLEANUP;
1385     }
1386     contextProperties[1] = (cl_context_properties)platform;
1387     log_info("Assigned contextproperties for platform\n");
1388     for (device_no = 0; device_no < num_devices; device_no++)
1389     {
1390         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL,
1391                               &extensionSize);
1392         if (CL_SUCCESS != err)
1393         {
1394             print_error(
1395                 err,
1396                 "Error in clGetDeviceInfo for getting device_extension size\n");
1397             goto CLEANUP;
1398         }
1399         extensions = (char *)malloc(extensionSize);
1400         if (NULL == extensions)
1401         {
1402             err = CL_OUT_OF_HOST_MEMORY;
1403             print_error(err, "Unable to allocate memory for extensions\n");
1404             goto CLEANUP;
1405         }
1406         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS,
1407                               extensionSize, extensions, NULL);
1408         if (CL_SUCCESS != err)
1409         {
1410             print_error(
1411                 err, "Error in clGetDeviceInfo for getting device_extension\n");
1412             goto CLEANUP;
1413         }
1414         err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR,
1415                               CL_UUID_SIZE_KHR, uuid, &extensionSize);
1416         if (CL_SUCCESS != err)
1417         {
1418             print_error(err, "clGetDeviceInfo failed with error");
1419             goto CLEANUP;
1420         }
1421         err =
1422             memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE);
1423         if (err == 0)
1424         {
1425             break;
1426         }
1427     }
1428     if (device_no >= num_devices)
1429     {
1430         err = EXIT_FAILURE;
1431         print_error(err,
1432                     "OpenCL error:"
1433                     "No Vulkan-OpenCL Interop capable GPU found.\n");
1434         goto CLEANUP;
1435     }
1436     deviceId = devices[device_no];
1437     err = setMaxImageDimensions(deviceId, max_width, max_height);
1438     if (CL_SUCCESS != err)
1439     {
1440         print_error(err, "error setting max image dimensions");
1441         goto CLEANUP;
1442     }
1443     log_info("Set max_width to %lu and max_height to %lu\n", max_width,
1444              max_height);
1445     context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
1446                                       NULL, NULL, &err);
1447     if (CL_SUCCESS != err)
1448     {
1449         print_error(err, "error creating context");
1450         goto CLEANUP;
1451     }
1452     log_info("Successfully created context !!!\n");
1453 
1454     cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1455     if (CL_SUCCESS != err)
1456     {
1457         err = CL_INVALID_COMMAND_QUEUE;
1458         print_error(err, "Error: Failed to create command queue!\n");
1459         goto CLEANUP;
1460     }
1461     log_info("clCreateCommandQueue successfull \n");
1462 
1463     cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err);
1464     if (CL_SUCCESS != err)
1465     {
1466         err = CL_INVALID_COMMAND_QUEUE;
1467         print_error(err, "Error: Failed to create command queue!\n");
1468         goto CLEANUP;
1469     }
1470     log_info("clCreateCommandQueue2 successful \n");
1471 
1472     for (int i = 0; i < num_kernels; i++)
1473     {
1474         switch (i)
1475         {
1476             case 0:
1477                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1478                         "f", "f", "f");
1479                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1480                         "i", "i");
1481                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1482                         "ui", "ui", "ui");
1483                 break;
1484             case 1:
1485                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1486                         "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1487                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1488                         "int4", "i", "int4", "i", "i", "i", "i", "i");
1489                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1490                         "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1491                         "ui");
1492                 break;
1493             case 2:
1494                 sprintf(source_1, kernel_source[i], "float4", "f", "float4",
1495                         "f", "float4", "f", "float4", "f", "float4", "f",
1496                         "float4", "f", "float4", "f", "float4", "f", "f", "f",
1497                         "f", "f", "f", "f", "f", "f");
1498                 sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i",
1499                         "int4", "i", "int4", "i", "int4", "i", "int4", "i",
1500                         "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i",
1501                         "i", "i");
1502                 sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4",
1503                         "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui",
1504                         "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui",
1505                         "ui", "ui", "ui", "ui", "ui", "ui");
1506                 break;
1507             case 3:
1508                 // Addtional case for creating updateKernelCQ2 which takes two
1509                 // images
1510                 sprintf(source_1, kernel_source[1], "float4", "f", "float4",
1511                         "f", "float4", "f", "float4", "f", "f", "f", "f", "f");
1512                 sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i",
1513                         "int4", "i", "int4", "i", "i", "i", "i", "i");
1514                 sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4",
1515                         "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui",
1516                         "ui");
1517                 break;
1518         }
1519         const char *sourceTexts[num_kernel_types] = { source_1, source_2,
1520                                                       source_3 };
1521         for (int k = 0; k < num_kernel_types; k++)
1522         {
1523             program_source_length = strlen(sourceTexts[k]);
1524             program[k] = clCreateProgramWithSource(
1525                 context, 1, &sourceTexts[k], &program_source_length, &err);
1526             err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL);
1527         }
1528 
1529         if (err != CL_SUCCESS)
1530         {
1531             print_error(err, "Error: Failed to build program");
1532             goto CLEANUP;
1533         }
1534         // create the kernel
1535         kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err);
1536         if (err != CL_SUCCESS)
1537         {
1538             print_error(err, "clCreateKernel failed");
1539             goto CLEANUP;
1540         }
1541         kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err);
1542         if (err != CL_SUCCESS)
1543         {
1544             print_error(err, "clCreateKernel failed");
1545             goto CLEANUP;
1546         }
1547         kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err);
1548         if (err != CL_SUCCESS)
1549         {
1550             print_error(err, "clCreateKernel failed ");
1551             goto CLEANUP;
1552         }
1553     }
1554     if (numCQ == 2)
1555     {
1556         err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2,
1557                                       kernel_unsigned, kernel_signed,
1558                                       kernel_float, vkDevice);
1559     }
1560     else
1561     {
1562         err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned,
1563                                       kernel_signed, kernel_float, vkDevice);
1564     }
1565 CLEANUP:
1566     for (int i = 0; i < num_kernels; i++)
1567     {
1568         if (kernel_float[i])
1569         {
1570             clReleaseKernel(kernel_float[i]);
1571         }
1572         if (kernel_unsigned[i])
1573         {
1574             clReleaseKernel(kernel_unsigned[i]);
1575         }
1576         if (kernel_signed[i])
1577         {
1578             clReleaseKernel(kernel_signed[i]);
1579         }
1580     }
1581     for (int i = 0; i < num_kernel_types; i++)
1582     {
1583         if (program[i])
1584         {
1585             clReleaseProgram(program[i]);
1586         }
1587     }
1588     if (cmd_queue1) clReleaseCommandQueue(cmd_queue1);
1589     if (cmd_queue2) clReleaseCommandQueue(cmd_queue2);
1590     if (context) clReleaseContext(context);
1591 
1592     if (extensions) free(extensions);
1593     if (devices) free(devices);
1594 
1595     return err;
1596 }
1597