1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "common.h"
17 
18 const char *shared_sub_buffers_test_kernel[] = {
19   "typedef struct Node {\n"
20   "    int global_id;\n"
21   "    int position_in_list;\n"
22   "    __global struct Node* pNext;\n"
23   "} Node;\n"
24 
25   // create linked lists that use nodes from 2 different buffers
26   "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
27   "{\n"
28   // mix things up, adjacent work items will allocate from different buffers
29   "    if(i & 0x1)\n"
30   "        return &pNodes1[atomic_inc(allocation_index)];\n"
31   "    else\n"
32   "        return &pNodes2[atomic_inc(allocation_index)];\n"
33   "}\n"
34 
35   // The allocation_index parameter must be initialized on the host to N work-items
36   // The first N nodes in pNodes will be the heads of the lists.
37   // This tests passing 4 different sub-buffers that come from two parent buffers.
38   // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
39   "__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n"
40   "{\n"
41   "    size_t i = get_global_id(0);\n"
42   "    __global Node *pNode = &pNodes_sub1[i];\n"
43   "    pNode->global_id = i;\n"
44   "    pNode->position_in_list = 0;\n"
45   "    __global Node *pNew;\n"
46   "    for(int j=1; j < list_length; j++) {\n"
47   "        pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n"
48   "        pNew->global_id = i;\n"
49   "        pNew->position_in_list = j;\n"
50   "        pNode->pNext = pNew;  // link new node onto end of list\n"
51   "        pNode = pNew;   // move to end of list\n"
52   "    }\n"
53   "}\n"
54   // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device
55   "__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n"
56   "{\n"
57   "    size_t i = get_global_id(0);\n"
58   "    __global Node *pNode = &pNodes_sub1[i];\n"
59   "    for(int j=0; j < list_length; j++) {\n"
60   "        if( pNode->global_id == i && pNode->position_in_list == j)\n"
61   "            atomic_inc(num_correct);\n"
62   "        else \n"
63   "            break;\n"
64   "        pNode = pNode->pNext;\n"
65   "    }\n"
66   "}\n"
67 };
68 
69 
70 // Creates linked list using host code.
create_linked_lists_on_host_sb(cl_command_queue cmdq,cl_mem nodes,cl_mem nodes2,cl_int ListLength,size_t numLists)71 cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
72 {
73   cl_int error = CL_SUCCESS;
74 
75   log_info("SVM: creating linked list on host ");
76 
77   Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
78   test_error2(error, pNodes, "clEnqueueMapBuffer failed");
79 
80   Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
81   test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
82 
83   create_linked_lists(pNodes, numLists, ListLength);
84 
85   error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
86   test_error(error, "clEnqueueUnmapMemObject failed");
87   error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
88   test_error(error, "clEnqueueUnmapMemObject failed");
89   error = clFinish(cmdq);
90   test_error(error, "clFinish failed");
91   return error;
92 }
93 
94 // Verify correctness of the linked list using host code.
verify_linked_lists_on_host_sb(int ci,cl_command_queue cmdq,cl_mem nodes,cl_mem nodes2,cl_int ListLength,size_t numLists)95 cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
96 {
97   cl_int error = CL_SUCCESS;
98 
99   //log_info(" and verifying on host ");
100 
101   Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
102   test_error2(error, pNodes, "clEnqueueMapBuffer failed");
103   Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
104   test_error2(error, pNodes, "clEnqueueMapBuffer failed");
105 
106   error = verify_linked_lists(pNodes, numLists, ListLength);
107   if(error) return -1;
108 
109   error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
110   test_error(error, "clEnqueueUnmapMemObject failed");
111   error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
112   test_error(error, "clEnqueueUnmapMemObject failed");
113   error = clFinish(cmdq);
114   test_error(error, "clFinish failed");
115   return error;
116 }
117 
118 
119 // This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified.
120 // This tests that passing only the sub-buffers to a kernel works.
121 // The test is derived from the cross-buffer pointers test which
122 // tests that shared buffers are able to contain pointers that point to other shared buffers.
123 // This tests that all devices and the host share a common address space; using only the coarse-grain features.
124 // This is done by creating a linked list on a device and then verifying the correctness of the list
125 // on another device or the host.
126 // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
127 // This basic test is performed for all combinations of devices and the host.
test_svm_shared_sub_buffers(cl_device_id deviceID,cl_context context2,cl_command_queue queue,int num_elements)128 int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
129 {
130   clContextWrapper    context = NULL;
131   clProgramWrapper    program = NULL;
132   cl_uint     num_devices = 0;
133   cl_int      error = CL_SUCCESS;
134   clCommandQueueWrapper queues[MAXQ];
135 
136   error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
137   if(error) return -1;
138 
139   size_t numLists =  num_elements;
140   if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size.
141 
142   cl_int ListLength = 32;
143 
144   clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
145   test_error(error, "clCreateKernel failed");
146 
147   clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
148   test_error(error, "clCreateKernel failed");
149 
150   size_t nodes_bufsize = sizeof(Node)*ListLength*numLists;
151   Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
152   Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0);
153 
154   {
155     // this buffer holds some of the linked list nodes.
156     clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error);
157     test_error(error, "clCreateBuffer failed.");
158 
159     cl_buffer_region r;
160     r.origin = 0;
161     r.size = nodes_bufsize / 2;
162     // this should inherit the flag settings from nodes buffer
163     clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
164     test_error(error, "clCreateSubBuffer");
165     r.origin = nodes_bufsize / 2;
166     clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
167     test_error(error, "clCreateSubBuffer");
168 
169 
170     // this buffer holds some of the linked list nodes.
171     clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
172     test_error(error, "clCreateBuffer failed.");
173     r.origin = 0;
174     r.size = nodes_bufsize / 2;
175     // this should inherit the flag settings from nodes buffer
176     clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error);
177     test_error(error, "clCreateSubBuffer");
178     r.origin = nodes_bufsize / 2;
179     clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error);
180     test_error(error, "clCreateSubBuffer");
181 
182 
183 
184     // this buffer holds the index into the nodes buffer that is used for node allocation
185     clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
186                                             sizeof(size_t), NULL, &error);
187     test_error(error, "clCreateBuffer failed.");
188 
189     // this buffer holds the count of correct nodes which is computed by the verify kernel.
190     clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
191     test_error(error, "clCreateBuffer failed.");
192 
193     error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1);
194     error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
195     error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2);
196     error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
197     error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator);
198     error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength);
199 
200     error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1);
201     error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1);
202     error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2);
203     error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2);
204     error |= clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct);
205     error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength);
206     test_error(error, "clSetKernelArg failed");
207 
208     // Create linked list on one device and verify on another device (or the host).
209     // Do this for all possible combinations of devices and host within the platform.
210     for (int ci=0; ci<(int)num_devices+1; ci++)  // ci is CreationIndex, index of device/q to create linked list on
211     {
212       for (int vi=0; vi<(int)num_devices+1; vi++)  // vi is VerificationIndex, index of device/q to verify linked list on
213       {
214         if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
215         {
216           error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists);
217           if(error) return -1;
218         }
219         else
220         {
221           error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
222           if(error) return -1;
223         }
224 
225         if(vi == num_devices)
226         {
227           error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists);
228           if(error) return -1;
229         }
230         else
231         {
232           error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
233           if(error) return -1;
234         }
235       } // inner loop, vi
236     } // outer loop, ci
237   }
238   clSVMFree(context, pNodes2);
239   clSVMFree(context, pNodes);
240 
241   return 0;
242 }
243