1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "common.h"
17
18 // create linked lists that use nodes from two different buffers.
19 const char *SVMCrossBufferPointers_test_kernel[] = {
20 "\n"
21 "typedef struct Node {\n"
22 " int global_id;\n"
23 " int position_in_list;\n"
24 " __global struct Node* pNext;\n"
25 "} Node;\n"
26 "\n"
27 "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n"
28 "{\n"
29 // mix things up, adjacent work items will allocate from different buffers
30 " if(i & 0x1)\n"
31 " return &pNodes1[atomic_inc(allocation_index)];\n"
32 " else\n"
33 " return &pNodes2[atomic_inc(allocation_index)];\n"
34 "}\n"
35 "\n"
36 // The allocation_index parameter must be initialized on the host to N work-items
37 // The first N nodes in pNodes will be the heads of the lists.
38 "__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n"
39 "{\n"
40 " size_t i = get_global_id(0);\n"
41 " __global Node *pNode = &pNodes[i];\n"
42 "\n"
43 " pNode->global_id = i;\n"
44 " pNode->position_in_list = 0;\n"
45 "\n"
46 " __global Node *pNew;\n"
47 " for(int j=1; j < list_length; j++)\n"
48 " {\n"
49 " pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n"
50 " pNew->global_id = i;\n"
51 " pNew->position_in_list = j;\n"
52 " pNode->pNext = pNew; // link new node onto end of list\n"
53 " pNode = pNew; // move to end of list\n"
54 " }\n"
55 "}\n"
56 "\n"
57 "__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n"
58 "{\n"
59 " size_t i = get_global_id(0);\n"
60 " __global Node *pNode = &pNodes[i];\n"
61 "\n"
62 " for(int j=0; j < list_length; j++)\n"
63 " {\n"
64 " if( pNode->global_id == i && pNode->position_in_list == j)\n"
65 " {\n"
66 " atomic_inc(num_correct);\n"
67 " }\n"
68 " else {\n"
69 " break;\n"
70 " }\n"
71 " pNode = pNode->pNext;\n"
72 " }\n"
73 "}\n"
74 };
75
76
77 // Creates linked list using host code.
create_linked_lists_on_host(cl_command_queue cmdq,cl_mem nodes,cl_mem nodes2,cl_int ListLength,size_t numLists)78 cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
79 {
80 cl_int error = CL_SUCCESS;
81
82 log_info("SVM: creating linked list on host ");
83
84 Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
85 test_error2(error, pNodes, "clEnqueueMapBuffer failed");
86
87 Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
88 test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
89
90 create_linked_lists(pNodes, numLists, ListLength);
91
92 error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
93 test_error(error, "clEnqueueUnmapMemObject failed");
94 error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
95 test_error(error, "clEnqueueUnmapMemObject failed");
96 error = clFinish(cmdq);
97 test_error(error, "clFinish failed");
98 return error;
99 }
100
101 // Verify correctness of the linked list using host code.
verify_linked_lists_on_host(int ci,cl_command_queue cmdq,cl_mem nodes,cl_mem nodes2,cl_int ListLength,size_t numLists)102 cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
103 {
104 cl_int error = CL_SUCCESS;
105
106 //log_info(" and verifying on host ");
107
108 Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
109 test_error2(error, pNodes, "clEnqueueMapBuffer failed");
110 Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
111 test_error2(error, pNodes, "clEnqueueMapBuffer failed");
112
113 error = verify_linked_lists(pNodes, numLists, ListLength);
114 if(error) return -1;
115
116 error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
117 test_error(error, "clEnqueueUnmapMemObject failed");
118 error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
119 test_error(error, "clEnqueueUnmapMemObject failed");
120 error = clFinish(cmdq);
121 test_error(error, "clFinish failed");
122 return error;
123 }
124
125 // This tests that shared buffers are able to contain pointers that point to other shared buffers.
126 // This tests that all devices and the host share a common address space; using only the coarse-grain features.
127 // This is done by creating a linked list on a device and then verifying the correctness of the list
128 // on another device or the host.
129 // The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
130 // This basic test is performed for all combinations of devices and the host.
test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID,cl_context context2,cl_command_queue queue,int num_elements)131 int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements)
132 {
133 clContextWrapper context = NULL;
134 clProgramWrapper program = NULL;
135 cl_uint num_devices = 0;
136 cl_int error = CL_SUCCESS;
137 clCommandQueueWrapper queues[MAXQ];
138
139 error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
140 if(error) return -1;
141
142 size_t numLists = num_elements;
143 cl_int ListLength = 32;
144
145 clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error);
146 test_error(error, "clCreateKernel failed");
147
148 clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error);
149 test_error(error, "clCreateKernel failed");
150
151 // this buffer holds some of the linked list nodes.
152 Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
153
154 // this buffer holds some of the linked list nodes.
155 Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0);
156
157 {
158 clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error);
159 test_error(error, "clCreateBuffer failed.");
160
161 clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error);
162 test_error(error, "clCreateBuffer failed.");
163
164 // this buffer holds the index into the nodes buffer that is used for node allocation
165 clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
166 test_error(error, "clCreateBuffer failed.");
167
168 // this buffer holds the count of correct nodes which is computed by the verify kernel.
169 clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
170 test_error(error, "clCreateBuffer failed.");
171
172 error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes);
173 //error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes);
174 error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2);
175 error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator);
176 error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength);
177
178 error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes);
179 error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2);
180 error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct);
181 error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength);
182 test_error(error, "clSetKernelArg failed");
183
184 // Create linked list on one device and verify on another device (or the host).
185 // Do this for all possible combinations of devices and host within the platform.
186 for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on
187 {
188 for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on
189 {
190 if(ci == num_devices) // last device index represents the host, note the num_device+1 above.
191 {
192 error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists);
193 if(error) return -1;
194 }
195 else
196 {
197 error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists);
198 if(error) return -1;
199 }
200
201 if(vi == num_devices)
202 {
203 error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists);
204 if(error) return -1;
205 }
206 else
207 {
208 error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists);
209 if(error) return -1;
210 }
211 } // inner loop, vi
212 } // outer loop, ci
213 }
214
215 clSVMFree(context, pNodes2);
216 clSVMFree(context, pNodes);
217
218 return 0;
219 }
220