1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24
25 #include "procs.h"
26
27 const char *hostptr_kernel_code =
28 "__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34
35 static const float MAX_ERR = 1e-5f;
36
verify_hostptr(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n)37 static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
38 {
39 cl_float r;
40 int i;
41
42 for (i=0; i<n; i++)
43 {
44 r = inptrA[i] + inptrB[i];
45 if (r != outptr[i])
46 {
47 return -1;
48 }
49 }
50 return 0;
51 }
52
make_random_data(unsigned count,float * ptr,MTdata d)53 static void make_random_data(unsigned count, float *ptr, MTdata d)
54 {
55 cl_uint i;
56 for (i=0; i<count; i++)
57 ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
58 }
59
60 static unsigned char *
generate_rgba8_image(int w,int h,MTdata d)61 generate_rgba8_image(int w, int h, MTdata d)
62 {
63 unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
64 int i;
65
66 for (i=0; i<w*h*4; i++)
67 ptr[i] = (unsigned char)genrand_int32(d);
68
69 return ptr;
70 }
71
72 static unsigned char *
randomize_rgba8_image(unsigned char * ptr,int w,int h,MTdata d)73 randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
74 {
75 int i;
76
77 for (i=0; i<w*h*4; i++)
78 ptr[i] = (unsigned char)genrand_int32(d);
79
80 return ptr;
81 }
82
83 static int
verify_rgba8_image(unsigned char * image,unsigned char * outptr,int w,int h)84 verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
85 {
86 int i;
87
88 for (i=0; i<w*h*4; i++)
89 {
90 if (outptr[i] != image[i])
91 return -1;
92 }
93
94 return 0;
95 }
96
97 int
test_hostptr(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)98 test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
99 {
100 cl_float *input_ptr[2], *output_ptr;
101 cl_program program;
102 cl_kernel kernel;
103 size_t threads[3]={0,0,0};
104 cl_image_format img_format;
105 cl_uchar *rgba8_inptr, *rgba8_outptr;
106 void *lock_buffer;
107 int img_width = 512;
108 int img_height = 512;
109 cl_int err;
110 MTdata d;
111 RoundingMode oldRoundMode;
112 int isRTZ = 0;
113
114 // Block to mark deletion of streams before deletion of host_ptr
115 {
116 clMemWrapper streams[7];
117
118 PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
119
120 // Alloc buffers
121 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
122 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
123 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
124
125 d = init_genrand( gRandomSeed );
126 rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
127 rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
128
129 // Random data
130 make_random_data(num_elements, input_ptr[0], d);
131 make_random_data(num_elements, input_ptr[1], d);
132
133 // Create host-side input
134 streams[0] =
135 clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
136 sizeof(cl_float) * num_elements, input_ptr[0], &err);
137 test_error(err, "clCreateBuffer 0 failed");
138
139 // Create a copied input
140 streams[1] =
141 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
142 sizeof(cl_float) * num_elements, input_ptr[1], &err);
143 test_error(err, "clCreateBuffer 1 failed");
144
145 // Create a host-side output
146 streams[2] =
147 clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
148 sizeof(cl_float) * num_elements, output_ptr, &err);
149 test_error(err, "clCreateBuffer 2 failed");
150
151 // Create a host-side input
152 img_format.image_channel_order = CL_RGBA;
153 img_format.image_channel_data_type = CL_UNORM_INT8;
154 streams[3] =
155 create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
156 img_width, img_height, 0, rgba8_inptr, &err);
157 test_error(err, "create_image_2d 3 failed");
158
159 // Create a copied input
160 img_format.image_channel_order = CL_RGBA;
161 img_format.image_channel_data_type = CL_UNORM_INT8;
162 streams[4] =
163 create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
164 img_width, img_height, 0, rgba8_inptr, &err);
165 test_error(err, "create_image_2d 4 failed");
166
167 // Create a host-side output
168 img_format.image_channel_order = CL_RGBA;
169 img_format.image_channel_data_type = CL_UNORM_INT8;
170 streams[5] =
171 create_image_2d(context, CL_MEM_USE_HOST_PTR, &img_format,
172 img_width, img_height, 0, rgba8_outptr, &err);
173 test_error(err, "create_image_2d 5 failed");
174
175 // Create a copied output
176 img_format.image_channel_data_type = CL_RGBA;
177 img_format.image_channel_data_type = CL_UNORM_INT8;
178 streams[6] =
179 create_image_2d(context, CL_MEM_COPY_HOST_PTR, &img_format,
180 img_width, img_height, 0, rgba8_outptr, &err);
181 test_error(err, "create_image_2d 6 failed");
182
183 err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
184 test_error(err, "create_single_kernel_helper failed");
185
186 // Execute kernel
187 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
188 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
189 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
190 test_error(err, "clSetKernelArg failed");
191
192 threads[0] = (size_t)num_elements;
193 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
194 test_error(err, "clEnqueueNDRangeKernel failed");
195
196 cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
197 test_error( err, "clEnqueueMapBuffer failed" );
198
199 //If we only support rtz mode
200 if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
201 {
202 oldRoundMode = set_round(kRoundTowardZero, kfloat);
203 isRTZ = 1;
204 }
205
206 if (isRTZ)
207 oldRoundMode = set_round(kRoundTowardZero, kfloat);
208
209 // Verify that we got the expected results back on the host side
210 err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
211 if (err)
212 {
213 log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
214 "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
215 } else {
216 log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
217 "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
218 }
219
220 if (isRTZ)
221 set_round(oldRoundMode, kfloat);
222
223 err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
224 test_error( err, "clEnqueueUnmapMemObject failed" );
225
226 size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
227 randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
228 free_mtdata(d); d = NULL;
229
230 // Copy from host-side to host-side
231 log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
232 err = clEnqueueCopyImage(queue, streams[3], streams[5],
233 origin, origin, region, 0, NULL, NULL);
234 test_error(err, "clEnqueueCopyImage failed");
235 log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
236
237 // test the lock buffer interface
238 log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
239 size_t row_pitch;
240 lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
241 CL_MAP_READ, origin, region,
242 &row_pitch, NULL,
243 0, NULL, NULL, &err);
244 test_error(err, "clEnqueueMapImage failed");
245
246 err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
247 if (err != CL_SUCCESS)
248 {
249 log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
250 return -1;
251 }
252 log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
253
254 err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
255 test_error(err, "clEnqueueUnmapMemObject failed");
256
257 // Copy host-side to device-side and read back
258 log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
259 err = clEnqueueCopyImage(queue, streams[3], streams[5],
260 origin, origin, region,
261 0, NULL, NULL);
262 test_error(err, "clEnqueueCopyImage failed");
263
264 err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
265 test_error(err, "clEnqueueReadImage failed");
266
267 err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
268 if (err != CL_SUCCESS)
269 {
270 log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
271 return -1;
272 }
273 log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
274 }
275 // cleanup
276 clReleaseKernel(kernel);
277 clReleaseProgram(program);
278 free(input_ptr[0]);
279 free(input_ptr[1]);
280 free(output_ptr);
281
282 free(rgba8_inptr);
283 free(rgba8_outptr);
284
285 return err;
286 }
287
288
289
290
291
292