• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <time.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include <algorithm>
25 
26 #include "procs.h"
27 #include "harness/testHarness.h"
28 #include "harness/errorHelpers.h"
29 
30 #ifndef uchar
31 typedef unsigned char uchar;
32 #endif
33 
34 //#define CREATE_OUTPUT    1
35 
36 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
37 
38 
39 
40 //--- the code for kernel executables
41 static const char *image_filter_src =
42 "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
43 "\n"
44 "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
45 "              read_only image2d_t src_image, write_only image2d_t dst_image )\n"
46 "{\n"
47 "   int     i, j;\n"
48 "   int     indx = 0;\n"
49 "    int        tid_x = get_global_id(0);\n"
50 "    int        tid_y = get_global_id(1);\n"
51 "    float4  filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
52 "\n"
53 "    for (i=-m/2; i<(m+1)/2; i++){\n"
54 "        for (j=-n/2; j<(n+1)/2; j++){\n"
55 "            float   w = filter_weights[indx++];\n"
56 "\n"
57 "            if (w != 0.0f){\n"
58 "                filter_result += w * read_imagef(src_image, sampler,\n"
59 "                                                 (int2)(tid_x + j, tid_y + i));\n"
60 "            }\n"
61 "        }\n"
62 "    }\n"
63 "\n"
64 "    write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
65 "}\n";
66 
67 
68 //--- equivalent non-kernel code
read_imagef(int x,int y,int w,int h,int nChannels,uchar * src,float * srcRgb)69 static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
70 {
71     // clamp the coords
72     int x0 = std::min(std::max(x, 0), w - 1);
73     int y0 = std::min(std::max(y, 0), h - 1);
74 
75     // get tine index
76     int    indx = ( y0 * w + x0 ) * nChannels;
77 
78     // seed the return array
79     int    i;
80     for( i = 0; i < nChannels; i++ ){
81         srcRgb[i] = (float)src[indx+i];
82     }
83 }    // end read_imagef()
84 
85 
write_imagef(uchar * dst,int x,int y,int w,int h,int nChannels,float * dstRgb)86 static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
87 {
88     // get tine index
89     int    indx = ( y * w + x ) * nChannels;
90 
91     // seed the return array
92     int    i;
93     for( i = 0; i < nChannels; i++ ){
94         dst[indx+i] = (uchar)dstRgb[i];
95     }
96 }    // end write_imagef()
97 
98 
basicFilterPixel(int x,int y,int n,int m,int xsize,int ysize,int nChannels,const float * filter_weights,uchar * src,uchar * dst)99 static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
100 {
101     int        i, j, k;
102     int        indx = 0;
103     float    filter_result[] = { 0.f, 0.f, 0.f, 0.f };
104     float    srcRgb[4];
105 
106     for( i = -m/2; i < (m+1)/2; i++ ){
107         for( j = -n/2; j < (n+1)/2; j++ ){
108             float    w = filter_weights[indx++];
109 
110             if( w != 0 ){
111                 read_imagef( x + j, y + i, xsize, ysize, nChannels, src, srcRgb );
112                 for( k = 0; k < nChannels; k++ ){
113                     filter_result[k] += w * srcRgb[k];
114                 }
115             }
116         }
117     }
118 
119     write_imagef( dst, x, y, xsize, ysize, nChannels, filter_result );
120 
121 }    // end basicFilterPixel()
122 
123 
124 //--- helper functions
createImage(int elements,MTdata d)125 static uchar *createImage( int elements, MTdata d)
126 {
127     int        i;
128     uchar    *ptr = (uchar *)malloc( elements * sizeof( cl_uchar ) );
129     if( ! ptr )
130         return NULL;
131 
132     for( i = 0; i < elements; i++ ){
133         ptr[i] = (uchar)genrand_int32(d);
134     }
135 
136     return ptr;
137 
138 }    // end createImage()
139 
140 
verifyImages(uchar * ptr0,uchar * ptr1,uchar tolerance,int xsize,int ysize,int nChannels)141 static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
142 {
143     int        x, y, z;
144     uchar    *p0 = ptr0;
145     uchar    *p1 = ptr1;
146 
147     for( y = 0; y < ysize; y++ ){
148         for( x = 0; x < xsize; x++ ){
149             for( z = 0; z < nChannels; z++ ){
150                 if( (uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){
151                     log_error( "  images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z,
152                               (int)p0[-1], (int)p1[-1] );
153                     return -1;
154                 }
155             }
156         }
157     }
158 
159     return 0;
160 
161 }    // end verifyImages()
162 
163 
kernelFilter(cl_device_id device,cl_context context,cl_command_queue queue,int w,int h,int nChannels,uchar * inptr,uchar * outptr)164 static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
165                          uchar *inptr, uchar *outptr )
166 {
167     cl_program            program[1];
168     cl_kernel            kernel[1];
169     cl_mem                memobjs[3];
170     cl_image_format        image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
171     cl_event            executeEvent;
172     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
173     size_t                threads[2];
174     float                filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
175     int                    filter_w = 3, filter_h = 3;
176     int                    err = 0;
177 
178     // set thread dimensions
179     threads[0] = w;
180     threads[1] = h;
181 
182     // allocate the input and output image memory objects
183     memobjs[0] =
184         create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
185                         &image_format_desc, w, h, 0, inptr, &err);
186     if( memobjs[0] == (cl_mem)0 ){
187         log_error( " unable to create 2D image using create_image_2d\n" );
188         return -1;
189     }
190 
191     memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
192     if( memobjs[1] == (cl_mem)0 ){
193         log_error( " unable to create 2D image using create_image_2d\n" );
194         clReleaseMemObject( memobjs[0] );
195         return -1;
196     }
197 
198     // allocate an array memory object to load the filter weights
199     memobjs[2] = clCreateBuffer(
200         context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
201         sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
202     if( memobjs[2] == (cl_mem)0 ){
203         log_error( " unable to create array using clCreateBuffer\n" );
204         clReleaseMemObject( memobjs[1] );
205         clReleaseMemObject( memobjs[0] );
206         return -1;
207     }
208 
209     // create the compute program
210     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" );
211     if( err ){
212         clReleaseMemObject( memobjs[2] );
213         clReleaseMemObject( memobjs[1] );
214         clReleaseMemObject( memobjs[0] );
215         return -1;
216     }
217 
218 
219     // create kernel args object and set arg values.
220     // set the args values
221     err = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w );
222     err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h );
223     err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] );
224     err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] );
225     err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] );
226 
227     if( err != CL_SUCCESS ){
228         print_error( err, "clSetKernelArg failed\n" );
229         clReleaseKernel( kernel[0] );
230         clReleaseProgram( program[0] );
231         clReleaseMemObject( memobjs[2] );
232         clReleaseMemObject( memobjs[1] );
233         clReleaseMemObject( memobjs[0] );
234         return -1;
235     }
236 
237     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
238 
239     if( err != CL_SUCCESS ){
240         print_error( err, "clEnqueueNDRangeKernel failed\n" );
241     clReleaseEvent( executeEvent );
242         clReleaseKernel( kernel[0] );
243         clReleaseProgram( program[0] );
244         clReleaseMemObject( memobjs[2] );
245         clReleaseMemObject( memobjs[1] );
246         clReleaseMemObject( memobjs[0] );
247         return -1;
248     }
249 
250     // This synchronization point is needed in order to assume the data is valid.
251     // Getting profiling information is not a synchronization point.
252     err = clWaitForEvents( 1, &executeEvent );
253     if( err != CL_SUCCESS )
254     {
255     clReleaseEvent( executeEvent );
256         clReleaseKernel( kernel[0] );
257         clReleaseProgram( program[0] );
258         clReleaseMemObject( memobjs[2] );
259         clReleaseMemObject( memobjs[1] );
260         clReleaseMemObject( memobjs[0] );
261         return -1;
262     }
263 
264     // test profiling
265     while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
266           CL_PROFILING_INFO_NOT_AVAILABLE );
267     if( err != CL_SUCCESS ){
268         print_error( err, "clGetEventProfilingInfo failed" );
269     clReleaseEvent( executeEvent );
270         clReleaseKernel( kernel[0] );
271         clReleaseProgram( program[0] );
272         clReleaseMemObject( memobjs[2] );
273         clReleaseMemObject( memobjs[1] );
274         clReleaseMemObject( memobjs[0] );
275         return -1;
276     }
277 
278     while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
279           CL_PROFILING_INFO_NOT_AVAILABLE );
280     if( err != CL_SUCCESS ){
281         print_error( err, "clGetEventProfilingInfo failed" );
282     clReleaseEvent( executeEvent );
283         clReleaseKernel( kernel[0] );
284         clReleaseProgram( program[0] );
285         clReleaseMemObject( memobjs[2] );
286         clReleaseMemObject( memobjs[1] );
287         clReleaseMemObject( memobjs[0] );
288         return -1;
289     }
290 
291     err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
292     if( err != CL_SUCCESS ){
293         print_error( err, "clGetEventProfilingInfo failed" );
294     clReleaseEvent( executeEvent );
295         clReleaseKernel( kernel[0] );
296         clReleaseProgram( program[0] );
297         clReleaseMemObject( memobjs[2] );
298         clReleaseMemObject( memobjs[1] );
299         clReleaseMemObject( memobjs[0] );
300         return -1;
301     }
302 
303     err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
304     if( err != CL_SUCCESS ){
305         print_error( err, "clGetEventProfilingInfo failed" );
306     clReleaseEvent( executeEvent );
307         clReleaseKernel( kernel[0] );
308         clReleaseProgram( program[0] );
309         clReleaseMemObject( memobjs[2] );
310         clReleaseMemObject( memobjs[1] );
311         clReleaseMemObject( memobjs[0] );
312         return -1;
313     }
314 
315     // read output image
316     size_t origin[3] = { 0, 0, 0 };
317     size_t region[3] = { w, h, 1 };
318     err = clEnqueueReadImage( queue, memobjs[1], true, origin, region, 0, 0, outptr, 0, NULL, NULL);
319     if( err != CL_SUCCESS ){
320         print_error( err, "clReadImage failed\n" );
321     clReleaseEvent( executeEvent );
322         clReleaseKernel( kernel[0] );
323         clReleaseProgram( program[0] );
324         clReleaseMemObject( memobjs[2] );
325         clReleaseMemObject( memobjs[1] );
326         clReleaseMemObject( memobjs[0] );
327         return -1;
328     }
329 
330     // release event, kernel, program, and memory objects
331   clReleaseEvent( executeEvent );
332     clReleaseKernel( kernel[0] );
333     clReleaseProgram( program[0] );
334     clReleaseMemObject( memobjs[2] );
335     clReleaseMemObject( memobjs[1] );
336     clReleaseMemObject( memobjs[0] );
337 
338     if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
339         err = -1;
340 
341     return err;
342 
343 }    // end kernelFilter()
344 
345 
basicFilter(int w,int h,int nChannels,uchar * inptr,uchar * outptr)346 static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
347 {
348     const float    filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
349     int            filter_w = 3, filter_h = 3;
350     int            x, y;
351 
352     for( y = 0; y < h; y++ ){
353         for( x = 0; x < w; x++ ){
354             basicFilterPixel( x, y, filter_w, filter_h, w, h, nChannels, filter_weights, inptr, outptr );
355         }
356     }
357 
358     return 0;
359 
360 }    // end of basicFilter()
361 
362 
test_execute(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)363 int test_execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
364 {
365     uchar    *inptr;
366     uchar    *outptr[2];
367     int        w = 256, h = 256;
368     int        nChannels = 4;
369     int        nElements = w * h * nChannels;
370     int        err = 0;
371     MTdata  d;
372 
373 
374     PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
375 
376     d = init_genrand( gRandomSeed );
377     inptr = createImage( nElements, d );
378     free_mtdata( d);    d = NULL;
379 
380     if( ! inptr ){
381         log_error( " unable to allocate %d bytes of memory for image\n", nElements );
382         return -1;
383     }
384 
385     outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
386     if( ! outptr[0] ){
387         log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
388         free( (void *)inptr );
389         return -1;
390     }
391 
392     outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
393     if( ! outptr[1] ){
394         log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
395         free( (void *)outptr[0] );
396         free( (void *)inptr );
397         return -1;
398     }
399 
400     err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] );
401 
402     if( ! err ){
403         basicFilter( w, h, nChannels, inptr, outptr[1] );
404 
405         // verify that the images are the same
406         err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels );
407         if( err )
408             log_error( " images do not match\n" );
409     }
410 
411     // clean up
412     free( (void *)outptr[1] );
413     free( (void *)outptr[0] );
414     free( (void *)inptr );
415 
416     return err;
417 
418 }    // end execute()
419 
420 
421 
422