• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17 
18 #if !defined(_WIN32)
19 #include <sys/mman.h>
20 #endif
21 
22 extern bool gTestImage2DFromBuffer;
23 extern cl_mem_flags gMemFlagsToUse;
24 extern int gtestTypesToRun;
25 
26 extern int test_write_image_1D_set(cl_device_id device, cl_context context,
27                                    cl_command_queue queue,
28                                    const cl_image_format *format,
29                                    ExplicitType inputType, MTdata d);
30 extern int test_write_image_3D_set(cl_device_id device, cl_context context,
31                                    cl_command_queue queue,
32                                    const cl_image_format *format,
33                                    ExplicitType inputType, MTdata d);
34 extern int test_write_image_1D_array_set(cl_device_id device,
35                                          cl_context context,
36                                          cl_command_queue queue,
37                                          const cl_image_format *format,
38                                          ExplicitType inputType, MTdata d);
39 extern int test_write_image_2D_array_set(cl_device_id device,
40                                          cl_context context,
41                                          cl_command_queue queue,
42                                          const cl_image_format *format,
43                                          ExplicitType inputType, MTdata d);
44 
45 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
46 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
47 
48 const char *writeKernelSourcePattern =
49 "__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
50 "{\n"
51 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
52 "%s"
53 "   write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n"
54 "}";
55 
56 const char *read_writeKernelSourcePattern =
57 "__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
58 "{\n"
59 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
60 "%s"
61 "   write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n"
62 "}";
63 
64 const char *offset2DKernelSource =
65 "   int offset = tidY*get_image_width(output) + tidX;\n";
66 
67 const char *offset2DLodKernelSource =
68 "   int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
69 "   int offset = tidY * width_lod + tidX;\n";
70 
test_write_image(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)71 int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
72                      image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
73 {
74     int                 totalErrors = 0;
75     size_t              num_flags   = 0;
76     const cl_mem_flags  *mem_flag_types = NULL;
77     const char *        *mem_flag_names = NULL;
78     const cl_mem_flags  write_only_mem_flag_types[2] = {  CL_MEM_WRITE_ONLY,   CL_MEM_READ_WRITE };
79     const char *        write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
80     const cl_mem_flags  read_write_mem_flag_types[1] = {  CL_MEM_READ_WRITE};
81     const char *        read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
82 
83     if(gtestTypesToRun & kWriteTests)
84     {
85         mem_flag_types = write_only_mem_flag_types;
86         mem_flag_names = write_only_mem_flag_names;
87         num_flags      = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
88     }
89     else
90     {
91         mem_flag_types = read_write_mem_flag_types;
92         mem_flag_names = read_write_mem_flag_names;
93         num_flags      = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
94     }
95 
96     size_t  pixelSize       = get_pixel_size( imageInfo->format );
97     int     channel_scale   = (imageInfo->format->image_channel_order == CL_DEPTH) ? 1 : 4;
98 
99     for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
100     {
101         int error;
102         size_t threads[2];
103         bool verifyRounding = false;
104         int forceCorrectlyRoundedWrites = 0;
105 
106 #if defined( __APPLE__ )
107         // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
108         if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
109             forceCorrectlyRoundedWrites = 1;
110 #endif
111 
112         if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
113             if( DetectFloatToHalfRoundingMode(queue) )
114                 return 1;
115 
116         BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues, imageBufferValues;
117 
118         create_random_image_data( inputType, imageInfo, imageValues, d, gTestImage2DFromBuffer );
119 
120         if(!gTestMipmaps)
121         {
122             if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
123             {
124                 /* Pilot data for sRGB images */
125                 if(is_sRGBA_order(imageInfo->format->image_channel_order))
126                 {
127                     // We want to generate ints (mostly) in range of the target format which should be [0,255]
128                     // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
129                     // it can test some out-of-range data points
130                     const unsigned int test_range_ext = 16;
131                     int formatMin = 0 - test_range_ext;
132                     int formatMax = 255 + test_range_ext;
133                     int pixel_value = 0;
134 
135                     // First, fill with arbitrary floats
136                     for( size_t y = 0; y < imageInfo->height; y++ )
137                     {
138                         float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4;
139                         for( size_t i = 0; i < imageInfo->width * 4; i++ )
140                         {
141                             pixel_value = random_in_range( formatMin, (int)formatMax, d );
142                             inputValues[ i ] = (float)(pixel_value/255.0f);
143                         }
144                     }
145 
146                     // Throw a few extra test values in there
147                     float *inputValues = (float *)(char*)imageValues;
148                     size_t i = 0;
149 
150                     // Piloting some debug inputs.
151                     inputValues[ i++ ] = -0.5f;
152                     inputValues[ i++ ] = 0.5f;
153                     inputValues[ i++ ] = 2.0f;
154                     inputValues[ i++ ] = 0.5f;
155 
156                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
157                     // is correct
158                     if( imageInfo->width > 12 )
159                     {
160                         float formatMax = (float)get_format_max_int( imageInfo->format );
161                         inputValues[ i++ ] = 4.0f / formatMax;
162                         inputValues[ i++ ] = 4.3f / formatMax;
163                         inputValues[ i++ ] = 4.5f / formatMax;
164                         inputValues[ i++ ] = 4.7f / formatMax;
165                         inputValues[ i++ ] = 5.0f / formatMax;
166                         inputValues[ i++ ] = 5.3f / formatMax;
167                         inputValues[ i++ ] = 5.5f / formatMax;
168                         inputValues[ i++ ] = 5.7f / formatMax;
169                     }
170                 }
171                 else
172                 {
173                     // First, fill with arbitrary floats
174                     for( size_t y = 0; y < imageInfo->height; y++ )
175                     {
176                         float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * channel_scale;
177                         for( size_t i = 0; i < imageInfo->width * channel_scale; i++ )
178                             inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
179                     }
180 
181                     // Throw a few extra test values in there
182                     float *inputValues = (float *)(char*)imageValues;
183                     size_t i = 0;
184                     inputValues[ i++ ] = -0.0000000000009f;
185                     inputValues[ i++ ] = 1.f;
186                     inputValues[ i++ ] = -1.f;
187                     inputValues[ i++ ] = 2.f;
188 
189                     // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
190                     // is correct
191                     if( imageInfo->width > 12 )
192                     {
193                         float formatMax = (float)get_format_max_int( imageInfo->format );
194                         inputValues[ i++ ] = 4.0f / formatMax;
195                         inputValues[ i++ ] = 4.3f / formatMax;
196                         inputValues[ i++ ] = 4.5f / formatMax;
197                         inputValues[ i++ ] = 4.7f / formatMax;
198                         inputValues[ i++ ] = 5.0f / formatMax;
199                         inputValues[ i++ ] = 5.3f / formatMax;
200                         inputValues[ i++ ] = 5.5f / formatMax;
201                         inputValues[ i++ ] = 5.7f / formatMax;
202                         verifyRounding = true;
203                     }
204                 }
205             }
206             else if( inputType == kUInt )
207             {
208                 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
209                 size_t i = 0;
210                 inputValues[ i++ ] = 0;
211                 inputValues[ i++ ] = 65535;
212                 inputValues[ i++ ] = 7271820;
213                 inputValues[ i++ ] = 0;
214             }
215         }
216 
217         // Construct testing sources
218         clProtectedImage protImage;
219         clMemWrapper unprotImage;
220         cl_mem image;
221         cl_mem imageBuffer;
222 
223         if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
224         {
225             if (gTestImage2DFromBuffer)
226             {
227                 imageBuffer = clCreateBuffer( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR,
228                                              imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
229                 test_error( error, "Unable to create buffer" );
230                 unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
231                                                      imageInfo->width, imageInfo->height, imageInfo->rowPitch,
232                                                      imageBuffer, &error );
233 
234             }
235             else
236             {
237                 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
238                 // Do not use protected images for max image size test since it rounds the row size to a page size
239                 if (gTestMaxImages) {
240                     create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
241 
242                     unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
243                                               imageInfo->width, imageInfo->height, 0,
244                                               maxImageUseHostPtrBackingStore, &error );
245                 } else {
246                     error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height );
247                 }
248             }
249             if( error != CL_SUCCESS )
250             {
251                 if (gTestImage2DFromBuffer) {
252                     clReleaseMemObject(imageBuffer);
253                     if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
254                         log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
255                         return 0;
256                     }
257                 }
258 
259                 log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
260                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
261                 return error;
262             }
263 
264             if (gTestMaxImages || gTestImage2DFromBuffer)
265                 image = (cl_mem)unprotImage;
266             else
267                 image = (cl_mem)protImage;
268         }
269         else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
270         {
271             if( gTestMipmaps )
272             {
273                 cl_image_desc image_desc = {0};
274                 image_desc.image_type = imageInfo->type;
275                 image_desc.num_mip_levels = imageInfo->num_mip_levels;
276                 image_desc.image_width = imageInfo->width;
277                 image_desc.image_height = imageInfo->height;
278 
279                 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
280                                              imageInfo->format, &image_desc, NULL, &error);
281                 if( error != CL_SUCCESS )
282                 {
283                     log_error( "ERROR: Unable to create %d level 2D image of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height,
284                                IGetErrorString( error ), mem_flag_names[mem_flag_index] );
285                     return error;
286                 }
287             }
288             else if (gTestImage2DFromBuffer)
289             {
290                 generate_random_image_data( imageInfo, imageBufferValues, d );
291                 imageBuffer = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR,
292                                              imageInfo->rowPitch * imageInfo->height, imageBufferValues, &error);
293                 test_error( error, "Unable to create buffer" );
294                 unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format,
295                                                      imageInfo->width, imageInfo->height, imageInfo->rowPitch,
296                                                      imageBuffer, &error );
297 
298             }
299             else
300             {
301                 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
302                 // it works just as if no flag is specified, so we just do the same thing either way
303                 // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
304                 unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
305                                           imageInfo->width, imageInfo->height, 0,
306                                           imageValues, &error );
307             }
308             if( error != CL_SUCCESS )
309             {
310                 if (gTestImage2DFromBuffer) {
311                     clReleaseMemObject(imageBuffer);
312                     if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
313                         log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
314                         return 0;
315                     }
316                 }
317 
318                 log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height,
319                           imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] );
320                 return error;
321             }
322             image = unprotImage;
323         }
324 
325         error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
326         test_error( error, "Unable to set kernel arguments" );
327 
328         size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
329         size_t origin[ 3 ] = { 0, 0, 0 };
330         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
331         size_t resultSize;
332 
333         int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
334         for( int lod = 0; lod < num_lod_loops; lod++)
335         {
336             if(gTestMipmaps)
337             {
338                 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
339             }
340             // Run the kernel
341             threads[0] = (size_t)width_lod;
342             threads[1] = (size_t)height_lod;
343 
344             clMemWrapper inputStream;
345 
346             char *imagePtrOffset = imageValues + nextLevelOffset;
347 
348             inputStream =
349                 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
350                                get_explicit_type_size(inputType) * channel_scale
351                                    * width_lod * height_lod,
352                                imagePtrOffset, &error);
353             test_error( error, "Unable to create input buffer" );
354 
355             // Set arguments
356             error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
357             test_error( error, "Unable to set kernel arguments" );
358 
359             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
360             test_error( error, "Unable to run kernel" );
361 
362             // Get results
363             if( gTestMipmaps )
364                 resultSize = width_lod * height_lod * get_pixel_size(imageInfo->format);
365             else
366                 resultSize = imageInfo->rowPitch * imageInfo->height;
367             clProtectedArray PA(resultSize);
368             char *resultValues = (char *)((void *)PA);
369 
370             if( gDebugTrace )
371                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
372 
373             origin[2] = lod;
374             region[0] = width_lod;
375             region[1] = height_lod;
376             error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL );
377             test_error( error, "Unable to read results from kernel" );
378             if( gDebugTrace )
379                 log_info( "    results read\n" );
380 
381             // Validate results element by element
382             char *imagePtr = (char*)imageValues + nextLevelOffset;
383             int numTries = 5;
384             for( size_t y = 0, i = 0; y < height_lod; y++ )
385             {
386                 char *resultPtr;
387                 if( gTestMipmaps )
388                     resultPtr = (char *)resultValues + y * width_lod * pixelSize;
389                 else
390                     resultPtr = (char*)resultValues + y * imageInfo->rowPitch;
391                 for( size_t x = 0; x < width_lod; x++, i++ )
392                 {
393                     char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
394 
395                     // Convert this pixel
396                     if( inputType == kFloat )
397                         pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
398                     else if( inputType == kInt )
399                         pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
400                     else // if( inputType == kUInt )
401                         pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
402 
403                     // Compare against the results
404                     if(is_sRGBA_order(imageInfo->format->image_channel_order))
405                     {
406                         // Compare sRGB-mapped values
407                         cl_float expected[4]    = {0};
408                         cl_float* input_values  = (float*)imagePtr;
409                         cl_uchar *actual        = (cl_uchar*)resultPtr;
410                         float max_err           = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
411                         float err[4]            = {0.0f};
412 
413                         for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
414                         {
415                             if(j < 3)
416                             {
417                                 expected[j] = sRGBmap(input_values[j]);
418                             }
419                             else // there is no sRGB conversion for alpha component if it exists
420                             {
421                                 expected[j] = NORMALIZE(input_values[j], 255.0f);
422                             }
423 
424                             err[j] = fabsf( expected[ j ] - actual[ j ] );
425                         }
426 
427                         if ((err[0] > max_err) ||
428                             (err[1] > max_err) ||
429                             (err[2] > max_err) ||
430                             (err[3] > 0)) // there is no conversion for alpha so the error should be zero
431                         {
432                             log_error( "       Error:     %g %g %g %g\n", err[0], err[1], err[2], err[3]);
433                             log_error( "       Input:     %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
434                             log_error( "       Expected:  %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
435                             log_error( "       Actual:    %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
436                             return 1;
437                         }
438                     }
439                     else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
440                     {
441                         float *expected = (float *)resultBuffer;
442                         float *actual = (float *)resultPtr;
443 
444                         if( !validate_float_write_results( expected, actual, imageInfo ) )
445                         {
446                             unsigned int *e = (unsigned int *)resultBuffer;
447                             unsigned int *a = (unsigned int *)resultPtr;
448                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
449                             log_error( "       Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
450                             log_error( "       Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
451                             log_error( "       Actual:   %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
452                             log_error( "       Actual:   %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
453                             totalErrors++;
454                             if( ( --numTries ) == 0 )
455                                 return 1;
456                         }
457                     }
458                     else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
459                     {
460                         cl_half *e = (cl_half *)resultBuffer;
461                         cl_half *a = (cl_half *)resultPtr;
462                         if( !validate_half_write_results( e, a, imageInfo ) )
463                         {
464                             totalErrors++;
465                             log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[ mem_flag_index ] );
466                             log_error( "    Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
467                             log_error( "    Actual:   0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
468                             if( inputType == kFloat )
469                             {
470                                 float *p = (float *)(char *)imagePtr;
471                                 log_error( "    Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
472                                 log_error( "          : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
473                             }
474                             if( ( --numTries ) == 0 )
475                                 return 1;
476                         }
477                     }
478                     else
479                     {
480                         // Exact result passes every time
481                         if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
482                         {
483                             // result is inexact.  Calculate error
484                             int failure = 1;
485                             float errors[4] = {NAN, NAN, NAN, NAN};
486                             pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
487 
488                             // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
489                             if( 0 == forceCorrectlyRoundedWrites    &&
490                                (
491                                 imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
492                                 imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
493                                 imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
494                                 imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
495                                 imageInfo->format->image_channel_data_type == CL_SNORM_INT16
496                                 ))
497                             {
498                                 if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
499                                    ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f)  )
500                                     failure = 0;
501                             }
502 
503 
504                             if( failure )
505                             {
506                                 totalErrors++;
507                                 // Is it our special rounding test?
508                                 if( verifyRounding && i >= 1 && i <= 2 )
509                                 {
510                                     // Try to guess what the rounding mode of the device really is based on what it returned
511                                     const char *deviceRounding = "unknown";
512                                     unsigned int deviceResults[8];
513                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
514                                     read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
515 
516                                     if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
517                                        deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
518                                         deviceRounding = "truncate";
519                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
520                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
521                                         deviceRounding = "round to nearest";
522                                     else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
523                                             deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
524                                         deviceRounding = "round to even";
525 
526                                     log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
527                                     log_error( "       Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
528                                               deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
529                                     log_error( "       Rounding mode of device appears to be %s\n", deviceRounding );
530                                     return 1;
531                                 }
532                                 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
533                                 switch(imageInfo->format->image_channel_data_type)
534                                 {
535                                     case CL_UNORM_INT8:
536                                     case CL_SNORM_INT8:
537                                     case CL_UNSIGNED_INT8:
538                                     case CL_SIGNED_INT8:
539                                     case CL_UNORM_INT_101010:
540                                         log_error( "    Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
541                                         log_error( "    Actual:   0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
542                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
543                                         break;
544                                     case CL_UNORM_INT16:
545                                     case CL_SNORM_INT16:
546                                     case CL_UNSIGNED_INT16:
547                                     case CL_SIGNED_INT16:
548 #ifdef CL_SFIXED14_APPLE
549                                     case CL_SFIXED14_APPLE:
550 #endif
551                                         log_error( "    Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
552                                         log_error( "    Actual:   0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
553                                         log_error( "    Error:    %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
554                                         break;
555                                     case CL_HALF_FLOAT:
556                                         log_error("    Expected: 0x%4.4x "
557                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
558                                                   ((cl_half *)resultBuffer)[0],
559                                                   ((cl_half *)resultBuffer)[1],
560                                                   ((cl_half *)resultBuffer)[2],
561                                                   ((cl_half *)resultBuffer)[3]);
562                                         log_error("    Actual:   0x%4.4x "
563                                                   "0x%4.4x 0x%4.4x 0x%4.4x\n",
564                                                   ((cl_half *)resultPtr)[0],
565                                                   ((cl_half *)resultPtr)[1],
566                                                   ((cl_half *)resultPtr)[2],
567                                                   ((cl_half *)resultPtr)[3]);
568                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
569                                         break;
570                                     case CL_UNSIGNED_INT32:
571                                     case CL_SIGNED_INT32:
572                                         log_error( "    Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
573                                         log_error( "    Actual:   0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
574                                         break;
575                                     case CL_FLOAT:
576                                         log_error( "    Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
577                                         log_error( "    Actual:   %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
578                                         log_error( "    Ulps:     %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
579                                         break;
580                                 }
581 
582                                 float *v = (float *)(char *)imagePtr;
583                                 log_error( "   src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
584                                 log_error( "      : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
585                                 log_error( "   src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[  1], v[ 2 ], v[ 3 ] );
586 
587                                 if( ( --numTries ) == 0 )
588                                     return 1;
589                             }
590                         }
591                     }
592                     imagePtr += get_explicit_type_size( inputType ) * channel_scale;
593                     resultPtr += get_pixel_size( imageInfo->format );
594                 }
595             }
596             {
597                 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format);
598                 width_lod = (width_lod >> 1) ?(width_lod >> 1) : 1;
599                 height_lod = (height_lod >> 1) ?(height_lod >> 1) : 1;
600             }
601         }
602 
603         if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
604     }
605 
606 
607     // All done!
608     return totalErrors;
609 }
610 
611 
test_write_image_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)612 int test_write_image_set(cl_device_id device, cl_context context,
613                          cl_command_queue queue, const cl_image_format *format,
614                          ExplicitType inputType, MTdata d)
615 {
616     char programSrc[10240];
617     const char *ptr;
618     const char *readFormat;
619     clProgramWrapper program;
620     clKernelWrapper kernel;
621     const char *KernelSourcePattern = NULL;
622     int error;
623 
624     if (gTestImage2DFromBuffer)
625     {
626       if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
627       {
628         switch (format->image_channel_data_type)
629         {
630           case CL_UNORM_INT8:
631           case CL_UNORM_INT16:
632           case CL_SNORM_INT8:
633           case CL_SNORM_INT16:
634           case CL_HALF_FLOAT:
635           case CL_FLOAT:
636           case CL_SIGNED_INT8:
637           case CL_SIGNED_INT16:
638           case CL_SIGNED_INT32:
639           case CL_UNSIGNED_INT8:
640           case CL_UNSIGNED_INT16:
641           case CL_UNSIGNED_INT32:
642             log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
643                      GetChannelTypeName( format->image_channel_data_type ));
644             return 0;
645           default:
646             break;
647         }
648       }
649     }
650 
651     // Get our operating parameters
652     size_t maxWidth, maxHeight;
653     cl_ulong maxAllocSize, memSize;
654 
655     image_descriptor imageInfo = { 0x0 };
656 
657     imageInfo.format = format;
658     imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0;
659     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
660 
661     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
662     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
663     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
664     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
665     test_error( error, "Unable to get max image 2D size from device" );
666 
667     if (memSize > (cl_ulong)SIZE_MAX) {
668       memSize = (cl_ulong)SIZE_MAX;
669     }
670 
671     // Determine types
672     if( inputType == kInt )
673         readFormat = "i";
674     else if( inputType == kUInt )
675         readFormat = "ui";
676     else // kFloat
677         readFormat = "f";
678 
679     if(gtestTypesToRun & kWriteTests)
680     {
681         KernelSourcePattern = writeKernelSourcePattern;
682     }
683     else
684     {
685         KernelSourcePattern = read_writeKernelSourcePattern;
686     }
687 
688     // Construct the source
689     sprintf( programSrc,
690              KernelSourcePattern,
691              get_explicit_type_name( inputType ),
692              (format->image_channel_order == CL_DEPTH) ? "" : "4",
693              (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
694              gTestMipmaps ? ", int lod" : "",
695              gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource,
696              readFormat,
697              gTestMipmaps ? ", lod" : "" );
698 
699     ptr = programSrc;
700     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
701                                         "sample_kernel");
702     test_error( error, "Unable to create testing kernel" );
703 
704     // Run tests
705     if( gTestSmallImages )
706     {
707         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
708         {
709             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
710             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
711             {
712                 if( gTestMipmaps )
713                     imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
714 
715                 if( gDebugTrace )
716                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
717                 int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
718                 if( retCode )
719                     return retCode;
720             }
721         }
722     }
723     else if( gTestMaxImages )
724     {
725         // Try a specific set of maximum sizes
726         size_t numbeOfSizes;
727         size_t sizes[100][3];
728 
729         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
730 
731         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
732         {
733             imageInfo.width = sizes[ idx ][ 0 ];
734             imageInfo.height = sizes[ idx ][ 1 ];
735             imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
736             if( gTestMipmaps )
737                 imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
738             log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height);
739             int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
740             if( retCode )
741                 return retCode;
742         }
743     }
744     else if( gTestRounding )
745     {
746         size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
747         imageInfo.height = typeRange / 256;
748         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
749 
750         imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
751         int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
752         if( retCode )
753             return retCode;
754     }
755     else
756     {
757 
758         cl_uint imagePitchAlign = 0;
759         if (gTestImage2DFromBuffer)
760         {
761 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
762             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
763             if (!imagePitchAlign)
764               imagePitchAlign = 1;
765 #endif
766             test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
767         }
768 
769         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
770         {
771             cl_ulong size;
772             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
773             // image, the result array, plus offset arrays, will fit in the global ram space
774             do
775             {
776                 imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
777                 imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
778 
779                 if(gTestMipmaps)
780                 {
781                     imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, d);
782                     size = 4 * compute_mipmapped_image_size(imageInfo);
783                 }
784                 else
785                 {
786                     imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
787                     if( gEnablePitch )
788                     {
789                         size_t extraWidth = (int)random_log_in_range( 0, 64, d );
790                         imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
791                     }
792 
793                     // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
794                     if (gTestImage2DFromBuffer)
795                     {
796                         size_t pitch = imagePitchAlign * get_pixel_size( imageInfo.format );
797                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
798                     }
799 
800                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
801                 }
802             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
803 
804             if( gDebugTrace )
805                 log_info( "   at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
806 
807             int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d );
808             if( retCode )
809                 return retCode;
810         }
811     }
812 
813     return 0;
814 }
815 
test_write_image_formats(cl_device_id device,cl_context context,cl_command_queue queue,const std::vector<cl_image_format> & formatList,const std::vector<bool> & filterFlags,image_sampler_data * imageSampler,ExplicitType inputType,cl_mem_object_type imageType)816 int test_write_image_formats(cl_device_id device, cl_context context,
817                              cl_command_queue queue,
818                              const std::vector<cl_image_format> &formatList,
819                              const std::vector<bool> &filterFlags,
820                              image_sampler_data *imageSampler,
821                              ExplicitType inputType,
822                              cl_mem_object_type imageType)
823 {
824     if( imageSampler->filter_mode == CL_FILTER_LINEAR )
825         // No need to run for linear filters
826         return 0;
827 
828     int ret = 0;
829 
830     log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) );
831 
832 
833     RandomSeed seed( gRandomSeed );
834 
835     for (unsigned int i = 0; i < formatList.size(); i++)
836     {
837         const cl_image_format &imageFormat = formatList[i];
838 
839         if( filterFlags[ i ] )
840             continue;
841 
842         gTestCount++;
843 
844         print_write_header( &imageFormat, false );
845         int retCode;
846         switch (imageType)
847         {
848             case CL_MEM_OBJECT_IMAGE1D:
849                 retCode = test_write_image_1D_set( device, context, queue, &imageFormat, inputType, seed );
850                 break;
851             case CL_MEM_OBJECT_IMAGE2D:
852                 retCode = test_write_image_set( device, context, queue, &imageFormat, inputType, seed );
853                 break;
854             case CL_MEM_OBJECT_IMAGE3D:
855                 retCode = test_write_image_3D_set( device, context, queue, &imageFormat, inputType, seed );
856                 break;
857             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
858                 retCode = test_write_image_1D_array_set( device, context, queue, &imageFormat, inputType, seed );
859                 break;
860             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
861                 retCode = test_write_image_2D_array_set( device, context, queue, &imageFormat, inputType, seed );
862                 break;
863         }
864 
865         if( retCode != 0 )
866         {
867             gFailCount++;
868             log_error( "FAILED: " );
869             print_write_header( &imageFormat, true );
870             log_info( "\n" );
871         }
872         ret += retCode;
873     }
874     return ret;
875 }
876 
877 
878