1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17
18 #if !defined(_WIN32)
19 #include <sys/mman.h>
20 #endif
21
22 extern cl_mem_flags gMemFlagsToUse;
23 extern int gtestTypesToRun;
24
25 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
26 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
27
28 // Utility function to clamp down image sizes for certain tests to avoid
29 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)30 static size_t reduceImageSizeRange(size_t maxDimSize) {
31 size_t DimSize = maxDimSize/32;
32 if (DimSize < (size_t) 16)
33 return 16;
34 else if (DimSize > (size_t) 128)
35 return 128;
36 else
37 return DimSize;
38 }
39
reduceImageDepth(size_t maxDepth)40 static size_t reduceImageDepth(size_t maxDepth) {
41 size_t Depth = maxDepth/32;
42 if (Depth < (size_t) 8)
43 return 8;
44 else if (Depth > (size_t) 32)
45 return 32;
46 else
47 return Depth;
48 }
49
50 const char *write2DArrayKernelSourcePattern =
51 "__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n"
52 "{\n"
53 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
54 "%s"
55 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
56 "}";
57
58 const char *readwrite2DArrayKernelSourcePattern =
59 "__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n"
60 "{\n"
61 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
62 "%s"
63 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n"
64 "}";
65
66 const char *offset2DArrayKernelSource =
67 " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
68
69 const char *offset2DArrayLodKernelSource =
70 " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
71 " int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
72 " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
73
test_write_image_2D_array(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)74 int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
75 image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
76 {
77 int totalErrors = 0;
78
79 size_t num_flags = 0;
80 const cl_mem_flags *mem_flag_types = NULL;
81 const char * *mem_flag_names = NULL;
82 const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
83 const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
84 const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
85 const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
86 if(gtestTypesToRun & kWriteTests)
87 {
88 mem_flag_types = write_only_mem_flag_types;
89 mem_flag_names = write_only_mem_flag_names;
90 num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
91 }
92 else
93 {
94 mem_flag_types = read_write_mem_flag_types;
95 mem_flag_names = read_write_mem_flag_names;
96 num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
97 }
98
99 size_t pixelSize = get_pixel_size( imageInfo->format );
100
101 for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
102 {
103 int error;
104 size_t threads[3];
105 bool verifyRounding = false;
106 int forceCorrectlyRoundedWrites = 0;
107
108 #if defined( __APPLE__ )
109 // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
110 if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
111 forceCorrectlyRoundedWrites = 1;
112 #endif
113
114 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
115 if( DetectFloatToHalfRoundingMode(queue) )
116 return 1;
117
118 BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
119
120 create_random_image_data( inputType, imageInfo, imageValues, d );
121
122 if(!gTestMipmaps)
123 {
124 if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
125 {
126 /* Pilot data for sRGB images */
127 if(is_sRGBA_order(imageInfo->format->image_channel_order))
128 {
129 // We want to generate ints (mostly) in range of the target format which should be [0,255]
130 // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
131 // it can test some out-of-range data points
132 const unsigned int test_range_ext = 16;
133 int formatMin = 0 - test_range_ext;
134 int formatMax = 255 + test_range_ext;
135 int pixel_value = 0;
136
137 // First, fill with arbitrary floats
138 for( size_t z = 0; z < imageInfo->arraySize; z++ )
139 {
140 for( size_t y = 0; y < imageInfo->height; y++ )
141 {
142 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
143 for( size_t i = 0; i < imageInfo->width * 4; i++ )
144 {
145 pixel_value = random_in_range( formatMin, (int)formatMax, d );
146 inputValues[ i ] = (float)(pixel_value/255.0f);
147 }
148 }
149 }
150
151 // Throw a few extra test values in there
152 float *inputValues = (float *)(char*)imageValues;
153 size_t i = 0;
154 // Piloting some debug inputs.
155 inputValues[ i++ ] = -0.5f;
156 inputValues[ i++ ] = 0.5f;
157 inputValues[ i++ ] = 2.f;
158 inputValues[ i++ ] = 0.5f;
159
160 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
161 // is correct
162 if( imageInfo->width > 12 )
163 {
164 float formatMax = (float)get_format_max_int( imageInfo->format );
165 inputValues[ i++ ] = 4.0f / formatMax;
166 inputValues[ i++ ] = 4.3f / formatMax;
167 inputValues[ i++ ] = 4.5f / formatMax;
168 inputValues[ i++ ] = 4.7f / formatMax;
169 inputValues[ i++ ] = 5.0f / formatMax;
170 inputValues[ i++ ] = 5.3f / formatMax;
171 inputValues[ i++ ] = 5.5f / formatMax;
172 inputValues[ i++ ] = 5.7f / formatMax;
173 }
174 }
175 else
176 {
177 // First, fill with arbitrary floats
178 for( size_t z = 0; z < imageInfo->arraySize; z++ )
179 {
180 for( size_t y = 0; y < imageInfo->height; y++ )
181 {
182 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
183 for( size_t i = 0; i < imageInfo->width * 4; i++ )
184 inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
185 }
186 }
187
188 // Throw a few extra test values in there
189 float *inputValues = (float *)(char*)imageValues;
190 size_t i = 0;
191 inputValues[ i++ ] = -0.0000000000009f;
192 inputValues[ i++ ] = 1.f;
193 inputValues[ i++ ] = -1.f;
194 inputValues[ i++ ] = 2.f;
195
196 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
197 // is correct
198 if( imageInfo->width > 12 )
199 {
200 float formatMax = (float)get_format_max_int( imageInfo->format );
201 inputValues[ i++ ] = 4.0f / formatMax;
202 inputValues[ i++ ] = 4.3f / formatMax;
203 inputValues[ i++ ] = 4.5f / formatMax;
204 inputValues[ i++ ] = 4.7f / formatMax;
205 inputValues[ i++ ] = 5.0f / formatMax;
206 inputValues[ i++ ] = 5.3f / formatMax;
207 inputValues[ i++ ] = 5.5f / formatMax;
208 inputValues[ i++ ] = 5.7f / formatMax;
209 verifyRounding = true;
210 }
211 }
212 }
213 else if( inputType == kUInt )
214 {
215 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
216 size_t i = 0;
217 inputValues[ i++ ] = 0;
218 inputValues[ i++ ] = 65535;
219 inputValues[ i++ ] = 7271820;
220 inputValues[ i++ ] = 0;
221 }
222 }
223
224 // Construct testing sources
225 clProtectedImage protImage;
226 clMemWrapper unprotImage;
227 cl_mem image;
228
229 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
230 {
231 create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
232
233 unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
234 imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0,
235 maxImageUseHostPtrBackingStore, &error );
236
237 if( error != CL_SUCCESS )
238 {
239 log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
240 return error;
241 }
242
243 image = (cl_mem)unprotImage;
244 }
245 else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
246 {
247 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
248 // it works just as if no flag is specified, so we just do the same thing either way
249 // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
250 if( gTestMipmaps )
251 {
252 cl_image_desc image_desc = {0};
253 image_desc.image_type = imageInfo->type;
254 image_desc.num_mip_levels = imageInfo->num_mip_levels;
255 image_desc.image_width = imageInfo->width;
256 image_desc.image_height = imageInfo->height;
257 image_desc.image_array_size = imageInfo->arraySize;
258
259 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
260 imageInfo->format, &image_desc, NULL, &error);
261 if( error != CL_SUCCESS )
262 {
263 log_error( "ERROR: Unable to create %d level 2D image array of size %ld x %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->arraySize,
264 IGetErrorString( error ), mem_flag_names[mem_flag_index] );
265 return error;
266 }
267 }
268 else
269 {
270 unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
271 imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error );
272 if( error != CL_SUCCESS )
273 {
274 log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) );
275 return error;
276 }
277 }
278 image = unprotImage;
279 }
280
281 error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
282 test_error( error, "Unable to set kernel arguments" );
283
284 size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0;
285 size_t origin[ 4 ] = { 0, 0, 0, 0 };
286 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
287 size_t resultSize;
288
289 int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
290 for( int lod = 0; lod < num_lod_loops; lod++)
291 {
292 if(gTestMipmaps)
293 {
294 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
295 }
296 // Run the kernel
297 threads[0] = (size_t)width_lod;
298 threads[1] = (size_t)height_lod;
299 threads[2] = (size_t)imageInfo->arraySize;
300
301 clMemWrapper inputStream;
302
303 char *imagePtrOffset = imageValues + nextLevelOffset;
304 inputStream =
305 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
306 get_explicit_type_size(inputType) * 4 * width_lod
307 * height_lod * imageInfo->arraySize,
308 imagePtrOffset, &error);
309 test_error( error, "Unable to create input buffer" );
310
311 // Set arguments
312 error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
313 test_error( error, "Unable to set kernel arguments" );
314
315 error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
316 test_error( error, "Unable to run kernel" );
317
318 // Get results
319 if( gTestMipmaps )
320 resultSize = width_lod * height_lod *imageInfo->arraySize * pixelSize;
321 else
322 resultSize = imageInfo->slicePitch *imageInfo->arraySize;
323 clProtectedArray PA(resultSize);
324 char *resultValues = (char *)((void *)PA);
325
326 if( gDebugTrace )
327 log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
328
329 origin[3] = lod;
330 region[0] = width_lod;
331 region[1] = height_lod;
332
333 error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
334 test_error( error, "Unable to read results from kernel" );
335 if( gDebugTrace )
336 log_info( " results read\n" );
337
338 // Validate results element by element
339 char *imagePtr = imageValues + nextLevelOffset;
340 int numTries = 5;
341 for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ )
342 {
343 for( size_t y = 0; y < height_lod; y++ )
344 {
345 char *resultPtr;
346 if( gTestMipmaps )
347 resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
348 else
349 resultPtr = (char*)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
350 for( size_t x = 0; x < width_lod; x++, i++ )
351 {
352 char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
353
354 // Convert this pixel
355 if( inputType == kFloat )
356 pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
357 else if( inputType == kInt )
358 pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
359 else // if( inputType == kUInt )
360 pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
361
362 // Compare against the results
363 if(is_sRGBA_order(imageInfo->format->image_channel_order))
364 {
365 // Compare sRGB-mapped values
366 cl_float expected[4] = {0};
367 cl_float* input_values = (float*)imagePtr;
368 cl_uchar *actual = (cl_uchar*)resultPtr;
369 float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
370 float err[4] = {0.0f};
371
372 for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
373 {
374 if(j < 3)
375 {
376 expected[j] = sRGBmap(input_values[j]);
377 }
378 else // there is no sRGB conversion for alpha component if it exists
379 {
380 expected[j] = NORMALIZE(input_values[j], 255.0f);
381 }
382
383 err[j] = fabsf( expected[ j ] - actual[ j ] );
384 }
385
386 if ((err[0] > max_err) ||
387 (err[1] > max_err) ||
388 (err[2] > max_err) ||
389 (err[3] > 0)) // there is no conversion for alpha so the error should be zero
390 {
391 log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
392 log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
393 log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
394 log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
395 return 1;
396 }
397 }
398 else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
399 {
400 float *expected = (float *)resultBuffer;
401 float *actual = (float *)resultPtr;
402
403 if( !validate_float_write_results( expected, actual, imageInfo ) )
404 {
405 unsigned int *e = (unsigned int *)resultBuffer;
406 unsigned int *a = (unsigned int *)resultPtr;
407 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
408 log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
409 log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
410 log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
411 log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
412 totalErrors++;
413 if( ( --numTries ) == 0 )
414 return 1;
415 }
416 }
417 else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
418 {
419 cl_half *e = (cl_half *)resultBuffer;
420 cl_half *a = (cl_half *)resultPtr;
421 if( !validate_half_write_results( e, a, imageInfo ) )
422 {
423 totalErrors++;
424 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
425 unsigned short *e = (unsigned short *)resultBuffer;
426 unsigned short *a = (unsigned short *)resultPtr;
427 log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
428 log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
429 if( inputType == kFloat )
430 {
431 float *p = (float *)imagePtr;
432 log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
433 log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
434 }
435 if( ( --numTries ) == 0 )
436 return 1;
437 }
438 }
439 else
440 {
441 // Exact result passes every time
442 if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
443 {
444 // result is inexact. Calculate error
445 int failure = 1;
446 float errors[4] = {NAN, NAN, NAN, NAN};
447 pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
448
449 // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
450 if( 0 == forceCorrectlyRoundedWrites &&
451 (
452 imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
453 imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
454 imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
455 imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
456 imageInfo->format->image_channel_data_type == CL_SNORM_INT16
457 ))
458 {
459 if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
460 ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
461 failure = 0;
462 }
463
464
465 if( failure )
466 {
467 totalErrors++;
468 // Is it our special rounding test?
469 if( verifyRounding && i >= 1 && i <= 2 )
470 {
471 // Try to guess what the rounding mode of the device really is based on what it returned
472 const char *deviceRounding = "unknown";
473 unsigned int deviceResults[8];
474 read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod);
475 read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
476
477 if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
478 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
479 deviceRounding = "truncate";
480 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
481 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
482 deviceRounding = "round to nearest";
483 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
484 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
485 deviceRounding = "round to even";
486
487 log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
488 log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
489 deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
490 log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
491 return 1;
492 }
493 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
494 switch(imageInfo->format->image_channel_data_type)
495 {
496 case CL_UNORM_INT8:
497 case CL_SNORM_INT8:
498 case CL_UNSIGNED_INT8:
499 case CL_SIGNED_INT8:
500 log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
501 log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
502 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
503 break;
504 case CL_UNORM_INT16:
505 case CL_SNORM_INT16:
506 case CL_UNSIGNED_INT16:
507 case CL_SIGNED_INT16:
508 #ifdef CL_SFIXED14_APPLE
509 case CL_SFIXED14_APPLE:
510 #endif
511 log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
512 log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
513 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
514 break;
515 case CL_HALF_FLOAT:
516 log_error(
517 " Expected: 0x%4.4x 0x%4.4x "
518 "0x%4.4x 0x%4.4x\n",
519 ((cl_half *)resultBuffer)[0],
520 ((cl_half *)resultBuffer)[1],
521 ((cl_half *)resultBuffer)[2],
522 ((cl_half *)resultBuffer)[3]);
523 log_error(
524 " Actual: 0x%4.4x 0x%4.4x "
525 "0x%4.4x 0x%4.4x\n",
526 ((cl_half *)resultPtr)[0],
527 ((cl_half *)resultPtr)[1],
528 ((cl_half *)resultPtr)[2],
529 ((cl_half *)resultPtr)[3]);
530 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
531 break;
532 case CL_UNSIGNED_INT32:
533 case CL_SIGNED_INT32:
534 log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
535 log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
536 break;
537 case CL_FLOAT:
538 log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
539 log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
540 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
541 break;
542 }
543
544 float *v = (float *)(char *)imagePtr;
545 log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
546 log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
547 log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
548
549 if( ( --numTries ) == 0 )
550 return 1;
551 }
552 }
553 }
554 imagePtr += get_explicit_type_size( inputType ) * (( imageInfo->format->image_channel_order == CL_DEPTH ) ? 1 : 4);
555 resultPtr += get_pixel_size( imageInfo->format );
556 }
557 }
558 }
559 {
560 nextLevelOffset += width_lod*height_lod*imageInfo->arraySize*pixelSize;
561 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
562 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
563 }
564 }
565 }
566 // All done!
567 return totalErrors;
568 }
569
570
test_write_image_2D_array_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)571 int test_write_image_2D_array_set(cl_device_id device, cl_context context,
572 cl_command_queue queue,
573 const cl_image_format *format,
574 ExplicitType inputType, MTdata d)
575 {
576 char programSrc[10240];
577 const char *ptr;
578 const char *readFormat;
579 clProgramWrapper program;
580 clKernelWrapper kernel;
581 const char *KernelSourcePattern = NULL;
582 int error;
583
584 // Get our operating parameters
585 size_t maxWidth, maxHeight, maxArraySize;
586 cl_ulong maxAllocSize, memSize;
587
588 image_descriptor imageInfo = { 0x0 };
589
590 imageInfo.format = format;
591 imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
592 imageInfo.depth = 1;
593 imageInfo.slicePitch = 0;
594
595 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
596 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
597 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
598 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
599 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
600 test_error( error, "Unable to get max image 3D size from device" );
601
602 if (memSize > (cl_ulong)SIZE_MAX) {
603 memSize = (cl_ulong)SIZE_MAX;
604 }
605
606 // Determine types
607 if( inputType == kInt )
608 readFormat = "i";
609 else if( inputType == kUInt )
610 readFormat = "ui";
611 else // kFloat
612 readFormat = "f";
613
614 if(gtestTypesToRun & kWriteTests)
615 {
616 KernelSourcePattern = write2DArrayKernelSourcePattern;
617 }
618 else
619 {
620 KernelSourcePattern = readwrite2DArrayKernelSourcePattern;
621 }
622 // Construct the source
623 // Construct the source
624 sprintf( programSrc,
625 KernelSourcePattern,
626 get_explicit_type_name( inputType ),
627 (format->image_channel_order == CL_DEPTH) ? "" : "4",
628 (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t",
629 gTestMipmaps ? " , int lod" : "",
630 gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource,
631 readFormat,
632 gTestMipmaps ? ", lod" : "" );
633
634 ptr = programSrc;
635 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
636 "sample_kernel");
637 test_error( error, "Unable to create testing kernel" );
638
639 // Run tests
640 if( gTestSmallImages )
641 {
642 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
643 {
644 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
645 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
646 {
647 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
648 for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ )
649 {
650 if( gTestMipmaps )
651 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
652
653 if( gDebugTrace )
654 log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
655 int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
656 if( retCode )
657 return retCode;
658 }
659 }
660 }
661 }
662 else if( gTestMaxImages )
663 {
664 // Try a specific set of maximum sizes
665 size_t numbeOfSizes;
666 size_t sizes[100][3];
667
668 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE);
669
670 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
671 {
672 imageInfo.width = sizes[ idx ][ 0 ];
673 imageInfo.height = sizes[ idx ][ 1 ];
674 imageInfo.arraySize = sizes[ idx ][ 2 ];
675 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
676 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
677 if( gTestMipmaps )
678 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d);
679 log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize);
680 int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
681 if( retCode )
682 return retCode;
683 }
684 }
685 else if( gTestRounding )
686 {
687 size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
688 imageInfo.height = typeRange / 256;
689 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
690 imageInfo.arraySize = 2;
691
692 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
693 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
694 int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
695 if( retCode )
696 return retCode;
697 }
698 else
699 {
700 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
701 {
702 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
703 int maxHeighthRange = (int) reduceImageSizeRange(maxHeight);
704 int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
705
706 cl_ulong size, buffSize;
707 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
708 // image, the result array, plus offset arrays, will fit in the global ram space
709 do
710 {
711 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
712 imageInfo.height = (size_t)random_log_in_range( 16, maxHeighthRange, d );
713 imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );
714
715 if(gTestMipmaps)
716 {
717 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1), d);
718 //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
719 size = 4 * compute_mipmapped_image_size(imageInfo);
720 buffSize = size * get_explicit_type_size( inputType );
721 }
722 else
723 {
724 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
725 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
726 if( gEnablePitch )
727 {
728 size_t extraWidth = (int)random_log_in_range( 0, 64, d );
729 imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
730
731 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
732 extraWidth = (int)random_log_in_range( 0, 64, d );
733 imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
734 }
735
736
737 // Image size and buffer size may differ due to different pixel size.
738 // See creation of buffer at line ~153.
739 size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4;
740 buffSize = (cl_ulong)imageInfo.width * (cl_ulong)imageInfo.height * imageInfo.arraySize * get_explicit_type_size(inputType) * 4;
741 }
742 } while( size > maxAllocSize || buffSize > maxAllocSize || ( size * 3 ) > memSize );
743
744 if( gDebugTrace )
745 log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.arraySize,
746 imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxArraySize );
747
748 int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d );
749 if( retCode )
750 return retCode;
751 }
752 }
753
754 return 0;
755 }
756