1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../testBase.h"
17
18 #if !defined(_WIN32)
19 #include <sys/mman.h>
20 #endif
21
22 extern cl_mem_flags gMemFlagsToUse;
23 extern int gtestTypesToRun;
24
25 extern bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo );
26 extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo );
27
28 // Utility function to clamp down image sizes for certain tests to avoid
29 // using too much memory.
reduceImageSizeRange(size_t maxDimSize,MTdata & seed)30 static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
31 size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
32 if (DimSize > (size_t) 128)
33 return 128;
34 else
35 return DimSize;
36 }
37
reduceImageDepth(size_t maxDimSize,MTdata & seed)38 static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
39 size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
40 if (DimSize > (size_t) 32)
41 return 32;
42 else
43 return DimSize;
44 }
45
46
47 const char *write3DKernelSourcePattern =
48 "%s"
49 "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n"
50 "{\n"
51 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
52 "%s"
53 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
54 "}";
55
56 const char *readwrite3DKernelSourcePattern =
57 "%s"
58 "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n"
59 "{\n"
60 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n"
61 "%s"
62 " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n"
63 "}";
64
65 const char *khr3DWritesPragma =
66 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n";
67
68 const char *offset3DSource=
69 " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n";
70
71 const char *offset3DLodSource =
72 " int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n"
73 " int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n"
74 " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
75
test_write_image_3D(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,ExplicitType inputType,MTdata d)76 int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel,
77 image_descriptor *imageInfo, ExplicitType inputType, MTdata d )
78 {
79 int totalErrors = 0;
80
81 size_t num_flags = 0;
82 const cl_mem_flags *mem_flag_types = NULL;
83 const char * *mem_flag_names = NULL;
84 const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
85 const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" };
86 const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE};
87 const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"};
88
89 if(gtestTypesToRun & kWriteTests)
90 {
91 mem_flag_types = write_only_mem_flag_types;
92 mem_flag_names = write_only_mem_flag_names;
93 num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
94 }
95 else
96 {
97 mem_flag_types = read_write_mem_flag_types;
98 mem_flag_names = read_write_mem_flag_names;
99 num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
100 }
101
102 size_t pixelSize = get_pixel_size( imageInfo->format );
103
104 for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
105 {
106 int error;
107 size_t threads[3];
108 bool verifyRounding = false;
109 int forceCorrectlyRoundedWrites = 0;
110
111 #if defined( __APPLE__ )
112 // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
113 if( GetDeviceType(device) == CL_DEVICE_TYPE_CPU )
114 forceCorrectlyRoundedWrites = 1;
115 #endif
116
117 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
118 if( DetectFloatToHalfRoundingMode(queue) )
119 return 1;
120
121 BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
122
123 create_random_image_data( inputType, imageInfo, imageValues, d );
124
125 if(!gTestMipmaps)
126 {
127 if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT )
128 {
129 /* Pilot data for sRGB images */
130 if(is_sRGBA_order(imageInfo->format->image_channel_order))
131 {
132 // We want to generate ints (mostly) in range of the target format which should be [0,255]
133 // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
134 // it can test some out-of-range data points
135 const unsigned int test_range_ext = 16;
136 int formatMin = 0 - test_range_ext;
137 int formatMax = 255 + test_range_ext;
138 int pixel_value = 0;
139
140 // First, fill with arbitrary floats
141 for( size_t z = 0; z < imageInfo->depth; z++ )
142 {
143 for( size_t y = 0; y < imageInfo->height; y++ )
144 {
145 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
146 for( size_t i = 0; i < imageInfo->width * 4; i++ )
147 {
148 pixel_value = random_in_range( formatMin, (int)formatMax, d );
149 inputValues[ i ] = (float)(pixel_value/255.0f);
150 }
151 }
152 }
153
154 // Throw a few extra test values in there
155 float *inputValues = (float *)(char*)imageValues;
156 size_t i = 0;
157
158 // Piloting some debug inputs.
159 inputValues[ i++ ] = -0.5f;
160 inputValues[ i++ ] = 0.5f;
161 inputValues[ i++ ] = 2.f;
162 inputValues[ i++ ] = 0.5f;
163
164 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
165 // is correct
166 if( imageInfo->width > 12 )
167 {
168 float formatMax = (float)get_format_max_int( imageInfo->format );
169 inputValues[ i++ ] = 4.0f / formatMax;
170 inputValues[ i++ ] = 4.3f / formatMax;
171 inputValues[ i++ ] = 4.5f / formatMax;
172 inputValues[ i++ ] = 4.7f / formatMax;
173 inputValues[ i++ ] = 5.0f / formatMax;
174 inputValues[ i++ ] = 5.3f / formatMax;
175 inputValues[ i++ ] = 5.5f / formatMax;
176 inputValues[ i++ ] = 5.7f / formatMax;
177 }
178 }
179 else
180 {
181 // First, fill with arbitrary floats
182 for( size_t z = 0; z < imageInfo->depth; z++ )
183 {
184 for( size_t y = 0; y < imageInfo->height; y++ )
185 {
186 float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4;
187 for( size_t i = 0; i < imageInfo->width * 4; i++ )
188 inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
189 }
190 }
191
192 // Throw a few extra test values in there
193 float *inputValues = (float *)(char*)imageValues;
194 size_t i = 0;
195 inputValues[ i++ ] = -0.0000000000009f;
196 inputValues[ i++ ] = 1.f;
197 inputValues[ i++ ] = -1.f;
198 inputValues[ i++ ] = 2.f;
199
200 // Also fill in the first few vectors with some deliberate tests to determine the rounding mode
201 // is correct
202 if( imageInfo->width > 12 )
203 {
204 float formatMax = (float)get_format_max_int( imageInfo->format );
205 inputValues[ i++ ] = 4.0f / formatMax;
206 inputValues[ i++ ] = 4.3f / formatMax;
207 inputValues[ i++ ] = 4.5f / formatMax;
208 inputValues[ i++ ] = 4.7f / formatMax;
209 inputValues[ i++ ] = 5.0f / formatMax;
210 inputValues[ i++ ] = 5.3f / formatMax;
211 inputValues[ i++ ] = 5.5f / formatMax;
212 inputValues[ i++ ] = 5.7f / formatMax;
213 verifyRounding = true;
214 }
215 }
216 }
217 else if( inputType == kUInt )
218 {
219 unsigned int *inputValues = (unsigned int*)(char*)imageValues;
220 size_t i = 0;
221 inputValues[ i++ ] = 0;
222 inputValues[ i++ ] = 65535;
223 inputValues[ i++ ] = 7271820;
224 inputValues[ i++ ] = 0;
225 }
226 }
227
228 // Construct testing sources
229 clProtectedImage protImage;
230 clMemWrapper unprotImage;
231 cl_mem image;
232
233 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
234 {
235 create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d );
236
237 unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format,
238 imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0,
239 maxImageUseHostPtrBackingStore, &error );
240
241 if( error != CL_SUCCESS )
242 {
243 log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
244 return error;
245 }
246
247 image = (cl_mem)unprotImage;
248 }
249 else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none
250 {
251 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
252 // it works just as if no flag is specified, so we just do the same thing either way
253 // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data
254 if(gTestMipmaps)
255 {
256 cl_image_desc image_desc = {0};
257 image_desc.image_type = imageInfo->type;
258 image_desc.num_mip_levels = imageInfo->num_mip_levels;
259 image_desc.image_width = imageInfo->width;
260 image_desc.image_height = imageInfo->height;
261 image_desc.image_depth = imageInfo->depth;
262
263 unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ),
264 imageInfo->format, &image_desc, NULL, &error);
265 if( error != CL_SUCCESS )
266 {
267 log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth,
268 IGetErrorString( error ), mem_flag_names[mem_flag_index] );
269 return error;
270 }
271 }
272 else
273 {
274 unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format,
275 imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error );
276 if( error != CL_SUCCESS )
277 {
278 log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) );
279 return error;
280 }
281 }
282 image = unprotImage;
283 }
284
285 error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image );
286 test_error( error, "Unable to set kernel arguments" );
287
288 size_t width_lod = imageInfo->width;
289 size_t height_lod = imageInfo->height;
290 size_t depth_lod = imageInfo->depth;
291 size_t nextLevelOffset = 0;
292 size_t origin[ 4 ] = { 0, 0, 0, 0 };
293 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth };
294
295 int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1;
296 for( int lod = 0; lod < num_lod_loops; lod++)
297 {
298 if(gTestMipmaps)
299 {
300 error = clSetKernelArg( kernel, 2, sizeof( int ), &lod );
301 }
302 // Run the kernel
303 threads[0] = (size_t)width_lod;
304 threads[1] = (size_t)height_lod;
305 threads[2] = (size_t)depth_lod;
306
307 clMemWrapper inputStream;
308
309 char *imagePtrOffset = imageValues + nextLevelOffset;
310 inputStream =
311 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
312 get_explicit_type_size(inputType) * 4 * width_lod
313 * height_lod * depth_lod,
314 imagePtrOffset, &error);
315 test_error( error, "Unable to create input buffer" );
316
317 // Set arguments
318 error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream );
319 test_error( error, "Unable to set kernel arguments" );
320
321 error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
322 test_error( error, "Unable to run kernel" );
323
324 // Get results
325 size_t resultSize;
326 if(gTestMipmaps)
327 resultSize = width_lod * height_lod * depth_lod * pixelSize;
328 else
329 resultSize = imageInfo->slicePitch *imageInfo->depth;
330 clProtectedArray PA(resultSize);
331 char *resultValues = (char *)((void *)PA);
332
333 if( gDebugTrace )
334 log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) );
335
336 origin[3] = lod;
337 region[0] = width_lod;
338 region[1] = height_lod;
339 region[2] = depth_lod;
340 error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL );
341 test_error( error, "Unable to read results from kernel" );
342 if( gDebugTrace )
343 log_info( " results read\n" );
344
345 // Validate results element by element
346 char *imagePtr = (char*)imageValues + nextLevelOffset;
347 int numTries = 5;
348 for( size_t z = 0, i = 0; z < depth_lod; z++ )
349 {
350 for( size_t y = 0; y < height_lod; y++ )
351 {
352 char *resultPtr;
353 if( gTestMipmaps )
354 resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize;
355 else
356 resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch;
357 for( size_t x = 0; x < width_lod; x++, i++ )
358 {
359 char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each
360
361 // Convert this pixel
362 if( inputType == kFloat )
363 pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer );
364 else if( inputType == kInt )
365 pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer );
366 else // if( inputType == kUInt )
367 pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer );
368
369 // Compare against the results
370 if(is_sRGBA_order(imageInfo->format->image_channel_order))
371 {
372 // Compare sRGB-mapped values
373 cl_float expected[4] = {0};
374 cl_float* input_values = (float*)imagePtr;
375 cl_uchar *actual = (cl_uchar*)resultPtr;
376 float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR;
377 float err[4] = {0.0f};
378
379 for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
380 {
381 if(j < 3)
382 {
383 expected[j] = sRGBmap(input_values[j]);
384 }
385 else // there is no sRGB conversion for alpha component if it exists
386 {
387 expected[j] = NORMALIZE(input_values[j], 255.0f);
388 }
389
390 err[j] = fabsf( expected[ j ] - actual[ j ] );
391 }
392
393 if ((err[0] > max_err) ||
394 (err[1] > max_err) ||
395 (err[2] > max_err) ||
396 (err[3] > FLT_EPSILON)) // there is no conversion for alpha
397 {
398 log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]);
399 log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3));
400 log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
401 log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
402 return 1;
403 }
404 }
405 else if( imageInfo->format->image_channel_data_type == CL_FLOAT )
406 {
407 float *expected = (float *)resultBuffer;
408 float *actual = (float *)resultPtr;
409
410 if( !validate_float_write_results( expected, actual, imageInfo ) )
411 {
412 unsigned int *e = (unsigned int *)resultBuffer;
413 unsigned int *a = (unsigned int *)resultPtr;
414 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
415 log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
416 log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
417 log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
418 log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
419 totalErrors++;
420 if( ( --numTries ) == 0 )
421 return 1;
422 }
423 }
424 else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
425 {
426 cl_half *e = (cl_half *)resultBuffer;
427 cl_half *a = (cl_half *)resultPtr;
428 if( !validate_half_write_results( e, a, imageInfo ) )
429 {
430 totalErrors++;
431 log_error( "ERROR: Sample %ld (%ld,%ld,%ld) did not validate! (%s)\n", i, x, y, z, mem_flag_names[ mem_flag_index ] );
432 unsigned short *e = (unsigned short *)resultBuffer;
433 unsigned short *a = (unsigned short *)resultPtr;
434 log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] );
435 log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] );
436 if( inputType == kFloat )
437 {
438 float *p = (float *)imagePtr;
439 log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
440 log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
441 }
442 if( ( --numTries ) == 0 )
443 return 1;
444 }
445 }
446 else
447 {
448 // Exact result passes every time
449 if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 )
450 {
451 // result is inexact. Calculate error
452 int failure = 1;
453 float errors[4] = {NAN, NAN, NAN, NAN};
454 pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors );
455
456 // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats
457 if( 0 == forceCorrectlyRoundedWrites &&
458 (
459 imageInfo->format->image_channel_data_type == CL_UNORM_INT8 ||
460 imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 ||
461 imageInfo->format->image_channel_data_type == CL_UNORM_INT16 ||
462 imageInfo->format->image_channel_data_type == CL_SNORM_INT8 ||
463 imageInfo->format->image_channel_data_type == CL_SNORM_INT16
464 ))
465 {
466 if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) &&
467 ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) )
468 failure = 0;
469 }
470
471
472 if( failure )
473 {
474 totalErrors++;
475 // Is it our special rounding test?
476 if( verifyRounding && i >= 1 && i <= 2 )
477 {
478 // Try to guess what the rounding mode of the device really is based on what it returned
479 const char *deviceRounding = "unknown";
480 unsigned int deviceResults[8];
481 read_image_pixel<unsigned int>( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod );
482 read_image_pixel<unsigned int>( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod );
483
484 if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 &&
485 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 )
486 deviceRounding = "truncate";
487 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 &&
488 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
489 deviceRounding = "round to nearest";
490 else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 &&
491 deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 )
492 deviceRounding = "round to even";
493
494 log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] );
495 log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ],
496 deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] );
497 log_error( " Rounding mode of device appears to be %s\n", deviceRounding );
498 return 1;
499 }
500 log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y );
501 switch(imageInfo->format->image_channel_data_type)
502 {
503 case CL_UNORM_INT8:
504 case CL_SNORM_INT8:
505 case CL_UNSIGNED_INT8:
506 case CL_SIGNED_INT8:
507 log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] );
508 log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] );
509 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
510 break;
511 case CL_UNORM_INT16:
512 case CL_SNORM_INT16:
513 case CL_UNSIGNED_INT16:
514 case CL_SIGNED_INT16:
515 #ifdef CL_SFIXED14_APPLE
516 case CL_SFIXED14_APPLE:
517 #endif
518 log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] );
519 log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] );
520 log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
521 break;
522 case CL_HALF_FLOAT:
523 log_error(
524 " Expected: 0x%4.4x 0x%4.4x "
525 "0x%4.4x 0x%4.4x\n",
526 ((cl_half *)resultBuffer)[0],
527 ((cl_half *)resultBuffer)[1],
528 ((cl_half *)resultBuffer)[2],
529 ((cl_half *)resultBuffer)[3]);
530 log_error(
531 " Actual: 0x%4.4x 0x%4.4x "
532 "0x%4.4x 0x%4.4x\n",
533 ((cl_half *)resultPtr)[0],
534 ((cl_half *)resultPtr)[1],
535 ((cl_half *)resultPtr)[2],
536 ((cl_half *)resultPtr)[3]);
537 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
538 break;
539 case CL_UNSIGNED_INT32:
540 case CL_SIGNED_INT32:
541 log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] );
542 log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] );
543 break;
544 case CL_FLOAT:
545 log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] );
546 log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] );
547 log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] );
548 break;
549 }
550
551 float *v = (float *)(char *)imagePtr;
552 log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
553 log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] );
554 log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] );
555
556 if( ( --numTries ) == 0 )
557 return 1;
558 }
559 }
560 }
561 imagePtr += get_explicit_type_size( inputType ) * 4;
562 resultPtr += get_pixel_size( imageInfo->format );
563 }
564 }
565 }
566 {
567 nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize;
568 width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
569 height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
570 depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1;
571 }
572 }
573 }
574 // All done!
575 return totalErrors;
576 }
577
578
test_write_image_3D_set(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,ExplicitType inputType,MTdata d)579 int test_write_image_3D_set(cl_device_id device, cl_context context,
580 cl_command_queue queue,
581 const cl_image_format *format,
582 ExplicitType inputType, MTdata d)
583 {
584 char programSrc[10240];
585 const char *ptr;
586 const char *readFormat;
587 clProgramWrapper program;
588 clKernelWrapper kernel;
589 const char *KernelSourcePattern = NULL;
590
591 int error;
592
593 // Get our operating parameters
594 size_t maxWidth, maxHeight, maxDepth;
595 cl_ulong maxAllocSize, memSize;
596
597 image_descriptor imageInfo = { 0x0 };
598
599 imageInfo.format = format;
600 imageInfo.type = CL_MEM_OBJECT_IMAGE3D;
601
602 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
603 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
604 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
605 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
606 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
607 test_error( error, "Unable to get max image 3D size from device" );
608
609 if (memSize > (cl_ulong)SIZE_MAX) {
610 memSize = (cl_ulong)SIZE_MAX;
611 }
612
613 // Determine types
614 if( inputType == kInt )
615 readFormat = "i";
616 else if( inputType == kUInt )
617 readFormat = "ui";
618 else // kFloat
619 readFormat = "f";
620
621 if(gtestTypesToRun & kWriteTests)
622 {
623 KernelSourcePattern = write3DKernelSourcePattern;
624 }
625 else
626 {
627 KernelSourcePattern = readwrite3DKernelSourcePattern;
628 }
629
630 // Construct the source
631 sprintf( programSrc,
632 KernelSourcePattern,
633 gTestMipmaps ? "" : khr3DWritesPragma,
634 get_explicit_type_name( inputType ),
635 gTestMipmaps ? ", int lod" : "",
636 gTestMipmaps ? offset3DLodSource : offset3DSource,
637 readFormat,
638 gTestMipmaps ? ", lod" : "" );
639
640 ptr = programSrc;
641 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
642 "sample_kernel");
643 test_error( error, "Unable to create testing kernel" );
644
645 // Run tests
646 if( gTestSmallImages )
647 {
648 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
649 {
650 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
651 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
652 {
653 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
654 for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ )
655 {
656 if (gTestMipmaps)
657 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
658
659 if( gDebugTrace )
660 log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth );
661 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
662 if( retCode )
663 return retCode;
664 }
665 }
666 }
667 }
668 else if( gTestMaxImages )
669 {
670 // Try a specific set of maximum sizes
671 size_t numbeOfSizes;
672 size_t sizes[100][3];
673
674 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE);
675
676 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
677 {
678 imageInfo.width = sizes[ idx ][ 0 ];
679 imageInfo.height = sizes[ idx ][ 1 ];
680 imageInfo.depth = sizes[ idx ][ 2 ];
681 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
682 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
683 if (gTestMipmaps)
684 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
685 log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth);
686 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
687 if( retCode )
688 return retCode;
689 }
690 }
691 else if( gTestRounding )
692 {
693 size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
694 imageInfo.height = typeRange / 256;
695 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
696 imageInfo.depth = 1;
697
698 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
699 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
700 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
701 if( retCode )
702 return retCode;
703 }
704 else
705 {
706 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
707 {
708 cl_ulong size;
709 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
710 // image, the result array, plus offset arrays, will fit in the global ram space
711 do
712 {
713 imageInfo.width = reduceImageSizeRange(maxWidth, d );
714 imageInfo.height = reduceImageSizeRange(maxHeight, d );
715 imageInfo.depth = reduceImageDepth(maxDepth, d );
716
717 if(gTestMipmaps)
718 {
719 imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d);
720 //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
721 size = 4 * compute_mipmapped_image_size(imageInfo) * get_explicit_type_size( inputType );
722 }
723 else
724 {
725 imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
726 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
727 if( gEnablePitch )
728 {
729 size_t extraWidth = (int)random_log_in_range( 0, 64, d );
730 imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format );
731
732 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
733 extraWidth = (int)random_log_in_range( 0, 64, d );
734 imageInfo.slicePitch += extraWidth * imageInfo.rowPitch;
735 }
736
737 size = (size_t)imageInfo.slicePitch * (size_t)imageInfo.depth * 4;
738 }
739 } while( size > maxAllocSize || ( size * 3 ) > memSize );
740
741 if( gDebugTrace )
742 log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.depth,
743 imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxDepth );
744
745 int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d );
746 if( retCode )
747 return retCode;
748 }
749 }
750
751 return 0;
752 }
753