// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "harness/compat.h" #include #include #include #include #include "procs.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" #include "harness/conversions.h" //#define USE_LOCAL_THREADS 1 #ifndef uchar typedef unsigned char uchar; #endif #ifndef TestStruct typedef struct{ int a; float b; } TestStruct; #endif const char *stream_write_int_kernel_code[] = { "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *int_kernel_name[] = { "test_stream_write_int", "test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" }; const char *stream_write_uint_kernel_code[] = { "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" }; const char *stream_write_ushort_kernel_code[] = { "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" }; const char *stream_write_short_kernel_code[] = { "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" }; const char *stream_write_char_kernel_code[] = { "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" }; const char *stream_write_uchar_kernel_code[] = { "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" }; const char *stream_write_float_kernel_code[] = { "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_float2(__global float2 *src, __global float2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" }; const char *stream_write_half_kernel_code[] = { "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = vload_half( tid * 2, src );\n" "}\n", "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = vload_half2( tid * 2, src );\n" "}\n", "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = vload_half4( tid * 2, src );\n" "}\n", "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = vload_half8( tid * 2, src );\n" "}\n", "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = vload_half16( tid * 2, src );\n" "}\n" }; static const char *half_kernel_name[] = { "test_stream_write_half", "test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" }; const char *stream_write_long_kernel_code[] = { "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" }; const char *stream_write_ulong_kernel_code[] = { "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n", "__kernel void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid] = src[tid];\n" "}\n" }; static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" }; static const char *stream_write_struct_kernel_code[] = { "typedef struct{\n" "int a;\n" "float b;\n" "} TestStruct;\n" "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" " dst[tid].a = src[tid].a;\n" " dst[tid].b = src[tid].b;\n" "}\n" }; static const char *struct_kernel_name[] = { "read_write_struct" }; static int verify_write_int( void *ptr1, void *ptr2, int n ) { int i; int *inptr = (int *)ptr1; int *outptr = (int *)ptr2; for (i=0; i threads[0] ) localThreads[0] = threads[0]; #endif ptrSizes[0] = size; ptrSizes[1] = ptrSizes[0] << 1; ptrSizes[2] = ptrSizes[1] << 1; ptrSizes[3] = ptrSizes[2] << 1; ptrSizes[4] = ptrSizes[3] << 1; loops = ( loops < 5 ? loops : 5 ); for( i = 0; i < loops; i++ ) { outPtrSizes[i] = ptrSizes[i]; } for( i = 0; i < loops; i++ ){ ii = i << 1; streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err ); if( ! streams[ii] ){ free( outptr[i] ); log_error( " clCreateBuffer failed\n" ); return -1; } if( ! strcmp( type, "half" ) ){ outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 ); streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * 2 * num_elements, NULL, &err ); } else{ outptr[i] = malloc( outPtrSizes[i] * num_elements ); streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * num_elements, NULL, &err ); } if( ! streams[ii+1] ){ clReleaseMemObject(streams[ii]); free( outptr[i] ); log_error( " clCreateBuffer failed\n" ); return -1; } err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent ); if( err != CL_SUCCESS ){ clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); print_error( err, " clWriteArray failed" ); return -1; } // This synchronization point is needed in order to assume the data is valid. // Getting profiling information is not a synchronization point. err = clWaitForEvents( 1, &writeEvent ); if( err != CL_SUCCESS ) { print_error( err, "Unable to wait for event completion" ); clReleaseEvent(writeEvent); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } // test profiling while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); if( err != CL_SUCCESS ){ print_error( err, "clGetEventProfilingInfo failed" ); clReleaseEvent(writeEvent); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); if( err != CL_SUCCESS ){ print_error( err, "clGetEventProfilingInfo failed" ); clReleaseEvent(writeEvent); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); if( err != CL_SUCCESS ){ print_error( err, "clGetEventProfilingInfo failed" ); clReleaseEvent(writeEvent); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); if( err != CL_SUCCESS ){ print_error( err, "clGetEventProfilingInfo failed" ); clReleaseEvent(writeEvent); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] ); if( err ){ clReleaseEvent(writeEvent); clReleaseMemObject(streams[ii]); clReleaseMemObject(streams[ii+1]); free( outptr[i] ); log_error( " Error creating program for %s\n", type ); return -1; } err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] ); err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] ); if (err != CL_SUCCESS){ clReleaseEvent(writeEvent); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); print_error( err, " clSetKernelArg failed" ); return -1; } #ifdef USE_LOCAL_THREADS err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); #else err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); #endif if( err != CL_SUCCESS ){ print_error( err, " clEnqueueNDRangeKernel failed" ); clReleaseEvent(writeEvent); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); return -1; } if( ! strcmp( type, "half" ) ){ err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); } else{ err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); } if( err != CL_SUCCESS ){ clReleaseEvent(writeEvent); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); print_error( err, " clEnqueueReadBuffer failed" ); return -1; } char *inP = (char *)inptr[i]; char *outP = (char *)outptr[i]; int err2 = 0; for( size_t p = 0; p < (size_t)num_elements; p++ ) { if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) ) { log_error( " %s%d data failed to verify\n", type, 1< threads[0] ) localThreads[0] = threads[0]; #endif ptrSizes[0] = size; ptrSizes[1] = ptrSizes[0] << 1; ptrSizes[2] = ptrSizes[1] << 1; ptrSizes[3] = ptrSizes[2] << 1; ptrSizes[4] = ptrSizes[3] << 1; loops = ( loops < 5 ? loops : 5 ); for( i = 0; i < loops; i++ ){ inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements); for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){ inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f ); inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX ); } ii = i << 1; streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL); if( ! streams[ii] ){ free( outptr[i] ); log_error( " clCreateBuffer failed\n" ); return -1; } outptr[i] = malloc( ptrSizes[i] * num_elements ); streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL); if( ! streams[ii+1] ){ clReleaseMemObject(streams[ii]); free( outptr[i] ); log_error( " clCreateBuffer failed\n" ); return -1; } err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL); if( err != CL_SUCCESS ){ clReleaseMemObject(streams[ii]); clReleaseMemObject(streams[ii+1]); free( outptr[i] ); print_error( err, " clWriteArray failed" ); return -1; } err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] ); if( err ){ clReleaseMemObject(streams[ii]); clReleaseMemObject(streams[ii+1]); free( outptr[i] ); log_error( " Error creating program for struct\n" ); return -1; } err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] ); err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] ); if (err != CL_SUCCESS){ clReleaseProgram( program[i] ); clReleaseKernel( kernel[i] ); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); free( outptr[i] ); print_error( err, " clSetKernelArg failed" ); return -1; } #ifdef USE_LOCAL_THREADS err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); #else err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); #endif if( err != CL_SUCCESS ){ print_error( err, " clEnqueueNDRangeKernel failed" ); clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); free( outptr[i] ); return -1; } err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); if( err != CL_SUCCESS ){ clReleaseMemObject( streams[ii] ); clReleaseMemObject( streams[ii+1] ); clReleaseKernel( kernel[i] ); clReleaseProgram( program[i] ); free( outptr[i] ); print_error( err, " clEnqueueReadBuffer failed" ); return -1; } if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){ log_error( " STREAM_WRITE struct%d test failed\n", 1<