// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" #include namespace { cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo &info = *(BuildKernelInfo *)p; auto generator = [](const std::string &kernel_name, const char *builtin, cl_uint vector_size_index) { return GetTernaryKernel(kernel_name, builtin, ParameterType::Float, ParameterType::Float, ParameterType::Float, ParameterType::Float, vector_size_index); }; return BuildKernels(info, job_id, generator); } } // anonymous namespace int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) { int error; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); Programs programs; const unsigned thread_id = 0; // Test is currently not multithreaded. KernelMatrix kernels; float maxError = 0.0f; float maxErrorVal = 0.0f; float maxErrorVal2 = 0.0f; float maxErrorVal3 = 0.0f; uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); // Init the kernels BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) return error; for (uint64_t i = 0; i < (1ULL << 32); i += step) { // Init input array cl_uint *p = (cl_uint *)gIn; cl_uint *p2 = (cl_uint *)gIn2; cl_uint *p3 = (cl_uint *)gIn3; for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) { p[j] = genrand_int32(d); p2[j] = genrand_int32(d); p3[j] = genrand_int32(d); } if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error); return error; } if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error); return error; } if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, BUFFER_SIZE, gIn3, 0, NULL, NULL))) { vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error); return error; } // Write garbage into output arrays for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { uint32_t pattern = 0xffffdead; if (gHostFill) { memset_pattern4(gOut[j], &pattern, BUFFER_SIZE); if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, BUFFER_SIZE, gOut[j], 0, NULL, NULL))) { vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j); return error; } } else { if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern, sizeof(pattern), 0, BUFFER_SIZE, 0, NULL, NULL))) { vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n", error); return error; } } } // Run the kernels for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { size_t vectorSize = sizeof(cl_float) * sizeValues[j]; size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up if ((error = clSetKernelArg(kernels[j][thread_id], 0, sizeof(gOutBuffer[j]), &gOutBuffer[j]))) { LogBuildError(programs[j]); return error; } if ((error = clSetKernelArg(kernels[j][thread_id], 1, sizeof(gInBuffer), &gInBuffer))) { LogBuildError(programs[j]); return error; } if ((error = clSetKernelArg(kernels[j][thread_id], 2, sizeof(gInBuffer2), &gInBuffer2))) { LogBuildError(programs[j]); return error; } if ((error = clSetKernelArg(kernels[j][thread_id], 3, sizeof(gInBuffer3), &gInBuffer3))) { LogBuildError(programs[j]); return error; } if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id], 1, NULL, &localCount, NULL, 0, NULL, NULL))) { vlog_error("FAILED -- could not execute kernel\n"); return error; } } // Get that moving if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); // Calculate the correctly rounded reference result float *r = (float *)gOut_Ref; float *s = (float *)gIn; float *s2 = (float *)gIn2; float *s3 = (float *)gIn3; for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++) r[j] = (float)f->func.f_fff(s[j], s2[j], s3[j]); // Read the data back for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, BUFFER_SIZE, gOut[j], 0, NULL, NULL))) { vlog_error("ReadArray failed %d\n", error); return error; } } if (gSkipCorrectnessTesting) break; // Verify data -- No verification possible. // MAD is a random number generator. if (0 == (i & 0x0fffffff)) { vlog("."); fflush(stdout); } } if (!gSkipCorrectnessTesting) { if (gWimpyMode) vlog("Wimp pass"); else vlog("passed"); vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3); } vlog("\n"); return CL_SUCCESS; }