• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "gl/setup.h"
18 #include "harness/genericThread.h"
19 
20 #if defined( __APPLE__ )
21 #include <OpenGL/glu.h>
22 #else
23 #include <GL/glu.h>
24 #include <CL/cl_gl.h>
25 #if !defined(_WIN32) && !defined(__ANDROID__)
26 #include <GL/glx.h>
27 #endif
28 #endif
29 
30 #ifndef GLsync
31 // For OpenGL before 3.2, we look for the ARB_sync extension and try to use that
32 #if !defined(_WIN32)
33 #include <inttypes.h>
34 #endif // !_WIN32
35 typedef int64_t GLint64;
36 typedef uint64_t GLuint64;
37 typedef struct __GLsync *GLsync;
38 
39 #ifndef APIENTRY
40 #define APIENTRY
41 #endif
42 
43 typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags);
44 glFenceSyncPtr glFenceSyncFunc;
45 
46 typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync);
47 glIsSyncPtr glIsSyncFunc;
48 
49 typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync);
50 glDeleteSyncPtr glDeleteSyncFunc;
51 
52 typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
53 glClientWaitSyncPtr glClientWaitSyncFunc;
54 
55 typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout);
56 glWaitSyncPtr glWaitSyncFunc;
57 
58 typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params);
59 glGetInteger64vPtr glGetInteger64vFunc;
60 
61 typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length,
62                                GLint *values);
63 glGetSyncivPtr glGetSyncivFunc;
64 
65 #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError()))
66 
InitSyncFns(void)67 static void InitSyncFns( void )
68 {
69     glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" );
70     glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" );
71     glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" );
72     glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" );
73     glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" );
74     glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" );
75     glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" );
76 }
77 #ifndef GL_ARB_sync
78 #define GL_MAX_SERVER_WAIT_TIMEOUT        0x9111
79 
80 #define GL_OBJECT_TYPE            0x9112
81 #define GL_SYNC_CONDITION            0x9113
82 #define GL_SYNC_STATUS            0x9114
83 #define GL_SYNC_FLAGS            0x9115
84 
85 #define GL_SYNC_FENCE            0x9116
86 
87 #define GL_SYNC_GPU_COMMANDS_COMPLETE    0x9117
88 
89 #define GL_UNSIGNALED            0x9118
90 #define GL_SIGNALED            0x9119
91 
92 #define GL_SYNC_FLUSH_COMMANDS_BIT        0x00000001
93 
94 #define GL_TIMEOUT_IGNORED            0xFFFFFFFFFFFFFFFFull
95 
96 #define GL_ALREADY_SIGNALED        0x911A
97 #define GL_TIMEOUT_EXPIRED            0x911B
98 #define GL_CONDITION_SATISFIED        0x911C
99 #define GL_WAIT_FAILED            0x911D
100 #endif
101 
102 #define USING_ARB_sync 1
103 #endif
104 
105 typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ;
106 
107 clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
108 
109 
110 static const char *updateBuffersKernel[] = {
111     "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n"
112     "{\n"
113     "    size_t tid = get_global_id(0);\n"
114     "\n"
115     "    size_t xVal = ( tid & ( horizWrap - 1 ) );\n"
116     "    vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n"
117     "    vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n"
118     "\n"
119     "    int rowV = rowIdx + 1;\n"
120     "    colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n"
121     "    //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n"
122     "    colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
123     "}\n" };
124 
125 //Passthrough VertexShader
126 static const char *vertexshader =
127 "#version 150\n"
128 "uniform mat4 projMatrix;\n"
129 "in vec4 inPosition;\n"
130 "in vec4 inColor;\n"
131 "out vec4 vertColor;\n"
132 "void main (void) {\n"
133 "    gl_Position = projMatrix*inPosition;\n"
134 "   vertColor = inColor;\n"
135 "}\n";
136 
137 //Passthrough FragmentShader
138 static const char *fragmentshader =
139 "#version 150\n"
140 "in vec4 vertColor;\n"
141 "out vec4 outColor;\n"
142 "void main (void) {\n"
143 "    outColor = vertColor;\n"
144 "}\n";
145 
createShaderProgram(GLint * posLoc,GLint * colLoc)146 GLuint createShaderProgram(GLint *posLoc, GLint *colLoc)
147 {
148     GLint  logLength, status;
149     GLuint program = glCreateProgram();
150     GLuint vpShader;
151 
152     vpShader = glCreateShader(GL_VERTEX_SHADER);
153     glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL);
154     glCompileShader(vpShader);
155     glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength);
156     if (logLength > 0) {
157         GLchar *log = (GLchar*) malloc(logLength);
158         glGetShaderInfoLog(vpShader, logLength, &logLength, log);
159         log_info("Vtx Shader compile log:\n%s", log);
160         free(log);
161     }
162 
163     glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status);
164     if (status == 0)
165     {
166         log_error("Failed to compile vtx shader:\n");
167         return 0;
168     }
169 
170     glAttachShader(program, vpShader);
171 
172     GLuint fpShader;
173     fpShader = glCreateShader(GL_FRAGMENT_SHADER);
174     glShaderSource(fpShader, 1, (const GLchar **)&fragmentshader, NULL);
175     glCompileShader(fpShader);
176 
177     glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength);
178     if (logLength > 0) {
179         GLchar *log = (GLchar*)malloc(logLength);
180         glGetShaderInfoLog(fpShader, logLength, &logLength, log);
181         log_info("Frag Shader compile log:\n%s", log);
182         free(log);
183     }
184 
185     glAttachShader(program, fpShader);
186     glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status);
187     if (status == 0)
188     {
189         log_error("Failed to compile frag shader:\n\n");
190         return 0;
191     }
192 
193     glLinkProgram(program);
194     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength);
195     if (logLength > 0) {
196         GLchar *log = (GLchar*)malloc(logLength);
197         glGetProgramInfoLog(program, logLength, &logLength, log);
198         log_info("Program link log:\n%s", log);
199         free(log);
200     }
201 
202     glGetProgramiv(program, GL_LINK_STATUS, &status);
203     if (status == 0)
204     {
205         log_error("Failed to link program\n");
206         return 0;
207     }
208 
209     *posLoc = glGetAttribLocation(program, "inPosition");
210     *colLoc = glGetAttribLocation(program, "inColor");
211 
212     return program;
213 }
214 
destroyShaderProgram(GLuint program)215 void destroyShaderProgram(GLuint program)
216 {
217     GLuint shaders[2];
218     GLsizei count;
219     glUseProgram(0);
220     glGetAttachedShaders(program, 2, &count, shaders);
221     int i;
222     for(i = 0; i < count; i++)
223     {
224         glDetachShader(program, shaders[i]);
225         glDeleteShader(shaders[i]);
226     }
227     glDeleteProgram(program);
228 }
229 
230 // This function queues up and runs the above CL kernel that writes the vertex data
run_cl_kernel(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,cl_int rowIdx,cl_event fenceEvent,size_t numThreads)231 cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1,
232                      cl_int rowIdx, cl_event fenceEvent, size_t numThreads )
233 {
234     cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx );
235     test_error( error, "Unable to set kernel arguments" );
236 
237     clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2;
238     int numEvents = ( fenceEvent != NULL ) ? 1 : 0;
239     cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL;
240 
241     error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 );
242     test_error( error, "Unable to acquire GL obejcts");
243     error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 );
244     test_error( error, "Unable to acquire GL obejcts");
245 
246     cl_event evts[ 2 ] = { acqEvent1, acqEvent2 };
247 
248     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent );
249     test_error( error, "Unable to execute test kernel" );
250 
251     error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 );
252     test_error(error, "clEnqueueReleaseGLObjects failed");
253     error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 );
254     test_error(error, "clEnqueueReleaseGLObjects failed");
255 
256     evts[ 0 ] = relEvent1;
257     evts[ 1 ] = relEvent2;
258     error = clWaitForEvents( 2, evts );
259     test_error( error, "Unable to wait for release events" );
260 
261     return 0;
262 }
263 
264 class RunThread : public genericThread
265 {
266 public:
267 
268     cl_kernel mKernel;
269     cl_command_queue mQueue;
270     cl_mem mStream0, mStream1;
271     cl_int mRowIdx;
272     cl_event mFenceEvent;
273     size_t mNumThreads;
274 
RunThread(cl_kernel kernel,cl_command_queue queue,cl_mem stream0,cl_mem stream1,size_t numThreads)275     RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads )
276     : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads )
277     {
278     }
279 
SetRunData(cl_int rowIdx,cl_event fenceEvent)280     void SetRunData( cl_int rowIdx, cl_event fenceEvent )
281     {
282         mRowIdx = rowIdx;
283         mFenceEvent = fenceEvent;
284     }
285 
IRun(void)286     virtual void * IRun( void )
287     {
288         cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads );
289         return (void *)(uintptr_t)error;
290     }
291 };
292 
293 
test_fence_sync_single(cl_device_id device,cl_context context,cl_command_queue queue,bool separateThreads,GLint rend_vs,GLint read_vs,cl_device_id rend_device)294 int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device )
295 {
296     int error;
297     const int framebufferSize = 512;
298 
299 
300     if( !is_extension_available( device, "cl_khr_gl_event" ) )
301     {
302         log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
303         return 0;
304     }
305 
306     // Ask OpenCL for the platforms.  Warn if more than one platform found,
307     // since this might not be the platform we want.  By default, we simply
308     // use the first returned platform.
309 
310     cl_uint nplatforms;
311     cl_platform_id platform;
312     clGetPlatformIDs(0, NULL, &nplatforms);
313     clGetPlatformIDs(1, &platform, NULL);
314 
315     if (nplatforms > 1) {
316         log_info("clGetPlatformIDs returned multiple values.  This is not "
317             "an error, but might result in obtaining incorrect function "
318             "pointers if you do not want the first returned platform.\n");
319 
320         // Show them the platform name, in case it is a problem.
321 
322         size_t size;
323         char *name;
324 
325         clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
326         name = (char*)malloc(size);
327         clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
328 
329         log_info("Using platform with name: %s \n", name);
330         free(name);
331     }
332 
333     clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR");
334     if( clCreateEventFromGLsyncKHR_ptr == NULL )
335     {
336         log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" );
337         clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE");
338         return -1;
339     }
340 
341 #ifdef USING_ARB_sync
342     char *gl_version_str = (char*)glGetString( GL_VERSION );
343     float glCoreVersion;
344     sscanf(gl_version_str, "%f", &glCoreVersion);
345     if( glCoreVersion < 3.0f )
346     {
347         log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion );
348         return 0;
349     }
350 
351 #ifdef __APPLE__
352     CGLContextObj currCtx = CGLGetCurrentContext();
353     CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx);
354     GLint val, screen;
355     CGLGetVirtualScreen(currCtx, &screen);
356     CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val);
357     if(val != kCGLOGLPVersion_3_2_Core)
358     {
359         log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it"
360                   "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion );
361         return -1;
362     }
363 #else
364 #ifdef _WIN32
365     HDC hdc = wglGetCurrentDC();
366     HGLRC hglrc = wglGetCurrentContext();
367 #else
368     Display* dpy = glXGetCurrentDisplay();
369     GLXDrawable drawable = glXGetCurrentDrawable();
370     GLXContext ctx = glXGetCurrentContext();
371 #endif
372 #endif
373 
374     InitSyncFns();
375 #endif
376 
377 #ifdef __APPLE__
378     CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs);
379 #else
380 #ifdef _WIN32
381     wglMakeCurrent(hdc, hglrc);
382 #else
383     glXMakeCurrent(dpy, drawable, ctx);
384 #endif
385 #endif
386 
387     GLint posLoc, colLoc;
388     GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc);
389     if(!shaderprogram)
390     {
391         log_error("Failed to create shader program\n");
392         return -1;
393     }
394 
395     float l = 0.0f; float r = framebufferSize;
396     float b = 0.0f; float t = framebufferSize;
397 
398     float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f,
399         0.0f, 2.0f/(t-b), 0.0f, 0.0f,
400         0.0f, 0.0f, -1.0f, 0.0f,
401         -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f
402     };
403 
404     glUseProgram(shaderprogram);
405     GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix");
406     glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix);
407     glUseProgram(0);
408 
409     // Note: the framebuffer is just the target to verify our results against, so we don't
410     // really care to go through all the possible formats in this case
411     glFramebufferWrapper glFramebuffer;
412     glRenderbufferWrapper glRenderbuffer;
413     error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT,
414                                     GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
415                                     &glFramebuffer, &glRenderbuffer );
416     if( error != 0 )
417         return error;
418 
419     GLuint vao;
420     glGenVertexArrays(1, &vao);
421     glBindVertexArray(vao);
422 
423     glBufferWrapper vtxBuffer, colorBuffer;
424     glGenBuffers( 1, &vtxBuffer );
425     glGenBuffers( 1, &colorBuffer );
426 
427     const int numHorizVertices = ( framebufferSize * 64 ) + 1;
428 
429     glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
430     glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
431 
432     glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
433     glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW );
434 
435     // Now that the requisite objects are bound, we can attempt program
436     // validation:
437 
438     glValidateProgram(shaderprogram);
439 
440     GLint logLength, status;
441     glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength);
442     if (logLength > 0) {
443         GLchar *log = (GLchar*)malloc(logLength);
444         glGetProgramInfoLog(shaderprogram, logLength, &logLength, log);
445         log_info("Program validate log:\n%s", log);
446         free(log);
447     }
448 
449     glGetProgramiv(shaderprogram, GL_VALIDATE_STATUS, &status);
450     if (status == 0)
451     {
452         log_error("Failed to validate program\n");
453         return 0;
454     }
455 
456     clProgramWrapper program;
457     clKernelWrapper kernel;
458     clMemWrapper streams[ 2 ];
459 
460     if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) )
461         return -1;
462 
463     streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error );
464     test_error( error, "Unable to create CL buffer from GL vertex buffer" );
465 
466     streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error );
467     test_error( error, "Unable to create CL buffer from GL color buffer" );
468 
469     error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
470     test_error( error, "Unable to set kernel arguments" );
471 
472     error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
473     test_error( error, "Unable to set kernel arguments" );
474 
475     cl_int horizWrap = (cl_int)framebufferSize;
476     error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap );
477     test_error( error, "Unable to set kernel arguments" );
478 
479     glViewport( 0, 0, framebufferSize, framebufferSize );
480     glClearColor( 0, 0, 0, 0 );
481     glClear( GL_COLOR_BUFFER_BIT );
482     glClear( GL_DEPTH_BUFFER_BIT );
483     glDisable( GL_DEPTH_TEST );
484     glEnable( GL_BLEND );
485     glBlendFunc( GL_ONE, GL_ONE );
486 
487     clEventWrapper fenceEvent;
488     GLsync glFence = 0;
489 
490     // Do a loop through 8 different horizontal stripes against the framebuffer
491     RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices );
492 
493     for( int i = 0; i < 8; i++ )
494     {
495         // if current rendering device is not the compute device and
496         // separateThreads == false which means compute is going on same
497         // thread and we are using implicit synchronization (no GLSync obj used)
498         // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need
499         // to wait for rendering to finish on other device before CL can start
500         // writing to CL/GL shared mem objects. When separateThreads is true i.e.
501         // we are using GLSync obj to synchronize then we dont need to call glFinish
502         // here since CL should wait for rendering on other device before this
503         // GLSync object to finish before it starts writing to shared mem object.
504         // Also rend_device == compute_device no need to call glFinish
505         if(rend_device != device && !separateThreads)
506             glFinish();
507 
508         if( separateThreads )
509         {
510             if (fenceEvent != NULL)
511             {
512                 clReleaseEvent(fenceEvent);
513                 glDeleteSyncFunc(glFence);
514             }
515 
516             glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
517             fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error);
518             test_error(error, "Unable to create CL event from GL fence");
519 
520             // in case of explicit synchronization, we just wait for the sync object to complete
521             // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
522             // to flush on the context on which glSync is created
523             glFlush();
524 
525             thread.SetRunData( (cl_int)i, fenceEvent );
526             thread.Start();
527 
528             error = (cl_int)(size_t)thread.Join();
529         }
530         else
531         {
532             error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices );
533         }
534         test_error( error, "Unable to run CL kernel" );
535 
536         glUseProgram(shaderprogram);
537         glEnableVertexAttribArray(posLoc);
538         glEnableVertexAttribArray(colLoc);
539         glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer );
540         glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
541         glBindBuffer( GL_ARRAY_BUFFER, colorBuffer );
542         glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0);
543         glBindBuffer( GL_ARRAY_BUFFER, 0 );
544 
545         glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 );
546 
547         glDisableVertexAttribArray(posLoc);
548         glDisableVertexAttribArray(colLoc);
549         glUseProgram(0);
550 
551         if( separateThreads )
552         {
553             // If we're on the same thread, then we're testing implicit syncing, so we
554             // don't need the actual fence code
555             if( fenceEvent != NULL )
556             {
557                 clReleaseEvent( fenceEvent );
558                 glDeleteSyncFunc( glFence );
559             }
560 
561             glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 );
562             fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error );
563             test_error( error, "Unable to create CL event from GL fence" );
564 
565             // in case of explicit synchronization, we just wait for the sync object to complete
566             // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility
567             // to flush on the context on which glSync is created
568             glFlush();
569         }
570         else
571             glFinish();
572     }
573 
574     if( glFence != 0 )
575         // Don't need the final release for fenceEvent, because the wrapper will take care of that
576         glDeleteSyncFunc( glFence );
577 
578 #ifdef __APPLE__
579     CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs);
580 #else
581 #ifdef _WIN32
582     wglMakeCurrent(hdc, hglrc);
583 #else
584     glXMakeCurrent(dpy, drawable, ctx);
585 #endif
586 #endif
587     // Grab the contents of the final framebuffer
588     BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer,
589                                                          GL_COLOR_ATTACHMENT0_EXT,
590                                                          GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar,
591                                                          framebufferSize, 128 ) );
592 
593     // Check the contents now. We should end up with solid color bands 32 pixels high and the
594     // full width of the framebuffer, at values (128,128,128) due to the additive blending
595     for( int i = 0; i < 8; i++ )
596     {
597         for( int y = 0; y < 4; y++ )
598         {
599             // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass
600             cl_uchar valA = ( ( ( i + 1 ) & 1 )      ) * numHorizVertices * 2 / framebufferSize;
601             cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize;
602             cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize;
603 
604             cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ];
605             for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ )
606             {
607                 if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) ||
608                    ( row[ x * 4 + 2 ] != valC ) )
609                 {
610                     log_error( "ERROR: Output framebuffer did not validate!\n" );
611                     DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData );
612                     log_error( "RUNS:\n" );
613                     uint32_t *p = (uint32_t *)(char *)resultData;
614                     size_t a = 0;
615                     for( size_t t = 1; t < framebufferSize * framebufferSize; t++ )
616                     {
617                         if( p[ a ] != 0 )
618                         {
619                             if( p[ t ] == 0 )
620                             {
621                                 log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1,
622                                           (int)( a % framebufferSize ), (int)( a / framebufferSize ),
623                                           (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ),
624                                           p[ a ] );
625                                 a = t;
626                             }
627                         }
628                         else
629                         {
630                             if( p[ t ] != 0 )
631                             {
632                                 a = t;
633                             }
634                         }
635 
636                     }
637                     return -1;
638                 }
639             }
640         }
641     }
642 
643     destroyShaderProgram(shaderprogram);
644     glDeleteVertexArrays(1, &vao);
645     return 0;
646 }
647 
test_fence_sync(cl_device_id device,cl_context context,cl_command_queue queue,int numElements)648 int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
649 {
650     GLint vs_count = 0;
651     cl_device_id *device_list = NULL;
652 
653     if( !is_extension_available( device, "cl_khr_gl_event" ) )
654     {
655         log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" );
656         return 0;
657     }
658 #ifdef __APPLE__
659     CGLContextObj ctx = CGLGetCurrentContext();
660     CGLPixelFormatObj pix = CGLGetPixelFormat(ctx);
661     CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count);
662 
663     device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count);
664     clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL);
665 #else
666     // Need platform specific way of getting devices from CL context to which OpenGL can render
667     // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES
668     size_t device_cb;
669     cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb);
670     if( err != CL_SUCCESS )
671     {
672       print_error( err, "Unable to get device count from context" );
673       return -1;
674     }
675     vs_count = (GLint)device_cb / sizeof(cl_device_id);
676 
677     if (vs_count < 1) {
678       log_error("No devices found.\n");
679       return -1;
680     }
681 
682     device_list = (cl_device_id *) malloc(device_cb);
683     err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL);
684     if( err != CL_SUCCESS ) {
685       free(device_list);
686       print_error( err, "Unable to get device list from context" );
687       return -1;
688     }
689 
690 #endif
691 
692     GLint rend_vs, read_vs;
693     int error = 0;
694     int any_failed = 0;
695 
696     // Loop through all the devices capable to OpenGL rendering
697     // and set them as current rendering target
698     for(rend_vs = 0; rend_vs < vs_count; rend_vs++)
699     {
700         // Loop through all the devices and set them as current
701         // compute target
702         for(read_vs = 0; read_vs < vs_count; read_vs++)
703         {
704             cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs];
705             char rend_name[200], read_name[200];
706 
707             clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL);
708             clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL);
709 
710             log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name);
711             error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device );
712             any_failed |= error;
713             if( error != 0 )
714                 log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" );
715             else
716                 log_info("Implicit syncing Passed\n");
717 
718             error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device );
719             any_failed |= error;
720             if( error != 0 )
721                 log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" );
722             else
723                 log_info("Explicit syncing Passed\n");
724         }
725     }
726 
727     free(device_list);
728 
729     return any_failed;
730 }
731