1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"

#include <algorithm>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
22
23 namespace tflite {
24 namespace gpu {
25 namespace cl {
26 namespace {
27
// Fallback declarations used only when the EGL headers do not define
// EGL_VERSION_1_5, so the sync type and the constants referenced below still
// exist at compile time.
#ifndef EGL_VERSION_1_5
using EGLSync = void*;
#define EGL_SYNC_CL_EVENT 0x30FE
#define EGL_CL_EVENT_HANDLE 0x309C
#define EGL_NO_SYNC 0
#endif  // EGL_VERSION_1_5
34
35 // TODO(b/131897059): replace with 64 version when EGL 1.5 is available.
36 // it should use KHR_cl_event2 extension. More details are in b/129974818.
37 using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
38 EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);
39
40 PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;
41
42 } // namespace
43
CreateEglSyncFromClEvent(cl_event event,EGLDisplay display,EglSync * sync)44 Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
45 EglSync* sync) {
46 if (!IsEglSyncFromClEventSupported()) {
47 return UnimplementedError("CreateEglSyncFromClEvent is not supported");
48 }
49 EGLSync egl_sync;
50 const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
51 reinterpret_cast<EGLAttrib>(event), EGL_NONE};
52 RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
53 EGL_SYNC_CL_EVENT, attributes));
54 if (egl_sync == EGL_NO_SYNC) {
55 return InternalError("Returned empty EGL sync");
56 }
57 *sync = EglSync(display, egl_sync);
58 return OkStatus();
59 }
60
IsEglSyncFromClEventSupported()61 bool IsEglSyncFromClEventSupported() {
62 // In C++11, static initializers are guaranteed to be evaluated only once.
63 static bool supported = []() -> bool {
64 // This function requires EGL 1.5 to work
65 g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
66 eglGetProcAddress("eglCreateSync"));
67 // eglQueryString accepts EGL_NO_DISPLAY only starting EGL 1.5
68 if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
69 g_eglCreateSync = nullptr;
70 }
71 return (g_eglCreateSync != nullptr);
72 }();
73 return supported;
74 }
75
CreateClEventFromEglSync(cl_context context,const EglSync & egl_sync,CLEvent * event)76 Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
77 CLEvent* event) {
78 cl_int error_code;
79 cl_event new_event = clCreateEventFromEGLSyncKHR(
80 context, egl_sync.sync(), egl_sync.display(), &error_code);
81 if (error_code != CL_SUCCESS) {
82 return InternalError(
83 absl::StrCat("Unable to create CL sync from EGL sync. ",
84 CLErrorCodeToString(error_code)));
85 }
86 *event = CLEvent(new_event);
87 return OkStatus();
88 }
89
IsClEventFromEglSyncSupported(const CLDevice & device)90 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
91 return device.SupportsExtension("cl_khr_egl_event");
92 }
93
CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,AccessType access_type,CLContext * context,CLMemory * memory)94 Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id, AccessType access_type,
95 CLContext* context, CLMemory* memory) {
96 cl_int error_code;
97 auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
98 gl_ssbo_id, &error_code);
99 if (error_code != CL_SUCCESS) {
100 return InternalError(
101 absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
102 CLErrorCodeToString(error_code)));
103 }
104 *memory = CLMemory(mem, true);
105 return OkStatus();
106 }
107
CreateClMemoryFromGlTexture(GLenum texture_target,GLuint texture_id,AccessType access_type,CLContext * context,CLMemory * memory)108 Status CreateClMemoryFromGlTexture(GLenum texture_target, GLuint texture_id,
109 AccessType access_type, CLContext* context,
110 CLMemory* memory) {
111 cl_int error_code;
112 auto mem =
113 clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
114 texture_target, 0, texture_id, &error_code);
115 if (error_code != CL_SUCCESS) {
116 return InternalError(
117 absl::StrCat("Unable to create CL buffer from GL texture. ",
118 CLErrorCodeToString(error_code)));
119 }
120 *memory = CLMemory(mem, true);
121 return OkStatus();
122 }
123
IsGlSharingSupported(const CLDevice & device)124 bool IsGlSharingSupported(const CLDevice& device) {
125 return clCreateFromGLBuffer && clCreateFromGLTexture &&
126 device.SupportsExtension("cl_khr_gl_sharing");
127 }
128
~AcquiredGlObjects()129 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
130
Acquire(const std::vector<cl_mem> & memory,cl_command_queue queue,const std::vector<cl_event> & wait_events,CLEvent * acquire_event,AcquiredGlObjects * objects)131 Status AcquiredGlObjects::Acquire(const std::vector<cl_mem>& memory,
132 cl_command_queue queue,
133 const std::vector<cl_event>& wait_events,
134 CLEvent* acquire_event,
135 AcquiredGlObjects* objects) {
136 if (!memory.empty()) {
137 cl_event new_event;
138 cl_int error_code = clEnqueueAcquireGLObjects(
139 queue, memory.size(), memory.data(), wait_events.size(),
140 wait_events.data(), acquire_event ? &new_event : nullptr);
141 if (error_code != CL_SUCCESS) {
142 return InternalError(absl::StrCat("Unable to acquire GL object. ",
143 CLErrorCodeToString(error_code)));
144 }
145 if (acquire_event) {
146 *acquire_event = CLEvent(new_event);
147 }
148 clFlush(queue);
149 }
150 *objects = AcquiredGlObjects(memory, queue);
151 return OkStatus();
152 }
153
Release(const std::vector<cl_event> & wait_events,CLEvent * release_event)154 Status AcquiredGlObjects::Release(const std::vector<cl_event>& wait_events,
155 CLEvent* release_event) {
156 if (queue_ && !memory_.empty()) {
157 cl_event new_event;
158 cl_int error_code = clEnqueueReleaseGLObjects(
159 queue_, memory_.size(), memory_.data(), wait_events.size(),
160 wait_events.data(), release_event ? &new_event : nullptr);
161 if (error_code != CL_SUCCESS) {
162 return InternalError(absl::StrCat("Unable to release GL object. ",
163 CLErrorCodeToString(error_code)));
164 }
165 if (release_event) {
166 *release_event = CLEvent(new_event);
167 }
168 clFlush(queue_);
169 queue_ = nullptr;
170 }
171 return OkStatus();
172 }
173
GlInteropFabric(EGLDisplay egl_display,Environment * environment)174 GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
175 Environment* environment)
176 : is_egl_sync_supported_(true),
177 is_egl_to_cl_mapping_supported_(
178 IsClEventFromEglSyncSupported(environment->device())),
179 is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
180 egl_display_(egl_display),
181 context_(environment->context().context()),
182 queue_(environment->queue()->queue()) {}
183
RegisterMemory(cl_mem memory)184 void GlInteropFabric::RegisterMemory(cl_mem memory) {
185 memory_.push_back(memory);
186 }
187
UnregisterMemory(cl_mem memory)188 void GlInteropFabric::UnregisterMemory(cl_mem memory) {
189 auto it = std::find(memory_.begin(), memory_.end(), memory);
190 if (it != memory_.end()) {
191 memory_.erase(it);
192 }
193 }
194
Start()195 Status GlInteropFabric::Start() {
196 if (!is_enabled()) {
197 return OkStatus();
198 }
199
200 // In GL-CL interoperability, we need to make sure GL finished processing of
201 // all commands that might affect GL objects. There are a few ways:
202 // a) glFinish
203 // slow, but portable
204 // b) EglSync + ClientWait
205 // faster alternative for glFinish, but still slow as it stalls GPU
206 // pipeline.
207 // c) EglSync->CLEvent or GlSync->CLEvent mapping
208 // Fast, as it allows to map sync to CL event and use it as a dependency
209 // later without stalling GPU pipeline.
210 if (is_egl_sync_supported_) {
211 EglSync sync;
212 RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
213 if (is_egl_to_cl_mapping_supported_) {
214 // (c) EglSync->CLEvent or GlSync->CLEvent mapping
215 glFlush();
216 RETURN_IF_ERROR(
217 CreateClEventFromEglSync(context_, sync, &inbound_event_));
218 } else {
219 // (b) EglSync + ClientWait
220 RETURN_IF_ERROR(sync.ClientWait());
221 }
222 } else {
223 // (a) glFinish / GL fence sync
224 RETURN_IF_ERROR(gl::GlActiveSyncWait());
225 }
226
227 // Acquire all GL objects needed while processing.
228 auto make_acquire_wait = [&]() -> std::vector<cl_event> {
229 if (inbound_event_.is_valid()) {
230 return {inbound_event_.event()};
231 }
232 return {};
233 };
234 return AcquiredGlObjects::Acquire(memory_, queue_, make_acquire_wait(),
235 nullptr, &gl_objects_);
236 }
237
Finish()238 Status GlInteropFabric::Finish() {
239 if (!is_enabled()) {
240 return OkStatus();
241 }
242 RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event_));
243
244 // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
245 // EglSync egl_outbound_sync;
246 // RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event_.event(),
247 // egl_display_,
248 // &egl_outbound_sync));
249 // // Instruct GL pipeline to wait until corresponding CL event is signaled.
250 // RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
251 // glFlush();
252 // } else {
253 // // Slower option if proper sync is not supported. It is equivalent to
254 // // clFinish, but, hopefully, faster.
255 // outbound_event_.Wait();
256 // }
257
258 // This slow sync is the only working solution right now. We have to debug why
259 // above version is not working fast and reliable.
260 outbound_event_.Wait();
261 return OkStatus();
262 }
263
264 } // namespace cl
265 } // namespace gpu
266 } // namespace tflite
267