• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
17 
18 #include "absl/strings/str_cat.h"
19 #include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
20 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
21 #include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
22 
23 namespace tflite {
24 namespace gpu {
25 namespace cl {
26 namespace {
27 
28 #ifndef EGL_VERSION_1_5
29 typedef void* EGLSync;
30 #define EGL_SYNC_CL_EVENT 0x30FE
31 #define EGL_CL_EVENT_HANDLE 0x309C
32 #define EGL_NO_SYNC 0
33 #endif /* EGL_VERSION_1_5 */
34 
35 // TODO(b/131897059): replace with 64 version when EGL 1.5 is available.
36 // it should use KHR_cl_event2 extension. More details are in b/129974818.
37 using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
38     EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);
39 
40 PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;
41 
42 }  // namespace
43 
CreateEglSyncFromClEvent(cl_event event,EGLDisplay display,EglSync * sync)44 Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
45                                 EglSync* sync) {
46   if (!IsEglSyncFromClEventSupported()) {
47     return UnimplementedError("CreateEglSyncFromClEvent is not supported");
48   }
49   EGLSync egl_sync;
50   const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
51                                   reinterpret_cast<EGLAttrib>(event), EGL_NONE};
52   RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
53                                       EGL_SYNC_CL_EVENT, attributes));
54   if (egl_sync == EGL_NO_SYNC) {
55     return InternalError("Returned empty EGL sync");
56   }
57   *sync = EglSync(display, egl_sync);
58   return OkStatus();
59 }
60 
IsEglSyncFromClEventSupported()61 bool IsEglSyncFromClEventSupported() {
62   // In C++11, static initializers are guaranteed to be evaluated only once.
63   static bool supported = []() -> bool {
64     // This function requires EGL 1.5 to work
65     g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
66         eglGetProcAddress("eglCreateSync"));
67     // eglQueryString accepts EGL_NO_DISPLAY only starting EGL 1.5
68     if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
69       g_eglCreateSync = nullptr;
70     }
71     return (g_eglCreateSync != nullptr);
72   }();
73   return supported;
74 }
75 
CreateClEventFromEglSync(cl_context context,const EglSync & egl_sync,CLEvent * event)76 Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
77                                 CLEvent* event) {
78   cl_int error_code;
79   cl_event new_event = clCreateEventFromEGLSyncKHR(
80       context, egl_sync.sync(), egl_sync.display(), &error_code);
81   if (error_code != CL_SUCCESS) {
82     return InternalError(
83         absl::StrCat("Unable to create CL sync from EGL sync. ",
84                      CLErrorCodeToString(error_code)));
85   }
86   *event = CLEvent(new_event);
87   return OkStatus();
88 }
89 
IsClEventFromEglSyncSupported(const CLDevice & device)90 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
91   return device.SupportsExtension("cl_khr_egl_event");
92 }
93 
CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,AccessType access_type,CLContext * context,CLMemory * memory)94 Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id, AccessType access_type,
95                                   CLContext* context, CLMemory* memory) {
96   cl_int error_code;
97   auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
98                                   gl_ssbo_id, &error_code);
99   if (error_code != CL_SUCCESS) {
100     return InternalError(
101         absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
102                      CLErrorCodeToString(error_code)));
103   }
104   *memory = CLMemory(mem, true);
105   return OkStatus();
106 }
107 
CreateClMemoryFromGlTexture(GLenum texture_target,GLuint texture_id,AccessType access_type,CLContext * context,CLMemory * memory)108 Status CreateClMemoryFromGlTexture(GLenum texture_target, GLuint texture_id,
109                                    AccessType access_type, CLContext* context,
110                                    CLMemory* memory) {
111   cl_int error_code;
112   auto mem =
113       clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
114                             texture_target, 0, texture_id, &error_code);
115   if (error_code != CL_SUCCESS) {
116     return InternalError(
117         absl::StrCat("Unable to create CL buffer from GL texture. ",
118                      CLErrorCodeToString(error_code)));
119   }
120   *memory = CLMemory(mem, true);
121   return OkStatus();
122 }
123 
IsGlSharingSupported(const CLDevice & device)124 bool IsGlSharingSupported(const CLDevice& device) {
125   return clCreateFromGLBuffer && clCreateFromGLTexture &&
126          device.SupportsExtension("cl_khr_gl_sharing");
127 }
128 
~AcquiredGlObjects()129 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
130 
Acquire(const std::vector<cl_mem> & memory,cl_command_queue queue,const std::vector<cl_event> & wait_events,CLEvent * acquire_event,AcquiredGlObjects * objects)131 Status AcquiredGlObjects::Acquire(const std::vector<cl_mem>& memory,
132                                   cl_command_queue queue,
133                                   const std::vector<cl_event>& wait_events,
134                                   CLEvent* acquire_event,
135                                   AcquiredGlObjects* objects) {
136   if (!memory.empty()) {
137     cl_event new_event;
138     cl_int error_code = clEnqueueAcquireGLObjects(
139         queue, memory.size(), memory.data(), wait_events.size(),
140         wait_events.data(), acquire_event ? &new_event : nullptr);
141     if (error_code != CL_SUCCESS) {
142       return InternalError(absl::StrCat("Unable to acquire GL object. ",
143                                         CLErrorCodeToString(error_code)));
144     }
145     if (acquire_event) {
146       *acquire_event = CLEvent(new_event);
147     }
148     clFlush(queue);
149   }
150   *objects = AcquiredGlObjects(memory, queue);
151   return OkStatus();
152 }
153 
Release(const std::vector<cl_event> & wait_events,CLEvent * release_event)154 Status AcquiredGlObjects::Release(const std::vector<cl_event>& wait_events,
155                                   CLEvent* release_event) {
156   if (queue_ && !memory_.empty()) {
157     cl_event new_event;
158     cl_int error_code = clEnqueueReleaseGLObjects(
159         queue_, memory_.size(), memory_.data(), wait_events.size(),
160         wait_events.data(), release_event ? &new_event : nullptr);
161     if (error_code != CL_SUCCESS) {
162       return InternalError(absl::StrCat("Unable to release GL object. ",
163                                         CLErrorCodeToString(error_code)));
164     }
165     if (release_event) {
166       *release_event = CLEvent(new_event);
167     }
168     clFlush(queue_);
169     queue_ = nullptr;
170   }
171   return OkStatus();
172 }
173 
GlInteropFabric(EGLDisplay egl_display,Environment * environment)174 GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
175                                  Environment* environment)
176     : is_egl_sync_supported_(true),
177       is_egl_to_cl_mapping_supported_(
178           IsClEventFromEglSyncSupported(environment->device())),
179       is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
180       egl_display_(egl_display),
181       context_(environment->context().context()),
182       queue_(environment->queue()->queue()) {}
183 
RegisterMemory(cl_mem memory)184 void GlInteropFabric::RegisterMemory(cl_mem memory) {
185   memory_.push_back(memory);
186 }
187 
UnregisterMemory(cl_mem memory)188 void GlInteropFabric::UnregisterMemory(cl_mem memory) {
189   auto it = std::find(memory_.begin(), memory_.end(), memory);
190   if (it != memory_.end()) {
191     memory_.erase(it);
192   }
193 }
194 
Start()195 Status GlInteropFabric::Start() {
196   if (!is_enabled()) {
197     return OkStatus();
198   }
199 
200   // In GL-CL interoperability, we need to make sure GL finished processing of
201   // all commands that might affect GL objects. There are a few ways:
202   //   a) glFinish
203   //      slow, but portable
204   //   b) EglSync + ClientWait
205   //      faster alternative for glFinish, but still slow as it stalls GPU
206   //      pipeline.
207   //   c) EglSync->CLEvent or GlSync->CLEvent mapping
208   //      Fast, as it allows to map sync to CL event and use it as a dependency
209   //      later without stalling GPU pipeline.
210   if (is_egl_sync_supported_) {
211     EglSync sync;
212     RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
213     if (is_egl_to_cl_mapping_supported_) {
214       // (c) EglSync->CLEvent or GlSync->CLEvent mapping
215       glFlush();
216       RETURN_IF_ERROR(
217           CreateClEventFromEglSync(context_, sync, &inbound_event_));
218     } else {
219       // (b) EglSync + ClientWait
220       RETURN_IF_ERROR(sync.ClientWait());
221     }
222   } else {
223     // (a) glFinish / GL fence sync
224     RETURN_IF_ERROR(gl::GlActiveSyncWait());
225   }
226 
227   // Acquire all GL objects needed while processing.
228   auto make_acquire_wait = [&]() -> std::vector<cl_event> {
229     if (inbound_event_.is_valid()) {
230       return {inbound_event_.event()};
231     }
232     return {};
233   };
234   return AcquiredGlObjects::Acquire(memory_, queue_, make_acquire_wait(),
235                                     nullptr, &gl_objects_);
236 }
237 
Finish()238 Status GlInteropFabric::Finish() {
239   if (!is_enabled()) {
240     return OkStatus();
241   }
242   RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event_));
243 
244   // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
245   //   EglSync egl_outbound_sync;
246   //   RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event_.event(),
247   //                                            egl_display_,
248   //                                            &egl_outbound_sync));
249   //   // Instruct GL pipeline to wait until corresponding CL event is signaled.
250   //   RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
251   //   glFlush();
252   // } else {
253   //   // Slower option if proper sync is not supported. It is equivalent to
254   //   // clFinish, but, hopefully, faster.
255   //   outbound_event_.Wait();
256   // }
257 
258   // This slow sync is the only working solution right now. We have to debug why
259   // above version is not working fast and reliable.
260   outbound_event_.Wait();
261   return OkStatus();
262 }
263 
264 }  // namespace cl
265 }  // namespace gpu
266 }  // namespace tflite
267