1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
18
19 #include <cstring>
20 #include <functional>
21 #include <vector>
22
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/status.h"
25 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
26 #include "tensorflow/lite/delegates/gpu/gl/gl_errors.h"
27 #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
28
29 namespace tflite {
30 namespace gpu {
31 namespace gl {
32
33 // Buffer is an RAII wrapper for OpenGL buffer object.
34 // See https://www.khronos.org/opengl/wiki/Buffer_Object for more information.
35 //
36 // Buffer is moveable but not copyable.
37 class GlBuffer {
38 public:
39 // @param has_ownership indicates that GlBuffer is responsible for
40 // corresponding GL buffer deletion.
GlBuffer(GLenum target,GLuint id,size_t bytes_size,size_t offset,bool has_ownership)41 GlBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
42 bool has_ownership)
43 : target_(target),
44 id_(id),
45 bytes_size_(bytes_size),
46 offset_(offset),
47 has_ownership_(has_ownership) {}
48
49 // Creates invalid buffer.
GlBuffer()50 GlBuffer() : GlBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX, 0, 0, false) {}
51
52 // Move-only
53 GlBuffer(GlBuffer&& buffer);
54 GlBuffer& operator=(GlBuffer&& buffer);
55 GlBuffer(const GlBuffer&) = delete;
56 GlBuffer& operator=(const GlBuffer&) = delete;
57
58 ~GlBuffer();
59
60 // Reads data from buffer into CPU memory. Data should point to a region that
61 // has at least bytes_size available.
62 template <typename T>
63 Status Read(absl::Span<T> data) const;
64
65 // Writes data to a buffer.
66 template <typename T>
67 Status Write(absl::Span<const T> data);
68
69 // Maps GPU memory to CPU address space and calls reader that may read from
70 // that memory.
71 template <typename T>
72 Status MappedRead(
73 const std::function<Status(absl::Span<const T>)>& reader) const;
74
75 // Maps GPU memory to CPU address space and calls writer that may write into
76 // that memory.
77 template <typename T>
78 Status MappedWrite(const std::function<Status(absl::Span<T>)>& writer);
79
80 Status MakeView(size_t offset, size_t bytes_size, GlBuffer* gl_buffer);
81
82 // Makes a copy without ownership of the buffer.
83 GlBuffer MakeRef();
84
85 // Binds a buffer to an index.
86 Status BindToIndex(uint32_t index) const;
87
88 // Releases the ownership of the buffer object.
Release()89 void Release() { has_ownership_ = false; }
90
bytes_size()91 size_t bytes_size() const { return bytes_size_; }
92
target()93 const GLenum target() const { return target_; }
94
id()95 const GLuint id() const { return id_; }
96
is_valid()97 bool is_valid() const { return id_ != GL_INVALID_INDEX; }
98
offset()99 size_t offset() const { return offset_; }
100
101 // @return true if this object actually owns corresponding GL buffer
102 // and manages it's lifetime.
has_ownership()103 bool has_ownership() const { return has_ownership_; }
104
105 private:
106 void Invalidate();
107
108 GLenum target_;
109 GLuint id_;
110 size_t bytes_size_;
111 size_t offset_;
112 bool has_ownership_;
113 };
114
115 Status CopyBuffer(const GlBuffer& read_buffer, const GlBuffer& write_buffer);
116
117 Status GetSSBOSize(GLuint id, int64_t* size_bytes);
118
119 // Creates new shader storage buffer that will be modified and used many
120 // times.
121 //
122 // See https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object for
123 // details.
124 template <typename T>
125 Status CreateReadWriteShaderStorageBuffer(uint32_t num_elements,
126 GlBuffer* gl_buffer);
127
128 // Creates new shader storage buffer that will be filled with data once which
129 // will be used many times.
130 template <typename T>
131 Status CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,
132 GlBuffer* gl_buffer);
133
134 // Adapts raw Buffer::Read method to read data into a vector.
135 template <typename T>
AppendFromBuffer(const GlBuffer & buffer,std::vector<T> * data)136 Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
137 if (buffer.bytes_size() % sizeof(T) != 0) {
138 return InvalidArgumentError("Buffer is not aligned");
139 }
140 size_t num_elements = buffer.bytes_size() / sizeof(T);
141 data->resize(data->size() + num_elements);
142 return buffer.Read<T>(
143 absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
144 }
145
146 // Persistent buffer provides CPU pointer to the buffer that is valid all the
147 // time. A user should properly synchronize the access to the buffer on CPU and
148 // GPU sides.
149 class GlPersistentBuffer : public GlBuffer {
150 public:
151 GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
152 bool has_ownership, void* data);
153 GlPersistentBuffer();
154
155 // Move-only
156 GlPersistentBuffer(GlPersistentBuffer&& buffer);
157 GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);
158 GlPersistentBuffer(const GlPersistentBuffer&) = delete;
159 GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
160
161 ~GlPersistentBuffer();
162
data()163 void* data() { return data_; }
164
165 private:
166 void* data_;
167 };
168
169 // Creates read-write persistent buffer with valid CPU pointer
170 Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
171
172 ////////////////////////////////////////////////////////////////////////////////
173 // Implementation details are below.
174
175 namespace gl_buffer_internal {
176
177 // RAII for creating and/or owning buffer id.
178 class BufferId {
179 public:
BufferId()180 BufferId() : id_(GL_INVALID_INDEX) {
181 TFLITE_GPU_CALL_GL(glGenBuffers, 1 /* number of buffers */, &id_)
182 .IgnoreError();
183 // only possible error here is when a number of buffers is negative.
184 }
185
BufferId(GLuint id)186 explicit BufferId(GLuint id) : id_(id) {}
187
~BufferId()188 ~BufferId() {
189 if (id_ != GL_INVALID_INDEX) {
190 TFLITE_GPU_CALL_GL(glDeleteBuffers, 1, &id_).IgnoreError();
191 }
192 }
193
id()194 GLuint id() const { return id_; }
195
Release()196 GLuint Release() {
197 GLuint id = GL_INVALID_INDEX;
198 std::swap(id, id_);
199 return id;
200 }
201
202 private:
203 GLuint id_;
204 };
205
206 // RAII for binding and unbinding a buffer.
207 class BufferBinder {
208 public:
BufferBinder(GLenum target,GLuint id)209 BufferBinder(GLenum target, GLuint id) : target_(target), prev_id_(0) {
210 TFLITE_GPU_CALL_GL(glBindBuffer, target_, id).IgnoreError();
211 }
212
BufferBinder(GLenum target,GLuint id,GLuint prev_id)213 BufferBinder(GLenum target, GLuint id, GLuint prev_id)
214 : target_(target), prev_id_(prev_id) {
215 TFLITE_GPU_CALL_GL(glBindBuffer, target_, id).IgnoreError();
216 }
217
~BufferBinder()218 ~BufferBinder() {
219 TFLITE_GPU_CALL_GL(glBindBuffer, target_, prev_id_).IgnoreError();
220 }
221
222 private:
223 const GLenum target_;
224 GLuint prev_id_;
225 };
226
227 // RAII for mapping and unmapping a buffer.
228 class BufferMapper {
229 public:
BufferMapper(GLenum target,size_t offset,size_t bytes,GLbitfield access)230 BufferMapper(GLenum target, size_t offset, size_t bytes, GLbitfield access)
231 : target_(target),
232 data_(glMapBufferRange(target_, offset, bytes, access)) {}
233
~BufferMapper()234 ~BufferMapper() { TFLITE_GPU_CALL_GL(glUnmapBuffer, target_).IgnoreError(); }
235
data()236 void* data() { return data_; }
237
238 private:
239 const GLenum target_;
240 void* data_;
241 };
242
243 } // namespace gl_buffer_internal
244
245 template <typename T>
CreateReadWriteShaderStorageBuffer(uint32_t num_elements,GlBuffer * gl_buffer)246 Status CreateReadWriteShaderStorageBuffer(uint32_t num_elements,
247 GlBuffer* gl_buffer) {
248 gl_buffer_internal::BufferId id;
249 gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
250 // TODO(akulik): benchmark DYNAMIC vs STREAM buffer
251 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferData, GL_SHADER_STORAGE_BUFFER,
252 num_elements * sizeof(T), nullptr,
253 GL_STREAM_COPY));
254 *gl_buffer = GlBuffer{GL_SHADER_STORAGE_BUFFER, id.Release(),
255 num_elements * sizeof(T), 0, true};
256 return OkStatus();
257 }
258
259 template <typename T>
CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,GlBuffer * gl_buffer)260 Status CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,
261 GlBuffer* gl_buffer) {
262 gl_buffer_internal::BufferId id;
263 gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
264 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferData, GL_SHADER_STORAGE_BUFFER,
265 data.size() * sizeof(T), data.data(),
266 GL_STATIC_READ));
267 *gl_buffer = GlBuffer{GL_SHADER_STORAGE_BUFFER, id.Release(),
268 data.size() * sizeof(T), 0, true};
269 return OkStatus();
270 }
271
272 template <typename T>
Read(absl::Span<T> data)273 Status GlBuffer::Read(absl::Span<T> data) const {
274 if (data.size() * sizeof(T) < bytes_size()) {
275 return InvalidArgumentError(
276 "Read from buffer failed. Destination data is shorter than buffer.");
277 }
278 // TODO(akulik): glCopyBufferSubData is actually available in ES 3.1, try it.
279 return MappedRead<T>([this, data](absl::Span<const T> src) {
280 std::memcpy(data.data(), src.data(), bytes_size());
281 return OkStatus();
282 });
283 }
284
285 template <typename T>
Write(absl::Span<const T> data)286 Status GlBuffer::Write(absl::Span<const T> data) {
287 if (data.size() * sizeof(T) > bytes_size_) {
288 return InvalidArgumentError(
289 "Write to buffer failed. Source data is larger than buffer.");
290 }
291 gl_buffer_internal::BufferBinder binder(target_, id_);
292 return TFLITE_GPU_CALL_GL(glBufferSubData, target_, offset_, bytes_size_,
293 data.data());
294 }
295
296 template <typename T>
MappedRead(const std::function<Status (absl::Span<const T> d)> & reader)297 Status GlBuffer::MappedRead(
298 const std::function<Status(absl::Span<const T> d)>& reader) const {
299 if (bytes_size_ % sizeof(T) != 0) {
300 return InvalidArgumentError("Buffer is not aligned");
301 }
302 gl_buffer_internal::BufferBinder binder(target_, id_);
303 gl_buffer_internal::BufferMapper mapper(target_, offset_, bytes_size_,
304 GL_MAP_READ_BIT);
305 if (!mapper.data()) {
306 return GetOpenGlErrors();
307 }
308 return reader(absl::MakeSpan(reinterpret_cast<const T*>(mapper.data()),
309 bytes_size_ / sizeof(T)));
310 }
311
312 template <typename T>
MappedWrite(const std::function<Status (absl::Span<T> d)> & writer)313 Status GlBuffer::MappedWrite(
314 const std::function<Status(absl::Span<T> d)>& writer) {
315 if (bytes_size_ % sizeof(T) != 0) {
316 return InvalidArgumentError("Buffer is not aligned");
317 }
318 gl_buffer_internal::BufferBinder binder(target_, id_);
319 gl_buffer_internal::BufferMapper mapper(target_, offset_, bytes_size_,
320 GL_MAP_WRITE_BIT);
321 if (!mapper.data()) {
322 return GetOpenGlErrors();
323 }
324 return writer(absl::MakeSpan(reinterpret_cast<T*>(mapper.data()),
325 bytes_size_ / sizeof(T)));
326 }
327
328 } // namespace gl
329 } // namespace gpu
330 } // namespace tflite
331
332 #endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
333