1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
18
19 #include <cstring>
20 #include <functional>
21 #include <vector>
22
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/status.h"
25 #include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
26 #include "tensorflow/lite/delegates/gpu/gl/gl_errors.h"
27 #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
28
29 namespace tflite {
30 namespace gpu {
31 namespace gl {
32
33 // Buffer is an RAII wrapper for OpenGL buffer object.
34 // See https://www.khronos.org/opengl/wiki/Buffer_Object for more information.
35 //
36 // Buffer is moveable but not copyable.
37 class GlBuffer {
38 public:
39 // @param has_ownership indicates that GlBuffer is responsible for
40 // corresponding GL buffer deletion.
GlBuffer(GLenum target,GLuint id,size_t bytes_size,size_t offset,bool has_ownership)41 GlBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
42 bool has_ownership)
43 : target_(target),
44 id_(id),
45 bytes_size_(bytes_size),
46 offset_(offset),
47 has_ownership_(has_ownership) {}
48
49 // Creates invalid buffer.
GlBuffer()50 GlBuffer() : GlBuffer(GL_INVALID_ENUM, GL_INVALID_INDEX, 0, 0, false) {}
51
52 // Move-only
53 GlBuffer(GlBuffer&& buffer);
54 GlBuffer& operator=(GlBuffer&& buffer);
55 GlBuffer(const GlBuffer&) = delete;
56 GlBuffer& operator=(const GlBuffer&) = delete;
57
58 ~GlBuffer();
59
60 // Reads data from buffer into CPU memory. Data should point to a region that
61 // has at least bytes_size available.
62 template <typename T>
63 absl::Status Read(absl::Span<T> data) const;
64
65 // Writes data to a buffer.
66 template <typename T>
67 absl::Status Write(absl::Span<const T> data);
68
69 // Maps GPU memory to CPU address space and calls reader that may read from
70 // that memory.
71 template <typename T>
72 absl::Status MappedRead(
73 const std::function<absl::Status(absl::Span<const T>)>& reader) const;
74
75 // Maps GPU memory to CPU address space and calls writer that may write into
76 // that memory.
77 template <typename T>
78 absl::Status MappedWrite(
79 const std::function<absl::Status(absl::Span<T>)>& writer);
80
81 absl::Status MakeView(size_t offset, size_t bytes_size, GlBuffer* gl_buffer);
82
83 // Makes a copy without ownership of the buffer.
84 GlBuffer MakeRef();
85
86 // Binds a buffer to an index.
87 absl::Status BindToIndex(uint32_t index) const;
88
89 // Releases the ownership of the buffer object.
Release()90 void Release() { has_ownership_ = false; }
91
bytes_size()92 size_t bytes_size() const { return bytes_size_; }
93
target()94 const GLenum target() const { return target_; }
95
id()96 const GLuint id() const { return id_; }
97
is_valid()98 bool is_valid() const { return id_ != GL_INVALID_INDEX; }
99
offset()100 size_t offset() const { return offset_; }
101
102 // @return true if this object actually owns corresponding GL buffer
103 // and manages it's lifetime.
has_ownership()104 bool has_ownership() const { return has_ownership_; }
105
106 private:
107 void Invalidate();
108
109 GLenum target_;
110 GLuint id_;
111 size_t bytes_size_;
112 size_t offset_;
113 bool has_ownership_;
114 };
115
116 absl::Status CopyBuffer(const GlBuffer& read_buffer,
117 const GlBuffer& write_buffer);
118
119 absl::Status GetSSBOSize(GLuint id, int64_t* size_bytes);
120
121 // Creates new shader storage buffer that will be modified and used many
122 // times.
123 //
124 // See https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object for
125 // details.
126 template <typename T>
127 absl::Status CreateReadWriteShaderStorageBuffer(uint32_t num_elements,
128 GlBuffer* gl_buffer);
129
130 // Creates new shader storage buffer that will be filled with data once which
131 // will be used many times.
132 template <typename T>
133 absl::Status CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,
134 GlBuffer* gl_buffer);
135
136 // Adapts raw Buffer::Read method to read data into a vector.
137 template <typename T>
AppendFromBuffer(const GlBuffer & buffer,std::vector<T> * data)138 absl::Status AppendFromBuffer(const GlBuffer& buffer, std::vector<T>* data) {
139 if (buffer.bytes_size() % sizeof(T) != 0) {
140 return absl::InvalidArgumentError("Buffer is not aligned");
141 }
142 size_t num_elements = buffer.bytes_size() / sizeof(T);
143 data->resize(data->size() + num_elements);
144 return buffer.Read<T>(
145 absl::MakeSpan(data->data() + data->size() - num_elements, num_elements));
146 }
147
148 // Persistent buffer provides CPU pointer to the buffer that is valid all the
149 // time. A user should properly synchronize the access to the buffer on CPU and
150 // GPU sides.
151 class GlPersistentBuffer : public GlBuffer {
152 public:
153 GlPersistentBuffer(GLenum target, GLuint id, size_t bytes_size, size_t offset,
154 bool has_ownership, void* data);
155 GlPersistentBuffer();
156
157 // Move-only
158 GlPersistentBuffer(GlPersistentBuffer&& buffer);
159 GlPersistentBuffer& operator=(GlPersistentBuffer&& buffer);
160 GlPersistentBuffer(const GlPersistentBuffer&) = delete;
161 GlPersistentBuffer& operator=(const GlPersistentBuffer&) = delete;
162
163 ~GlPersistentBuffer();
164
data()165 void* data() { return data_; }
166
167 private:
168 void* data_;
169 };
170
171 // Creates read-write persistent buffer with valid CPU pointer
172 absl::Status CreatePersistentBuffer(size_t size, GlPersistentBuffer* gl_buffer);
173
174 ////////////////////////////////////////////////////////////////////////////////
175 // Implementation details are below.
176
177 namespace gl_buffer_internal {
178
179 // RAII for creating and/or owning buffer id.
180 class BufferId {
181 public:
BufferId()182 BufferId() : id_(GL_INVALID_INDEX) {
183 TFLITE_GPU_CALL_GL(glGenBuffers, 1 /* number of buffers */, &id_)
184 .IgnoreError();
185 // only possible error here is when a number of buffers is negative.
186 }
187
BufferId(GLuint id)188 explicit BufferId(GLuint id) : id_(id) {}
189
~BufferId()190 ~BufferId() {
191 if (id_ != GL_INVALID_INDEX) {
192 TFLITE_GPU_CALL_GL(glDeleteBuffers, 1, &id_).IgnoreError();
193 }
194 }
195
id()196 GLuint id() const { return id_; }
197
Release()198 GLuint Release() {
199 GLuint id = GL_INVALID_INDEX;
200 std::swap(id, id_);
201 return id;
202 }
203
204 private:
205 GLuint id_;
206 };
207
208 // RAII for binding and unbinding a buffer.
209 class BufferBinder {
210 public:
BufferBinder(GLenum target,GLuint id)211 BufferBinder(GLenum target, GLuint id) : target_(target), prev_id_(0) {
212 TFLITE_GPU_CALL_GL(glBindBuffer, target_, id).IgnoreError();
213 }
214
BufferBinder(GLenum target,GLuint id,GLuint prev_id)215 BufferBinder(GLenum target, GLuint id, GLuint prev_id)
216 : target_(target), prev_id_(prev_id) {
217 TFLITE_GPU_CALL_GL(glBindBuffer, target_, id).IgnoreError();
218 }
219
~BufferBinder()220 ~BufferBinder() {
221 TFLITE_GPU_CALL_GL(glBindBuffer, target_, prev_id_).IgnoreError();
222 }
223
224 private:
225 const GLenum target_;
226 GLuint prev_id_;
227 };
228
229 // RAII for mapping and unmapping a buffer.
230 class BufferMapper {
231 public:
232 BufferMapper(GLenum target, size_t offset, size_t bytes, GLbitfield access);
233
234 ~BufferMapper();
235
data()236 void* data() { return data_; }
237
238 private:
239 const GLenum target_;
240 void* data_;
241 };
242
243 } // namespace gl_buffer_internal
244
245 template <typename T>
CreateReadWriteShaderStorageBuffer(uint32_t num_elements,GlBuffer * gl_buffer)246 absl::Status CreateReadWriteShaderStorageBuffer(uint32_t num_elements,
247 GlBuffer* gl_buffer) {
248 gl_buffer_internal::BufferId id;
249 gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
250 // TODO(akulik): benchmark DYNAMIC vs STREAM buffer
251 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferData, GL_SHADER_STORAGE_BUFFER,
252 num_elements * sizeof(T), nullptr,
253 GL_STREAM_COPY));
254 *gl_buffer = GlBuffer{GL_SHADER_STORAGE_BUFFER, id.Release(),
255 num_elements * sizeof(T), 0, true};
256 return absl::OkStatus();
257 }
258
259 template <typename T>
CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,GlBuffer * gl_buffer)260 absl::Status CreateReadOnlyShaderStorageBuffer(absl::Span<const T> data,
261 GlBuffer* gl_buffer) {
262 gl_buffer_internal::BufferId id;
263 gl_buffer_internal::BufferBinder binder(GL_SHADER_STORAGE_BUFFER, id.id());
264 RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glBufferData, GL_SHADER_STORAGE_BUFFER,
265 data.size() * sizeof(T), data.data(),
266 GL_STATIC_READ));
267 *gl_buffer = GlBuffer{GL_SHADER_STORAGE_BUFFER, id.Release(),
268 data.size() * sizeof(T), 0, true};
269 return absl::OkStatus();
270 }
271
272 template <typename T>
Read(absl::Span<T> data)273 absl::Status GlBuffer::Read(absl::Span<T> data) const {
274 if (data.size() * sizeof(T) < bytes_size()) {
275 return absl::InvalidArgumentError(
276 "Read from buffer failed. Destination data is shorter than buffer.");
277 }
278 // TODO(akulik): glCopyBufferSubData is actually available in ES 3.1, try it.
279 return MappedRead<T>([this, data](absl::Span<const T> src) {
280 std::memcpy(data.data(), src.data(), bytes_size());
281 return absl::OkStatus();
282 });
283 }
284
285 template <typename T>
Write(absl::Span<const T> data)286 absl::Status GlBuffer::Write(absl::Span<const T> data) {
287 if (data.size() * sizeof(T) > bytes_size_) {
288 return absl::InvalidArgumentError(
289 "Write to buffer failed. Source data is larger than buffer.");
290 }
291 gl_buffer_internal::BufferBinder binder(target_, id_);
292 return TFLITE_GPU_CALL_GL(glBufferSubData, target_, offset_, bytes_size_,
293 data.data());
294 }
295
296 template <typename T>
MappedRead(const std::function<absl::Status (absl::Span<const T> d)> & reader)297 absl::Status GlBuffer::MappedRead(
298 const std::function<absl::Status(absl::Span<const T> d)>& reader) const {
299 if (bytes_size_ % sizeof(T) != 0) {
300 return absl::InvalidArgumentError("Buffer is not aligned");
301 }
302 gl_buffer_internal::BufferBinder binder(target_, id_);
303 gl_buffer_internal::BufferMapper mapper(target_, offset_, bytes_size_,
304 GL_MAP_READ_BIT);
305 if (!mapper.data()) {
306 return GetOpenGlErrors();
307 }
308 return reader(absl::MakeSpan(reinterpret_cast<const T*>(mapper.data()),
309 bytes_size_ / sizeof(T)));
310 }
311
312 template <typename T>
MappedWrite(const std::function<absl::Status (absl::Span<T> d)> & writer)313 absl::Status GlBuffer::MappedWrite(
314 const std::function<absl::Status(absl::Span<T> d)>& writer) {
315 if (bytes_size_ % sizeof(T) != 0) {
316 return absl::InvalidArgumentError("Buffer is not aligned");
317 }
318 gl_buffer_internal::BufferBinder binder(target_, id_);
319 gl_buffer_internal::BufferMapper mapper(target_, offset_, bytes_size_,
320 GL_MAP_WRITE_BIT);
321 if (!mapper.data()) {
322 return GetOpenGlErrors();
323 }
324 return writer(absl::MakeSpan(reinterpret_cast<T*>(mapper.data()),
325 bytes_size_ / sizeof(T)));
326 }
327
328 } // namespace gl
329 } // namespace gpu
330 } // namespace tflite
331
332 #endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_GL_BUFFER_H_
333