/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/gl/api2.h"

#include <algorithm>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler.h"
#include "tensorflow/lite/delegates/gpu/gl/egl_environment.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/gl/kernels/registry.h"
#include "tensorflow/lite/delegates/gpu/gl/object.h"
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
#include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h"
#include "tensorflow/lite/delegates/gpu/gl/runtime.h"
#include "tensorflow/lite/delegates/gpu/gl/variable.h"
#include "tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.h"

namespace tflite {
namespace gpu {
namespace gl {
namespace {

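// Builds the preamble prepended to every generated compute shader. As an
// illustration, GetShaderHeader(uint3(8, 4, 1)) yields:
//   #version 310 es
//   layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in;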
std::string GetShaderHeader(uint3 localsize) {
  return absl::StrCat("#version 310 es\nlayout(local_size_x = ", localsize.x,
                      ", local_size_y = ", localsize.y,
                      ", local_size_z = ", localsize.z, ") in;\n");
}

// Wraps the given SSBO into a GlBuffer object that does not take ownership.
absl::Status WrapSSBO(OpenGlBuffer ssbo, GlBuffer* buffer) {
  int64_t size_bytes;
  RETURN_IF_ERROR(GetSSBOSize(ssbo.id, &size_bytes));
  *buffer = GlBuffer(GL_SHADER_STORAGE_BUFFER, ssbo.id, size_bytes, 0, false);
  return absl::OkStatus();
}

absl::Status MaybeAllocateGlBuffer(const TensorObjectDef& def,
                                   GlBuffer* ssbo) {
  if (def.object_def.object_type != gpu::ObjectType::OPENGL_SSBO) {
    return absl::InvalidArgumentError("Tensor object is not GL SSBO");
  }
  const uint32_t num_elements = NumElements(def);
  switch (def.object_def.data_type) {
    case DataType::FLOAT32:
      return CreateReadWriteShaderStorageBuffer<float>(num_elements, ssbo);
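    // FLOAT16 elements are allocated as uint16_t: C++ has no built-in
    // half-float type, and only the element size matters for allocation.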
    case DataType::FLOAT16:
      return CreateReadWriteShaderStorageBuffer<uint16_t>(num_elements, ssbo);
    default:
      return absl::InternalError(
          "Unable to create new GL SSBO. Unsupported data type.");
  }
  return absl::OkStatus();
}

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj,
                   ObjectManager* objects)
      : TensorTie(def), objects_(objects), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    return converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          ObjectManager* objects,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<DefaultTensorTie>(def, TensorObject{}, objects);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          TensorObject internal_object,
                          std::unique_ptr<TensorTie>* tie) {
    if (!IsValid(def.internal_def, internal_object)) {
      return absl::InternalError("Internal object does not match definition.");
    }

    auto tie_impl =
        absl::make_unique<DefaultTensorTie>(def, internal_object, nullptr);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::OkStatus();
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::OkStatus();
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;

    // The internal object is left uninitialized when the external object is
    // going to be used as is, with no conversion. In that case we don't need
    // a separate internal object; we just register the appropriate external
    // object in the object manager for future binding in the inference
    // runner.
    if (!IsObjectInitialized(internal_obj_)) {
      if (def().external_def.object_def.object_type ==
          gpu::ObjectType::OPENGL_SSBO) {
        auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
        GlBuffer buffer;
        RETURN_IF_ERROR(WrapSSBO(*ssbo, &buffer));
        RETURN_IF_ERROR(objects_->RegisterBuffer(def().id, std::move(buffer)));
      } else {
        return absl::InternalError("Unexpected object type.");
      }
    }
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  bool IsSameDef() const {
    const auto& external_def = def().external_def.object_def;
    const auto& internal_def = def().internal_def.object_def;
    return (external_def.object_type == internal_def.object_type &&
            external_def.data_type == internal_def.data_type &&
            external_def.data_layout == internal_def.data_layout) ||
           // Check for equivalent layouts that have the same size.
           (external_def.object_type == internal_def.object_type &&
            external_def.data_type == internal_def.data_type &&
            external_def.data_layout == DataLayout::BHWC &&
            internal_def.data_layout == DataLayout::DHWC4 &&
            def().external_def.dimensions.c == 4);
  }

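  // Init() distinguishes four cases, depending on whether the external object
  // is user-provided and whether the external and internal definitions match:
  //   provided + same def:       register a stub buffer for late binding;
  //   provided + different def:  allocate an internal object and convert;
  //   allocated + same def:      share a single object for both roles;
  //   allocated + different def: allocate both sides and convert.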
  absl::Status Init(TensorObjectConverterBuilder* converter_builder) {
    // First, check whether the external object is user-provided.
    const auto& external_def = def().external_def.object_def;

    const bool is_same_def = IsSameDef();

    if (!is_same_def) {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }

    if (external_def.user_provided) {
      if (is_same_def) {
        // Entering this scope indicates that the external object is used with
        // no conversion to an internal one. We still need to register a stub
        // buffer in the object manager, even though the real external object
        // is not available yet. Later, when SetExternalObject() is called,
        // the proper external object will overwrite this record. The stub
        // value allows us to correctly prepare the runtime for the late
        // binding of this object.
        GlBuffer invalid_buffer;
        RETURN_IF_ERROR(
            objects_->RegisterBuffer(def().id, std::move(invalid_buffer)));
        return absl::OkStatus();
      }
      // Object is provided by a user, but the runtime expects a different
      // object type. Therefore, we have to allocate an internal object and
      // convert.
      return MaybeAllocateInternalObject();
    } else {
      RETURN_IF_ERROR(MaybeAllocateInternalObject());

      if (is_same_def) {
        // Object is NOT provided by a user, but it matches the definition
        // expected by the runtime. Conversion is not needed.
        external_obj_ = internal_obj_;
        return absl::OkStatus();
      }

      // Object is NOT provided by a user.
      return MaybeAllocateExternalObject();
    }
    return absl::OkStatus();
  }

  absl::Status MaybeAllocateInternalObject() {
    const TensorObjectDef& d = def().internal_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case gpu::ObjectType::OPENGL_SSBO: {
        GlBuffer ssbo;
        RETURN_IF_ERROR(MaybeAllocateGlBuffer(d, &ssbo));
        internal_obj_ = OpenGlBuffer{ssbo.id()};
        RETURN_IF_ERROR(objects_->RegisterBuffer(def().id, std::move(ssbo)));
        break;
      }
      // TODO(akulik): support textures as internal object when compiler
      // permits.
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  absl::Status MaybeAllocateExternalObject() {
    const TensorObjectDef& d = def().external_def;
    switch (d.object_def.object_type) {
      case gpu::ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case gpu::ObjectType::OPENGL_SSBO: {
        RETURN_IF_ERROR(MaybeAllocateGlBuffer(d, &external_ssbo_));
        external_obj_ = OpenGlBuffer{external_ssbo_.id()};
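        // Wrapping the new SSBO queries its size via GetSSBOSize, which acts
        // as a sanity check that the buffer is usable; the non-owning wrapper
        // itself is discarded.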
        GlBuffer wrapped_ssbo;
        RETURN_IF_ERROR(
            WrapSSBO(OpenGlBuffer{external_ssbo_.id()}, &wrapped_ssbo));
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  ObjectManager* objects_;

  // Hold references to objects.
  TensorObject internal_obj_;
  TensorObject external_obj_;

  // Hold actual objects.
  GlBuffer external_ssbo_;
  std::vector<uint8_t> cpu_memory_;

  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenGL buffer and then does a two-step
// conversion. It handles cases where one-step conversion is not supported,
// e.g.:
// - CPU BHWC -> GL buffer BHWC -> GL buffer DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          ObjectManager* objects,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, objects));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
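  // The outer tie converts between the user's external object and an
  // intermediate SSBO that keeps the external layout; the inner tie converts
  // between that SSBO and the FLOAT32/DHWC4 SSBO the compiled shaders expect.
  // The inner tie's external object is reused as the outer tie's internal one
  // (see Init() below).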
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type =
        gpu::ObjectType::OPENGL_SSBO;
    // Will not allocate a new SSBO.
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.id = def.id;
    inner_def.external_def = outer_def.internal_def;
    // Should not allocate an external object.
    inner_def.external_def.object_def.user_provided = false;
    // Reflects what is actually supported by the compiler.
    inner_def.internal_def.dimensions = inner_def.external_def.dimensions;
    inner_def.internal_def.object_def.data_type = DataType::FLOAT32;
    inner_def.internal_def.object_def.data_layout = DataLayout::DHWC4;
    inner_def.internal_def.object_def.object_type =
        gpu::ObjectType::OPENGL_SSBO;
    // It may allocate another internal object, which should be registered
    // with the ObjectManager.
    inner_def.internal_def.object_def.user_provided = false;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    ObjectManager* objects) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, converter_builder,
                                          objects, &inner_tie_));
    return DefaultTensorTie::New(defs.first, converter_builder,
                                 inner_tie_->GetExternalObject(), &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

// Responsible for creating new tensor tie objects.
class TensorTieFactory {
 public:
  explicit TensorTieFactory(const InferenceEnvironmentOptions& env_options)
      : converter_builder_(NewConverterBuilder(env_options.queue)) {}

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (DefaultTensorTie::IsSupported(def, *converter_builder_) ||
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def, ObjectManager* objects,
                            std::unique_ptr<TensorTie>* tie) {
    auto converter = converter_builder_.get();
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, converter, objects, tie);
    }
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, converter, objects, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

class InferenceRunnerImpl : public InferenceRunner {
 public:
  InferenceRunnerImpl(std::unique_ptr<Runtime> runtime,
                      std::unique_ptr<ObjectManager> objects)
      : runtime_(std::move(runtime)), external_objects_(std::move(objects)) {}

  absl::Status Initialize(const std::vector<TensorTieDef>& input_defs,
                          const std::vector<TensorTieDef>& output_defs,
                          TensorTieFactory* tie_factory) {
    RETURN_IF_ERROR(LinkTensors(input_defs, tie_factory, &input_tensor_ties_));
    RETURN_IF_ERROR(
        LinkTensors(output_defs, tie_factory, &output_tensor_ties_));
    for (const auto& output_def : output_defs) {
      output_to_cpu_ |= output_def.external_def.object_def.object_type ==
                        gpu::ObjectType::CPU_MEMORY;
    }
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(input_tensor_ties_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(output_tensor_ties_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= input_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = input_tensor_ties_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= output_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = output_tensor_ties_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= input_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return input_tensor_ties_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= output_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return output_tensor_ties_[index]->SetExternalObject(object);
  }

  absl::Status Run() override {
    for (auto& obj : input_tensor_ties_) {
      RETURN_IF_ERROR(obj->CopyFromExternalObject());
    }
    RETURN_IF_ERROR(runtime_->Execute());
    for (auto& obj : output_tensor_ties_) {
      RETURN_IF_ERROR(obj->CopyToExternalObject());
    }
    RETURN_IF_ERROR(runtime_->command_queue()->Flush());
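    // Block only when results must be visible to the CPU; GL consumers are
    // expected to synchronize against the flushed queue themselves.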
    if (output_to_cpu_) {
      RETURN_IF_ERROR(runtime_->command_queue()->WaitForCompletion());
    }
    return absl::OkStatus();
  }

 private:
  absl::Status LinkTensors(const std::vector<TensorTieDef>& defs,
                           TensorTieFactory* tie_factory,
                           std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(
          tie_factory->NewTensorTie(def, external_objects_.get(), &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  std::unique_ptr<Runtime> runtime_;
  std::unique_ptr<ObjectManager> external_objects_;
  std::vector<std::unique_ptr<TensorTie>> input_tensor_ties_;
  std::vector<std::unique_ptr<TensorTie>> output_tensor_ties_;
  bool output_to_cpu_ = false;
};

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  InferenceBuilderImpl(const InferenceEnvironmentOptions& env_options,
                       const InferenceOptions& options, GraphFloat32 graph,
                       const GpuInfo* gpu_info)
      : env_options_(env_options),
        options_(options),
        graph_(std::move(graph)),
        gpu_info_(gpu_info),
        tie_factory_(env_options_) {}

  absl::Status Initialize() {
    inputs_ = LinkTensors(graph_.inputs());
    outputs_ = LinkTensors(graph_.outputs());
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const final {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const final {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) final {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) final {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_.IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) final {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_.IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) final {
    auto kernels = NewNodeShaderRegistry();
    CompilationOptions compiler_options;
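    // GetPosition() reports where a priority appears in the ordered options
    // (1 == highest), so precision loss is allowed whenever MAX_PRECISION is
    // not ranked first, and parameters are inlined only when MIN_LATENCY is
    // ranked first under SUSTAINED_SPEED usage.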
    compiler_options.allow_precision_loss =
        GetPosition(options_, InferencePriority::MAX_PRECISION) > 1;
    compiler_options.inline_parameters =
        options_.usage == InferenceUsage::SUSTAINED_SPEED &&
        GetPosition(options_, InferencePriority::MIN_LATENCY) == 1;
    if (GetRelativeImportance(options_, InferencePriority::MIN_MEMORY_USAGE,
                              InferencePriority::MIN_LATENCY) ==
        PriorityImportance::HIGHER) {
      // Buffers have far better memory utilization.
      compiler_options.preferred_obj_type = ObjectType::BUFFER;
      compiler_options.ref_obj_type = ObjectType::BUFFER;
    }

    auto compiler = NewCompiler(kernels.get(), gpu_info_, compiler_options);
    auto workgroup_calculator = NewDefaultWorkgroupsCalculator(*gpu_info_);
    auto external_objects = absl::make_unique<ObjectManager>();
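    // Shader cache: compiled shaders plus an index keyed by the full shader
    // source, so that identical generated programs are compiled only once.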
    std::vector<GlShader> shaders;
    absl::flat_hash_map<std::string, size_t> shader_to_index;
    RuntimeOptions runtime_options;
    auto runtime =
        absl::make_unique<Runtime>(runtime_options, *gpu_info_,
                                   env_options_.queue, external_objects.get());
    Runtime* runtime_ptr = runtime.get();
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        std::move(runtime), std::move(external_objects));
    RETURN_IF_ERROR(runner_impl->Initialize(inputs_, outputs_, &tie_factory_));
    RETURN_IF_ERROR(
        compiler->Compile(graph_, {}, [&](ShaderCode code) -> absl::Status {
          auto workgroup = workgroup_calculator->Calculate(code);
          size_t shader_index;
          std::string shader_src =
              GetShaderHeader(workgroup) + code.source_code;
          // Check if a shader was already compiled.
          auto it = shader_to_index.find(shader_src);
          if (it == shader_to_index.end()) {
            GlShader shader;
            RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER,
                                                    shader_src, &shader));
            shaders.push_back(std::move(shader));
            shader_to_index.insert({shader_src, shader_to_index.size()});
            shader_index = shader_to_index.size() - 1;
          } else {
            shader_index = it->second;
          }
          auto num_workgroups = DivideRoundUp(code.workload, workgroup);
          return runtime_ptr->AddProgram(shaders[shader_index],
                                         code.parameters, code.objects,
                                         num_workgroups);
        }));
    RETURN_IF_ERROR(runtime_ptr->PrepareForExecution());
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<Value*>& values) {
    std::vector<TensorTieDef> links;
    links.reserve(values.size());
    for (const auto& value : values) {
      TensorObjectDef external_def;
      // So far the compiler always forces inputs and outputs into this fixed
      // format.
      const auto& shape = value->tensor.shape;
      external_def.dimensions = Dimensions(shape.b, shape.h, shape.w, shape.c);
      external_def.object_def.data_type = DataType::FLOAT32;
      external_def.object_def.data_layout = DataLayout::DHWC4;
      external_def.object_def.object_type = gpu::ObjectType::OPENGL_SSBO;

      // The internal object is not expected to be provided by the user: if
      // the external and internal objects have the same defs, the external
      // object is propagated and used as the internal one; otherwise, an
      // internal object is created, since it is not provided by the user.
      TensorObjectDef internal_def = external_def;
      external_def.object_def.user_provided = true;
      internal_def.object_def.user_provided = false;
      AccessType access =
          graph_.IsGraphInput(value->id) ? AccessType::READ : AccessType::WRITE;
      links.push_back({value->id, access, internal_def, external_def});
    }
    return links;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  const InferenceEnvironmentOptions env_options_;
  const InferenceOptions options_;
  GraphFloat32 graph_;
  const GpuInfo* gpu_info_;
  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  TensorTieFactory tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : env_options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(EglEnvironment::NewEglEnvironment(&egl_env_));

    RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
    properties_.is_opengl_available = gpu_info_.IsApiOpenGl31OrAbove();
    if (!properties_.is_opengl_available) {
      return absl::InternalError(
          "OpenGL ES 3.1 or above is required to use OpenGL inference.");
    }
    if (!env_options_.queue) {
      queue_ = NewCommandQueue(gpu_info_);
      env_options_.queue = queue_.get();
    }
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      GraphFloat32&& model, const InferenceOptions& options,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (!IsBatchMatchesForAllValues(model)) {
      return absl::InvalidArgumentError(
          "Only identical batch dimension is supported");
    }
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(
        env_options_, resolved_options, std::move(model), &gpu_info_);
    RETURN_IF_ERROR(builder_impl->Initialize());
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  std::unique_ptr<EglEnvironment> egl_env_;
  std::unique_ptr<CommandQueue> queue_;
  InferenceEnvironmentOptions env_options_;
  GpuInfo gpu_info_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

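// A minimal usage sketch (assumes a `GraphFloat32 graph` built elsewhere and
// valid `env_options`/`options`; error handling is abbreviated):
//
//   std::unique_ptr<InferenceEnvironment> env;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env, nullptr));
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(
//       env->NewInferenceBuilder(std::move(graph), options, &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   RETURN_IF_ERROR(runner->SetInputObject(0, input_object));
//   RETURN_IF_ERROR(runner->Run());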
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace gl
}  // namespace gpu
}  // namespace tflite