/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/gl/api2.h"

#include <algorithm>
#include <cstring>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"
#include "tensorflow/lite/delegates/gpu/gl/compiler.h"
#include "tensorflow/lite/delegates/gpu/gl/egl_environment.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/gl/kernels/registry.h"
#include "tensorflow/lite/delegates/gpu/gl/object.h"
#include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
#include "tensorflow/lite/delegates/gpu/gl/request_gpu_info.h"
#include "tensorflow/lite/delegates/gpu/gl/runtime.h"
#include "tensorflow/lite/delegates/gpu/gl/variable.h"
#include "tensorflow/lite/delegates/gpu/gl/workgroups/default_calculator.h"

namespace tflite {
namespace gpu {
namespace gl {
namespace {

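// Builds the GLSL header that pins a compute shader to the given workgroup
// size.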
std::string GetShaderHeader(uint3 localsize) {
  return absl::StrCat("#version 310 es\nlayout(local_size_x = ", localsize.x,
                      ", local_size_y = ", localsize.y,
                      ", local_size_z = ", localsize.z, ") in;\n");
}

// Wraps the given SSBO in a GlBuffer object that does not take ownership.
absl::Status WrapSSBO(OpenGlBuffer ssbo, GlBuffer* buffer) {
  int64_t size_bytes;
  RETURN_IF_ERROR(GetSSBOSize(ssbo.id, &size_bytes));
  *buffer = GlBuffer(GL_SHADER_STORAGE_BUFFER, ssbo.id, size_bytes, 0, false);
  return absl::OkStatus();
}

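// Allocates a new read/write SSBO matching the given tensor definition.
// Only FLOAT32 and FLOAT16 SSBO objects are supported.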
absl::Status MaybeAllocateGlBuffer(const TensorObjectDef& def, GlBuffer* ssbo) {
  if (def.object_def.object_type != gpu::ObjectType::OPENGL_SSBO) {
    return absl::InvalidArgumentError("Tensor object is not GL SSBO");
  }
  const uint32_t num_elements = NumElements(def);
  switch (def.object_def.data_type) {
    case DataType::FLOAT32:
      return CreateReadWriteShaderStorageBuffer<float>(num_elements, ssbo);
    case DataType::FLOAT16:
      return CreateReadWriteShaderStorageBuffer<uint16_t>(num_elements, ssbo);
    default:
      return absl::InternalError(
          "Unable to create new GL SSBO. Unsupported data type.");
  }
  return absl::OkStatus();
}

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj,
                   ObjectManager* objects)
      : TensorTie(def), objects_(objects), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    return converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          ObjectManager* objects,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<DefaultTensorTie>(def, TensorObject{}, objects);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          TensorObject internal_object,
                          std::unique_ptr<TensorTie>* tie) {
    if (!IsValid(def.internal_def, internal_object)) {
      return absl::InternalError("Internal object does not match definition.");
    }

    auto tie_impl =
        absl::make_unique<DefaultTensorTie>(def, internal_object, nullptr);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::OkStatus();
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::OkStatus();
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;

    // The internal object is not initialized when the external object is going
    // to be used as is, with no conversion. In that case we don't need a
    // separate internal object; we just register the external object in the
    // object manager so the inference runner can bind it later.
    if (!IsObjectInitialized(internal_obj_)) {
      if (def().external_def.object_def.object_type ==
          gpu::ObjectType::OPENGL_SSBO) {
        auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
        GlBuffer buffer;
        RETURN_IF_ERROR(WrapSSBO(*ssbo, &buffer));
        RETURN_IF_ERROR(objects_->RegisterBuffer(def().id, std::move(buffer)));
      } else {
        return absl::InternalError("Unexpected object type.");
      }
    }
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
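  // Returns true when the external and internal objects can be shared
  // directly: either the definitions match exactly, or the BHWC/DHWC4 layouts
  // are equivalent because the tensor has exactly four channels.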
  bool IsSameDef() const {
    const auto& external_def = def().external_def.object_def;
    const auto& internal_def = def().internal_def.object_def;
    return (external_def.object_type == internal_def.object_type &&
            external_def.data_type == internal_def.data_type &&
            external_def.data_layout == internal_def.data_layout) ||
           // Check for equivalent layouts that have the same size.
           (external_def.object_type == internal_def.object_type &&
            external_def.data_type == internal_def.data_type &&
            external_def.data_layout == DataLayout::BHWC &&
            internal_def.data_layout == DataLayout::DHWC4 &&
            def().external_def.dimensions.c == 4);
  }

  absl::Status Init(TensorObjectConverterBuilder* converter_builder) {
    // First, check whether the object is user provided.
    const auto& external_def = def().external_def.object_def;

    const bool is_same_def = IsSameDef();

    if (!is_same_def) {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }

    if (external_def.user_provided) {
      if (is_same_def) {
        // Entering this scope indicates that the external object is used with
        // no conversion to an internal one. We still need to register a stub
        // buffer in the object manager, even though the real external object
        // is not available yet. Later, when SetExternalObject() is called, the
        // proper external object will overwrite this record. The stub value
        // allows us to correctly prepare the runtime for the late binding of
        // this object.
        GlBuffer invalid_buffer;
        RETURN_IF_ERROR(
            objects_->RegisterBuffer(def().id, std::move(invalid_buffer)));
        return absl::OkStatus();
      }
      // Object is provided by a user, but the runtime expects a different
      // object type. Therefore, we have to allocate an internal object and
      // convert.
      return MaybeAllocateInternalObject();
    } else {
      RETURN_IF_ERROR(MaybeAllocateInternalObject());

      if (is_same_def) {
        // Object is NOT provided by a user, but it matches the definition
        // expected by the runtime. Conversion is not needed.
        external_obj_ = internal_obj_;
        return absl::OkStatus();
      }

      // Object is NOT provided by a user.
      return MaybeAllocateExternalObject();
    }
    return absl::OkStatus();
  }

  absl::Status MaybeAllocateInternalObject() {
    const TensorObjectDef& d = def().internal_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case gpu::ObjectType::OPENGL_SSBO: {
        GlBuffer ssbo;
        RETURN_IF_ERROR(MaybeAllocateGlBuffer(d, &ssbo));
        internal_obj_ = OpenGlBuffer{ssbo.id()};
        RETURN_IF_ERROR(objects_->RegisterBuffer(def().id, std::move(ssbo)));
        break;
      }
      // TODO(akulik): support textures as internal object when compiler permits
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  absl::Status MaybeAllocateExternalObject() {
    const TensorObjectDef& d = def().external_def;
    switch (d.object_def.object_type) {
      case gpu::ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case gpu::ObjectType::OPENGL_SSBO: {
        RETURN_IF_ERROR(MaybeAllocateGlBuffer(d, &external_ssbo_));
        external_obj_ = OpenGlBuffer{external_ssbo_.id()};
        // Sanity-check that the freshly allocated SSBO can be wrapped; the
        // wrapper itself is discarded.
        GlBuffer wrapped_ssbo;
        RETURN_IF_ERROR(
            WrapSSBO(OpenGlBuffer{external_ssbo_.id()}, &wrapped_ssbo));
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  ObjectManager* objects_;

  // Hold references to objects.
  TensorObject internal_obj_;
  TensorObject external_obj_;

  // Hold actual objects.
  GlBuffer external_ssbo_;
  std::vector<uint8_t> cpu_memory_;

  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};

// Copies data to an intermediate OpenGL buffer and then does a two-step
// conversion. It handles cases where one-step conversion is not supported,
// for example:
//   - CPU BHWC -> GL buffer BHWC -> GL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def,
                          TensorObjectConverterBuilder* converter_builder,
                          ObjectManager* objects,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, objects));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
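  // Splits the original tie definition into an outer tie (external object <->
  // intermediate SSBO) and an inner tie (intermediate SSBO <-> internal
  // FLOAT32/DHWC4 SSBO expected by the compiler).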
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type =
        gpu::ObjectType::OPENGL_SSBO;
    // Will not allocate a new SSBO.
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.id = def.id;
    inner_def.external_def = outer_def.internal_def;
    // Should not allocate an external object.
    inner_def.external_def.object_def.user_provided = false;
    // Reflects what is actually supported by the compiler.
    inner_def.internal_def.dimensions = inner_def.external_def.dimensions;
    inner_def.internal_def.object_def.data_type = DataType::FLOAT32;
    inner_def.internal_def.object_def.data_layout = DataLayout::DHWC4;
    inner_def.internal_def.object_def.object_type =
        gpu::ObjectType::OPENGL_SSBO;
    // It may allocate another internal object and should register it with the
    // ObjectManager.
    inner_def.internal_def.object_def.user_provided = false;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    ObjectManager* objects) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, converter_builder,
                                          objects, &inner_tie_));
    return DefaultTensorTie::New(defs.first, converter_builder,
                                 inner_tie_->GetExternalObject(), &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};

// Responsible for creating new tensor tie objects.
class TensorTieFactory {
 public:
  explicit TensorTieFactory(const InferenceEnvironmentOptions& env_options)
      : converter_builder_(NewConverterBuilder(env_options.queue)) {}

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (DefaultTensorTie::IsSupported(def, *converter_builder_) ||
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def, ObjectManager* objects,
                            std::unique_ptr<TensorTie>* tie) {
    auto converter = converter_builder_.get();
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, converter, objects, tie);
    }
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, converter, objects, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

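// Executes a compiled model: copies inputs from external objects, runs the
// GL programs, and copies outputs back to external objects.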
class InferenceRunnerImpl : public InferenceRunner {
 public:
  InferenceRunnerImpl(std::unique_ptr<Runtime> runtime,
                      std::unique_ptr<ObjectManager> objects)
      : runtime_(std::move(runtime)), external_objects_(std::move(objects)) {}

  absl::Status Initialize(const std::vector<TensorTieDef>& input_defs,
                          const std::vector<TensorTieDef>& output_defs,
                          TensorTieFactory* tie_factory) {
    RETURN_IF_ERROR(LinkTensors(input_defs, tie_factory, &input_tensor_ties_));
    RETURN_IF_ERROR(
        LinkTensors(output_defs, tie_factory, &output_tensor_ties_));
    for (const auto& output_def : output_defs) {
      output_to_cpu_ |= output_def.external_def.object_def.object_type ==
                        gpu::ObjectType::CPU_MEMORY;
    }
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(input_tensor_ties_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(output_tensor_ties_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= input_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = input_tensor_ties_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= output_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = output_tensor_ties_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= input_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return input_tensor_ties_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= output_tensor_ties_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return output_tensor_ties_[index]->SetExternalObject(object);
  }

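  // Copies all inputs in, executes the runtime, copies all outputs out, and
  // flushes the command queue, waiting for completion only when at least one
  // output is bound to CPU memory.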
  absl::Status Run() override {
    for (auto& obj : input_tensor_ties_) {
      RETURN_IF_ERROR(obj->CopyFromExternalObject());
    }
    RETURN_IF_ERROR(runtime_->Execute());
    for (auto& obj : output_tensor_ties_) {
      RETURN_IF_ERROR(obj->CopyToExternalObject());
    }
    RETURN_IF_ERROR(runtime_->command_queue()->Flush());
    if (output_to_cpu_) {
      RETURN_IF_ERROR(runtime_->command_queue()->WaitForCompletion());
    }
    return absl::OkStatus();
  }

 private:
  absl::Status LinkTensors(const std::vector<TensorTieDef>& defs,
                           TensorTieFactory* tie_factory,
                           std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(
          tie_factory->NewTensorTie(def, external_objects_.get(), &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  std::unique_ptr<Runtime> runtime_;
  std::unique_ptr<ObjectManager> external_objects_;
  std::vector<std::unique_ptr<TensorTie>> input_tensor_ties_;
  std::vector<std::unique_ptr<TensorTie>> output_tensor_ties_;
  bool output_to_cpu_ = false;
};

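// Compiles the graph into OpenGL compute programs and assembles an
// InferenceRunnerImpl around them.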
class InferenceBuilderImpl : public InferenceBuilder {
 public:
  InferenceBuilderImpl(const InferenceEnvironmentOptions& env_options,
                       const InferenceOptions& options, GraphFloat32 graph,
                       const GpuInfo* gpu_info)
      : env_options_(env_options),
        options_(options),
        graph_(std::move(graph)),
        gpu_info_(gpu_info),
        tie_factory_(env_options_) {}

  absl::Status Initialize() {
    inputs_ = LinkTensors(graph_.inputs());
    outputs_ = LinkTensors(graph_.outputs());
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const final {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const final {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) final {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) final {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_.IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) final {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_.IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

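  // Translates inference options into compiler options, compiles each graph
  // node into a compute shader (reusing shaders with identical source),
  // registers the resulting programs with the runtime, and returns a
  // ready-to-run InferenceRunner.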
  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) final {
    auto kernels = NewNodeShaderRegistry();
    CompilationOptions compiler_options;
    compiler_options.allow_precision_loss =
        GetPosition(options_, InferencePriority::MAX_PRECISION) > 1;
    compiler_options.inline_parameters =
        options_.usage == InferenceUsage::SUSTAINED_SPEED &&
        GetPosition(options_, InferencePriority::MIN_LATENCY) == 1;
    if (GetRelativeImportance(options_, InferencePriority::MIN_MEMORY_USAGE,
                              InferencePriority::MIN_LATENCY) ==
        PriorityImportance::HIGHER) {
      // Buffers have far better memory utilization.
      compiler_options.preferred_obj_type = ObjectType::BUFFER;
      compiler_options.ref_obj_type = ObjectType::BUFFER;
    }

    auto compiler = NewCompiler(kernels.get(), gpu_info_, compiler_options);
    auto workgroup_calculator = NewDefaultWorkgroupsCalculator(*gpu_info_);
    auto external_objects = absl::make_unique<ObjectManager>();
    std::vector<GlShader> shaders;
    absl::flat_hash_map<std::string, size_t> shader_to_index;
    RuntimeOptions runtime_options;
    auto runtime =
        absl::make_unique<Runtime>(runtime_options, *gpu_info_,
                                   env_options_.queue, external_objects.get());
    Runtime* runtime_ptr = runtime.get();
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        std::move(runtime), std::move(external_objects));
    RETURN_IF_ERROR(runner_impl->Initialize(inputs_, outputs_, &tie_factory_));
    RETURN_IF_ERROR(
        compiler->Compile(graph_, {}, [&](ShaderCode code) -> absl::Status {
          auto workgroup = workgroup_calculator->Calculate(code);
          size_t shader_index;
          std::string shader_src =
              GetShaderHeader(workgroup) + code.source_code;
          // Check if a shader was already compiled.
          auto it = shader_to_index.find(shader_src);
          if (it == shader_to_index.end()) {
            GlShader shader;
            RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER,
                                                    shader_src, &shader));
            shaders.push_back(std::move(shader));
            shader_to_index.insert({shader_src, shader_to_index.size()});
            shader_index = shader_to_index.size() - 1;
          } else {
            shader_index = it->second;
          }
          auto num_workgroups = DivideRoundUp(code.workload, workgroup);
          return runtime_ptr->AddProgram(shaders[shader_index], code.parameters,
                                         code.objects, num_workgroups);
        }));
    RETURN_IF_ERROR(runtime_ptr->PrepareForExecution());
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<Value*>& values) {
    std::vector<TensorTieDef> links;
    links.reserve(values.size());
    for (const auto& value : values) {
      TensorObjectDef external_def;
      // So far the compiler always forces inputs and outputs to be in the
      // fixed format below.
      const auto& shape = value->tensor.shape;
      external_def.dimensions = Dimensions(shape.b, shape.h, shape.w, shape.c);
      external_def.object_def.data_type = DataType::FLOAT32;
      external_def.object_def.data_layout = DataLayout::DHWC4;
      external_def.object_def.object_type = gpu::ObjectType::OPENGL_SSBO;

      // The internal object is not expected to be provided by the user: if the
      // external and internal objects have the same defs, the external object
      // is propagated and simply used as the internal one; otherwise, if they
      // have different defs, an internal object will be created, because it is
      // not provided by the user.
      TensorObjectDef internal_def = external_def;
      external_def.object_def.user_provided = true;
      internal_def.object_def.user_provided = false;
      AccessType access =
          graph_.IsGraphInput(value->id) ? AccessType::READ : AccessType::WRITE;
      links.push_back({value->id, access, internal_def, external_def});
    }
    return links;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  const InferenceEnvironmentOptions env_options_;
  const InferenceOptions options_;
  GraphFloat32 graph_;
  const GpuInfo* gpu_info_;
  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  TensorTieFactory tie_factory_;
};

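// Owns the EGL environment and, when needed, the command queue, and creates
// InferenceBuilder instances for models.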
class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : env_options_(options) {}

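  // Creates the EGL environment, queries GPU info (OpenGL ES 3.1+ is
  // required), and creates a command queue if one was not supplied in the
  // options.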
  absl::Status Init() {
    RETURN_IF_ERROR(EglEnvironment::NewEglEnvironment(&egl_env_));

    RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
    properties_.is_opengl_available = gpu_info_.IsApiOpenGl31OrAbove();
    if (!properties_.is_opengl_available) {
      return absl::InternalError(
          "OpenGL ES 3.1 or above is required to use OpenGL inference.");
    }
    if (!env_options_.queue) {
      queue_ = NewCommandQueue(gpu_info_);
      env_options_.queue = queue_.get();
    }
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      GraphFloat32&& model, const InferenceOptions& options,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (!IsBatchMatchesForAllValues(model)) {
      return absl::InvalidArgumentError(
          "Only identical batch dimension is supported");
    }
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(
        env_options_, resolved_options, std::move(model), &gpu_info_);
    RETURN_IF_ERROR(builder_impl->Initialize());
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  std::unique_ptr<EglEnvironment> egl_env_;
  std::unique_ptr<CommandQueue> queue_;
  InferenceEnvironmentOptions env_options_;
  GpuInfo gpu_info_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

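// Creates the OpenGL inference environment. Properties are reported even when
// initialization fails, so callers can still inspect capability flags.
//
// A minimal usage sketch (illustrative only; `graph`, `options`,
// `input_object`, and `output_object` are hypothetical caller-provided
// values):
//
//   std::unique_ptr<InferenceEnvironment> env;
//   InferenceEnvironmentProperties properties;
//   RETURN_IF_ERROR(NewInferenceEnvironment({}, &env, &properties));
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(std::move(graph), options,
//                                            &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   RETURN_IF_ERROR(runner->SetInputObject(0, input_object));
//   RETURN_IF_ERROR(runner->SetOutputObject(0, output_object));
//   RETURN_IF_ERROR(runner->Run());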
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace gl
}  // namespace gpu
}  // namespace tflite