/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif

#include <algorithm>
#include <cstring>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};
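
// A minimal sketch of when the no-op tie applies (the defs below are
// hypothetical): if a user asks for the tensor in exactly the representation
// the runtime already uses internally, e.g.
//
//   TensorTieDef def;
//   def.internal_def = runtime_def;  // say, an OPENCL_TEXTURE in DHWC4
//   def.external_def = runtime_def;  // user accepts the same representation
//   // NoopTensorTie::IsSupported(def) == true
//
// then no converter is built and both copy calls are no-ops.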

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
                                d.object_def.data_layout),
            Layout::BHWC};
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};
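
// Illustrative sizing for MaybeAllocateExternalObject above (hypothetical
// dimensions, assuming a BHWC layout where NumElements is b*h*w*c): a
// CPU_MEMORY object with b=1, h=224, w=224, c=3 and FLOAT32 data takes
//
//   bytes_size = 1 * 224 * 224 * 3 * SizeOf(FLOAT32) = 602112 bytes,
//
// which is resized into cpu_memory_ and exposed as CpuMemory{data, size}.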

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It drives the following cases where one-step conversion is not
// supported:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};
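
// How the two ties chain for the CPU BHWC -> CL texture DHWC4 case named in
// the comment above ("outer"/"inner" refer to the pair produced by
// MakeOuterInnerDefs):
//
//   user CPU memory, BHWC        <- outer external def (user provided)
//        |  outer_tie_: one-step CPU <-> CL buffer conversion
//   intermediate CL buffer, BHWC <- outer internal def == inner external def
//        |  inner_tie_: one-step CL buffer <-> CL texture conversion
//   internal CL texture, DHWC4   <- inner internal def
//
// CopyFromExternalObject runs outer then inner; CopyToExternalObject runs
// inner then outer, so data always passes through the intermediate buffer.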

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif
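
// A sketch of how SetExternalObject above rebinds a new SSBO (the id is
// hypothetical):
//
//   OpenGlBuffer ssbo{/*id=*/42};
//   RETURN_IF_ERROR(holder->SetExternalObject(ssbo));
//
// Rebinding the same id is a no-op. A new id unregisters the previous cl_mem
// from the interop fabric, wraps the GL buffer in a fresh cl_mem via
// CreateClMemoryFromGlBuffer, hands that OpenClBuffer to the inner tie, and
// registers the new memory so the fabric can synchronize it around runs.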

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties that link internal tensors to
// user-facing objects.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};
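
// NewTensorTie above tries the tie implementations from cheapest to most
// general: NoopTensorTie (identical defs, zero cost), then DefaultTensorTie
// (one-step conversion), then GlBufferHolder (a GL SSBO wrapped as a CL
// buffer, only when GL interop is compiled in and available), and finally
// TwoStepTensorTie (conversion through an intermediate CL buffer). The first
// supported implementation wins.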

class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    RETURN_IF_ERROR(inputs_[index]->CopyFromExternalObject());
    return queue_->WaitForCompletion();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index."));
    }
    RETURN_IF_ERROR(outputs_[index]->CopyToExternalObject());
    return queue_->WaitForCompletion();
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (const auto& input : inputs_) {
      RETURN_IF_ERROR(input->CopyFromExternalObject());
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    bool has_async_copies = false;
    for (const auto& output : outputs_) {
      RETURN_IF_ERROR(output->CopyToExternalObject());
      if (output->def().external_def.object_def.object_type ==
          ObjectType::CPU_MEMORY) {
        has_async_copies = true;
      }
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
    if (has_async_copies) {
      RETURN_IF_ERROR(queue_->WaitForCompletion());
    }
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};
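
// Run() above proceeds in phases: optional GL interop Start(), per-input
// CopyFromExternalObject(), one submission of the whole graph via
// RunWithoutExternalBufferCopy(), per-output CopyToExternalObject(), and
// optional GL interop Finish(). Only CPU_MEMORY outputs trigger a blocking
// WaitForCompletion(), since host memory must hold valid data before control
// returns; device-resident outputs are left to complete asynchronously.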

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}
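
// Worked example of the mapping above: with the priorities ordered so that
// MAX_PRECISION sits in position 1, inference runs fully in F32; position 2
// selects mixed F32_F16; position 3 (or anything else) selects F16. The
// trailing IsSupported() checks then raise the precision step by step if the
// device cannot run the chosen mode.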

TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}
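
// In other words: when MIN_LATENCY outranks MIN_MEMORY_USAGE, the candidate
// list is {fastest storage type for this device, BUFFER}; otherwise it is
// {most memory-frugal storage type, BUFFER}. The first candidate the
// environment supports wins; UNKNOWN is returned only if even BUFFER is
// unsupported.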

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = absl::make_unique<InferenceContext>();
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(*environment_, options);
    create_info.storage_type =
        GetStorageTypeFromOptions(*environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    if (GetRelativeImportance(options, InferencePriority::MIN_MEMORY_USAGE,
                              InferencePriority::MIN_LATENCY) ==
        PriorityImportance::HIGHER) {
      create_info.hints.Add(ModelHints::kNoWinogradOptimizations);
    }
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model) {
    context_ = absl::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid
      // extra synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};
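
// Typical builder flow (a sketch; error handling is elided and the options
// and graph variables are hypothetical):
//
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, std::move(graph),
//                                            &builder));
//   // Optionally retarget I/O, e.g. feed input 0 from a user CL buffer:
//   ObjectDef gpu_def = builder->inputs()[0].object_def;
//   gpu_def.object_type = ObjectType::OPENCL_BUFFER;
//   gpu_def.user_provided = true;
//   RETURN_IF_ERROR(builder->SetInputObjectDef(0, gpu_def));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));  // consumes the builder state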

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    InferenceContext context;
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(environment_, options);
    create_info.storage_type = GetStorageTypeFromOptions(environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data will be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace

absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}
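
// End-to-end usage sketch for this API (error handling and option tuning
// elided; the graph variable is assumed to come from the model importer):
//
//   InferenceEnvironmentOptions env_options;  // defaults: own device/context
//   std::unique_ptr<InferenceEnvironment> env;
//   InferenceEnvironmentProperties properties;
//   RETURN_IF_ERROR(NewInferenceEnvironment(env_options, &env, &properties));
//
//   InferenceOptions options;
//   options.usage = InferenceUsage::SUSTAINED_SPEED;
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(options, std::move(graph),
//                                            &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   // Per frame: set or copy inputs, runner->Run(), then read outputs.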

}  // namespace cl
}  // namespace gpu
}  // namespace tflite