/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/api.h"

#ifndef CL_DELEGATE_NO_GL
#define CL_DELEGATE_ALLOW_GL
#endif

#include <algorithm>
#include <cstring>

#include "absl/memory/memory.h"
#include "absl/types/span.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_event.h"
#include "tensorflow/lite/delegates/gpu/cl/environment.h"
#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
#include "tensorflow/lite/delegates/gpu/cl/serialization.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/shape.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/tensor.h"

#ifdef CL_DELEGATE_ALLOW_GL
#include <EGL/eglext.h>

#include "tensorflow/lite/delegates/gpu/cl/egl_sync.h"
#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"
#endif

namespace tflite {
namespace gpu {
namespace cl {
namespace {

// Both internal and external defs are identical, therefore nothing to connect
// here.
class NoopTensorTie : public TensorTie {
 public:
  NoopTensorTie(const TensorTieDef& def, TensorObject obj)
      : TensorTie(def), obj_(obj) {}

  static bool IsSupported(const TensorTieDef& def) {
    return def.external_def == def.internal_def;
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("Tensor object is readonly.");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return obj_; }

  absl::Status CopyToExternalObject() final { return absl::OkStatus(); }

  absl::Status CopyFromExternalObject() final { return absl::OkStatus(); }

 private:
  TensorObject obj_;
};

// Does one-step conversion between internal and external objects.
// It may also allocate external objects if requested.
class DefaultTensorTie : public TensorTie {
 public:
  DefaultTensorTie(const TensorTieDef& def, TensorObject internal_obj)
      : TensorTie(def), internal_obj_(internal_obj) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto object_type = def.external_def.object_def.object_type;
#ifdef CL_DELEGATE_ALLOW_GL
    if (def.external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def.external_def.object_def,
                                      def.internal_def.object_def)) {
      return true;
    }
#endif
    return (object_type == ObjectType::OPENCL_BUFFER ||
            object_type == ObjectType::OPENCL_TEXTURE ||
            object_type == ObjectType::CPU_MEMORY) &&
           converter_builder.IsSupported(def.internal_def, def.external_def) &&
           converter_builder.IsSupported(def.external_def, def.internal_def);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<DefaultTensorTie>(def, internal_object);
    RETURN_IF_ERROR(tie_impl->Init(converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    if (!converter_to_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_to_->Convert(internal_obj_, GetExternalObject());
  }

  absl::Status CopyFromExternalObject() final {
    if (!converter_from_) {
      return absl::UnavailableError("Conversion is not available");
    }
    return converter_from_->Convert(GetExternalObject(), internal_obj_);
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    if (!def().external_def.object_def.user_provided) {
      return absl::InvalidArgumentError("External object is read-only");
    }
    if (!IsValid(def().external_def, obj)) {
      return absl::InvalidArgumentError("Given object is not valid");
    }
    external_obj_ = obj;
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

 private:
  absl::Status Init(TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
#ifdef CL_DELEGATE_ALLOW_GL
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().external_def.object_def,
                                      def().internal_def.object_def)) {
      converter_from_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().external_def, def().internal_def, &converter_from_));
    }
    if (def().external_def.object_def.user_provided &&
        GlClBufferCopier::IsSupported(def().internal_def.object_def,
                                      def().external_def.object_def)) {
      converter_to_ = absl::make_unique<GlClBufferCopier>(
          def().internal_def, def().external_def, env);
    } else {
      RETURN_IF_ERROR(converter_builder->MakeConverter(
          def().internal_def, def().external_def, &converter_to_));
    }
#else
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().external_def, def().internal_def, &converter_from_));
    RETURN_IF_ERROR(converter_builder->MakeConverter(
        def().internal_def, def().external_def, &converter_to_));
#endif
    return MaybeAllocateExternalObject(env);
  }

  absl::Status MaybeAllocateExternalObject(Environment* env) {
    const TensorObjectDef& d = def().external_def;
    if (d.object_def.user_provided) {
      return absl::OkStatus();
    }
    switch (d.object_def.object_type) {
      case ObjectType::CPU_MEMORY: {
        size_t bytes_size = NumElements(d) * SizeOf(d.object_def.data_type);
        cpu_memory_.resize(bytes_size);
        external_obj_ = CpuMemory{cpu_memory_.data(), cpu_memory_.size()};
        break;
      }
      case ObjectType::OPENCL_TEXTURE:
      case ObjectType::OPENCL_BUFFER: {
        auto& dims = d.dimensions;
        const BHWC shape(dims.b, dims.h, dims.w, dims.c);
        const TensorDescriptor desc{
            d.object_def.data_type,
            ToTensorStorageType(d.object_def.object_type,
                                d.object_def.data_layout),
            Layout::BHWC};
        RETURN_IF_ERROR(
            AllocateTensorMemory(env->context(), shape, desc, &cl_memory_));
        if (d.object_def.object_type == ObjectType::OPENCL_TEXTURE) {
          external_obj_ = OpenClTexture{cl_memory_.memory()};
        } else {
          external_obj_ = OpenClBuffer{cl_memory_.memory()};
        }
        break;
      }
      default:
        return absl::InternalError("Unexpected object type");
    }
    return absl::OkStatus();
  }

  const TensorObject internal_obj_;
  TensorObject external_obj_;
  CLMemory cl_memory_;
  std::vector<uint8_t> cpu_memory_;
  std::unique_ptr<TensorObjectConverter> converter_to_;
  std::unique_ptr<TensorObjectConverter> converter_from_;
};
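
// Example flow for DefaultTensorTie above: for a user-provided CPU_MEMORY
// external def tied to an internal OpenCL tensor, Init() builds a CPU -> CL
// converter (converter_from_) and a CL -> CPU converter (converter_to_), so
// CopyFromExternalObject()/CopyToExternalObject() each run a single
// conversion. When the external object is not user-provided,
// MaybeAllocateExternalObject() allocates host or OpenCL memory for it.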

// Copies data to an intermediate OpenCL buffer and then does a two-step
// conversion. It handles the following cases where one-step conversion is not
// supported:
//   - CPU BHWC -> CL buffer BHWC -> CL texture DHWC4.
class TwoStepTensorTie : public TensorTie {
 public:
  explicit TwoStepTensorTie(const TensorTieDef& def) : TensorTie(def) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    auto defs = MakeOuterInnerDefs(def);
    return DefaultTensorTie::IsSupported(defs.first, converter_builder) &&
           DefaultTensorTie::IsSupported(defs.second, converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          Environment* env, std::unique_ptr<TensorTie>* tie) {
    auto tie_impl = absl::make_unique<TwoStepTensorTie>(def);
    RETURN_IF_ERROR(tie_impl->Init(internal_object, converter_builder, env));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status CopyToExternalObject() final {
    RETURN_IF_ERROR(inner_tie_->CopyToExternalObject());
    return outer_tie_->CopyToExternalObject();
  }

  absl::Status CopyFromExternalObject() final {
    RETURN_IF_ERROR(outer_tie_->CopyFromExternalObject());
    return inner_tie_->CopyFromExternalObject();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    return outer_tie_->SetExternalObject(obj);
  }

  TensorObject GetExternalObject() final {
    return outer_tie_->GetExternalObject();
  }

 private:
  static std::pair<TensorTieDef, TensorTieDef> MakeOuterInnerDefs(
      const TensorTieDef& def) {
    TensorTieDef outer_def;
    outer_def.external_def = def.external_def;
    outer_def.internal_def = def.external_def;
    outer_def.internal_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    outer_def.internal_def.object_def.user_provided = true;

    TensorTieDef inner_def;
    inner_def.external_def = outer_def.internal_def;
    inner_def.external_def.object_def.user_provided = false;
    inner_def.internal_def = def.internal_def;
    return std::make_pair(outer_def, inner_def);
  }

  absl::Status Init(TensorObject internal_object,
                    TensorObjectConverterBuilder* converter_builder,
                    Environment* env) {
    auto defs = MakeOuterInnerDefs(def());
    RETURN_IF_ERROR(DefaultTensorTie::New(defs.second, internal_object,
                                          converter_builder, env, &inner_tie_));
    return DefaultTensorTie::New(defs.first, inner_tie_->GetExternalObject(),
                                 converter_builder, env, &outer_tie_);
  }

  std::unique_ptr<TensorTie> inner_tie_;
  std::unique_ptr<TensorTie> outer_tie_;
};
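
// Sketch of the defs produced by MakeOuterInnerDefs() for the CPU BHWC ->
// CL texture case above: the outer tie converts the external CPU object
// to/from an intermediate OPENCL_BUFFER def derived from the external def,
// and the inner tie converts that buffer to/from the internal tensor def, so
// each half remains a one-step conversion that DefaultTensorTie supports.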

#ifdef CL_DELEGATE_ALLOW_GL
// Captures GL object into CL context before performing a conversion.
class GlBufferHolder : public TensorTie {
 public:
  GlBufferHolder(const TensorTieDef& def, GlInteropFabric* gl_interop_fabric,
                 Environment* env)
      : TensorTie(def),
        gl_interop_fabric_(gl_interop_fabric),
        environment_(env) {}

  static bool IsSupported(
      const TensorTieDef& def,
      const TensorObjectConverterBuilder& converter_builder) {
    if (!def.external_def.object_def.user_provided ||
        def.external_def.object_def.object_type != ObjectType::OPENGL_SSBO) {
      return false;
    }
    return DefaultTensorTie::IsSupported(MakeClDef(def), converter_builder);
  }

  static absl::Status New(const TensorTieDef& def, TensorObject internal_object,
                          TensorObjectConverterBuilder* converter_builder,
                          GlInteropFabric* gl_interop_fabric, Environment* env,
                          std::unique_ptr<TensorTie>* tie) {
    auto tie_impl =
        absl::make_unique<GlBufferHolder>(def, gl_interop_fabric, env);
    RETURN_IF_ERROR(DefaultTensorTie::New(MakeClDef(def), internal_object,
                                          converter_builder, env,
                                          &tie_impl->tie_));
    *tie = std::move(tie_impl);
    return absl::OkStatus();
  }

  absl::Status SetExternalObject(TensorObject obj) final {
    auto ssbo = absl::get_if<OpenGlBuffer>(&obj);
    if (!ssbo) {
      return absl::InvalidArgumentError("Missing OpenGL SSBO");
    }
    auto old_ssbo = absl::get_if<OpenGlBuffer>(&external_obj_);
    if (old_ssbo && ssbo->id == old_ssbo->id) {
      return absl::OkStatus();
    }
    if (cl_object_.memory()) {
      gl_interop_fabric_->UnregisterMemory(cl_object_.memory());
    }
    RETURN_IF_ERROR(CreateClMemoryFromGlBuffer(
        ssbo->id, def().access_type, &environment_->context(), &cl_object_));
    external_obj_ = obj;
    RETURN_IF_ERROR(tie_->SetExternalObject(OpenClBuffer{cl_object_.memory()}));
    gl_interop_fabric_->RegisterMemory(cl_object_.memory());
    return absl::OkStatus();
  }

  TensorObject GetExternalObject() final { return external_obj_; }

  absl::Status CopyFromExternalObject() final {
    return tie_->CopyFromExternalObject();
  }

  absl::Status CopyToExternalObject() final {
    return tie_->CopyToExternalObject();
  }

 private:
  static TensorTieDef MakeClDef(const TensorTieDef& def) {
    auto cl_def = def;
    cl_def.external_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
    cl_def.external_def.object_def.user_provided = true;
    return cl_def;
  }

  CLMemory cl_object_;
  GlInteropFabric* gl_interop_fabric_;
  Environment* environment_;
  std::unique_ptr<TensorTie> tie_;
  TensorObject external_obj_;
};
#endif
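
// Note on GlBufferHolder above: SetExternalObject() wraps the client's GL SSBO
// into CL memory via CreateClMemoryFromGlBuffer() and registers that memory
// with the GlInteropFabric so GL/CL access can be synchronized around each
// run; the wrapped buffer is then handed to the inner DefaultTensorTie as a
// regular user-provided OpenCL buffer.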

TensorObject TensorToObj(const Tensor& tensor) {
  if (tensor.GetStorageType() == TensorStorageType::BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtr()};
  }
  if (tensor.GetStorageType() == TensorStorageType::IMAGE_BUFFER) {
    return OpenClBuffer{tensor.GetMemoryPtrForWriting()};
  }
  return OpenClTexture{tensor.GetMemoryPtr()};
}

// Responsible for creating new tensor ties that link internal tensors to
// external user-visible objects.
class TensorTieFactory {
 public:
  TensorTieFactory(Environment* env, InferenceContext* context
#ifdef CL_DELEGATE_ALLOW_GL
                   ,
                   GlInteropFabric* gl_interop_fabric
#endif
                   )
      : env_(*env),
        context_(*context),
#ifdef CL_DELEGATE_ALLOW_GL
        gl_interop_fabric_(gl_interop_fabric),
#endif
        converter_builder_(NewConverterBuilder(env)) {
  }

  bool IsSupported(const TensorTieDef& def) const {
    return IsValid(def.external_def.object_def) &&
           (NoopTensorTie::IsSupported(def) ||
            DefaultTensorTie::IsSupported(def, *converter_builder_) ||
#ifdef CL_DELEGATE_ALLOW_GL
            (gl_interop_fabric_ &&
             GlBufferHolder::IsSupported(def, *converter_builder_)) ||
#endif
            TwoStepTensorTie::IsSupported(def, *converter_builder_));
  }

  absl::Status NewTensorTie(const TensorTieDef& def,
                            std::unique_ptr<TensorTie>* tie) {
    TensorObject internal_object = TensorToObj(*context_.GetTensor(def.id));
    auto converter = converter_builder_.get();
    if (NoopTensorTie::IsSupported(def)) {
      *tie = absl::make_unique<NoopTensorTie>(def, internal_object);
      return absl::OkStatus();
    }
    if (DefaultTensorTie::IsSupported(def, *converter)) {
      return DefaultTensorTie::New(def, internal_object, converter, &env_, tie);
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && GlBufferHolder::IsSupported(def, *converter)) {
      return GlBufferHolder::New(def, internal_object, converter,
                                 gl_interop_fabric_, &env_, tie);
    }
#endif
    if (TwoStepTensorTie::IsSupported(def, *converter)) {
      return TwoStepTensorTie::New(def, internal_object, converter, &env_, tie);
    }
    return absl::UnimplementedError("Unsupported tensor tie definition.");
  }

 private:
  Environment& env_;
  InferenceContext& context_;
#ifdef CL_DELEGATE_ALLOW_GL
  GlInteropFabric* gl_interop_fabric_;
#endif
  std::unique_ptr<TensorObjectConverterBuilder> converter_builder_;
};

class InferenceRunnerImpl : public CLInferenceRunner {
 public:
  InferenceRunnerImpl(Environment* environment,
                      std::unique_ptr<InferenceContext> context
#ifdef CL_DELEGATE_ALLOW_GL
                      ,
                      std::unique_ptr<GlInteropFabric> gl_interop_fabric
#endif
                      )
      : queue_(environment->queue()),
        context_(std::move(context))
#ifdef CL_DELEGATE_ALLOW_GL
        ,
        gl_interop_fabric_(std::move(gl_interop_fabric))
#endif
  {
  }

  absl::Status Initialize(const std::vector<TensorTieDef>& inputs,
                          const std::vector<TensorTieDef>& outputs,
                          TensorTieFactory* factory) {
    RETURN_IF_ERROR(LinkTensors(inputs, factory, &inputs_));
    return LinkTensors(outputs, factory, &outputs_);
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status GetInputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = inputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status GetOutputObject(int index, TensorObject* object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    *object = outputs_[index]->GetExternalObject();
    return absl::OkStatus();
  }

  absl::Status SetInputObject(int index, TensorObject object) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    return inputs_[index]->SetExternalObject(object);
  }

  absl::Status SetOutputObject(int index, TensorObject object) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    return outputs_[index]->SetExternalObject(object);
  }

  absl::Status CopyFromExternalInput(int index) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Input id ", index, " is an invalid input index."));
    }
    RETURN_IF_ERROR(inputs_[index]->CopyFromExternalObject());
    return queue_->WaitForCompletion();
  }

  absl::Status CopyToExternalOutput(int index) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::NotFoundError(
          absl::StrCat("Output id ", index, " is an invalid output index"));
    }
    RETURN_IF_ERROR(outputs_[index]->CopyToExternalObject());
    return queue_->WaitForCompletion();
  }

  absl::Status Run() override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Start());
    }
#endif
    for (const auto& input : inputs_) {
      RETURN_IF_ERROR(input->CopyFromExternalObject());
    }

    RETURN_IF_ERROR(RunWithoutExternalBufferCopy());

    bool has_async_copies = false;
    for (const auto& output : outputs_) {
      RETURN_IF_ERROR(output->CopyToExternalObject());
      if (output->def().external_def.object_def.object_type ==
          ObjectType::CPU_MEMORY) {
        has_async_copies = true;
      }
    }
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_) {
      RETURN_IF_ERROR(gl_interop_fabric_->Finish());
    }
#endif
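    // Copies into user-provided CPU memory may still be pending on the queue
    // at this point, so wait for completion before the caller reads that host
    // memory.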
    if (has_async_copies) {
      RETURN_IF_ERROR(queue_->WaitForCompletion());
    }
    return absl::OkStatus();
  }

  absl::Status RunWithoutExternalBufferCopy() override {
    RETURN_IF_ERROR(context_->AddToQueue(queue_));
    clFlush(queue_->queue());

    return absl::OkStatus();
  }

 private:
  static absl::Status LinkTensors(
      const std::vector<TensorTieDef>& defs, TensorTieFactory* factory,
      std::vector<std::unique_ptr<TensorTie>>* objects) {
    objects->reserve(defs.size());
    for (auto& def : defs) {
      std::unique_ptr<TensorTie> object;
      RETURN_IF_ERROR(factory->NewTensorTie(def, &object));
      objects->push_back(std::move(object));
    }
    return absl::OkStatus();
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<std::unique_ptr<TensorTie>>& objects) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(objects.size());
    for (auto& obj : objects) {
      defs.push_back(obj->def().external_def);
    }
    return defs;
  }

  CLCommandQueue* queue_;
  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  std::vector<std::unique_ptr<TensorTie>> inputs_;
  std::vector<std::unique_ptr<TensorTie>> outputs_;
};

TensorObjectDef TensorToDef(const Tensor& tensor) {
  TensorObjectDef def;
  def.dimensions.b = tensor.Batch();
  def.dimensions.h = tensor.Height();
  def.dimensions.w = tensor.Width();
  def.dimensions.c = tensor.Channels();
  def.object_def.data_layout = ToDataLayout(tensor.GetStorageType());
  def.object_def.data_type = tensor.GetDataType();
  def.object_def.object_type = ToObjectType(tensor.GetStorageType());
  def.object_def.user_provided = false;
  return def;
}

CalculationsPrecision GetPrecision(const Environment& env,
                                   const InferenceOptions& options) {
  CalculationsPrecision precision;
  switch (GetPosition(options, InferencePriority::MAX_PRECISION)) {
    case 1:
      precision = CalculationsPrecision::F32;
      break;
    case 2:
      precision = CalculationsPrecision::F32_F16;
      break;
    case 3:
      precision = CalculationsPrecision::F16;
      break;
    default:
      precision = CalculationsPrecision::F16;
      break;
  }
  // Increase precision if lower precision is not supported.
  if (!env.IsSupported(precision)) {
    precision = CalculationsPrecision::F32_F16;
    if (!env.IsSupported(precision)) {
      precision = CalculationsPrecision::F32;
    }
  }
  return precision;
}
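
// Worked example for GetPrecision() above: if MAX_PRECISION is the top
// priority (position 1) the result is F32, as the second priority it is
// F32_F16, and as the third priority (or when unspecified) it is F16; the
// result is then promoted to the nearest precision the environment supports.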

TensorStorageType GetStorageTypeFromOptions(const Environment& env,
                                            const InferenceOptions& options) {
  // Fall back to BUFFER, which should be supported by default.
  std::vector<TensorStorageType> preferred_storage_types;
  if (GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                            InferencePriority::MIN_MEMORY_USAGE) ==
      PriorityImportance::HIGHER) {
    preferred_storage_types = {GetFastestStorageType(env.device().GetInfo()),
                               TensorStorageType::BUFFER};
  } else {
    preferred_storage_types = {
        GetStorageTypeWithMinimalMemoryConsumption(env.device().GetInfo()),
        TensorStorageType::BUFFER};
  }

  for (TensorStorageType storage_type : preferred_storage_types) {
    if (env.IsSupported(storage_type)) {
      return storage_type;
    }
  }
  return TensorStorageType::UNKNOWN;
}
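
// For example, when MIN_LATENCY outranks MIN_MEMORY_USAGE, the fastest storage
// type reported for the device is tried first, otherwise the one with minimal
// memory consumption; BUFFER is the fallback, and UNKNOWN is returned only if
// even BUFFER is unsupported.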

class InferenceBuilderImpl : public InferenceBuilder {
 public:
  explicit InferenceBuilderImpl(Environment* environment)
      : environment_(environment) {}

  absl::Status Initialize(const InferenceOptions& options,
                          const InferenceEnvironmentOptions& env_options,
                          const GraphFloat32& graph) {
    context_ = absl::make_unique<InferenceContext>();
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(*environment_, options);
    create_info.storage_type =
        GetStorageTypeFromOptions(*environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    if (GetRelativeImportance(options, InferencePriority::MIN_MEMORY_USAGE,
                              InferencePriority::MIN_LATENCY) ==
        PriorityImportance::HIGHER) {
      create_info.hints.Add(ModelHints::kNoWinogradOptimizations);
    }
    RETURN_IF_ERROR(context_->InitFromGraph(create_info, graph, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  absl::Status Initialize(const InferenceEnvironmentOptions& env_options,
                          const absl::Span<const uint8_t> serialized_model) {
    context_ = absl::make_unique<InferenceContext>();
    RETURN_IF_ERROR(
        context_->RestoreDeserialized(serialized_model, environment_));

#ifdef CL_DELEGATE_ALLOW_GL
    if (env_options.IsGlAware() &&
        IsGlSharingSupported(environment_->device())) {
      gl_interop_fabric_ = absl::make_unique<GlInteropFabric>(
          env_options.egl_display, environment_);
    }
    tie_factory_ = absl::make_unique<TensorTieFactory>(
        environment_, context_.get(), gl_interop_fabric_.get());
#else
    tie_factory_ =
        absl::make_unique<TensorTieFactory>(environment_, context_.get());
#endif

    inputs_ = LinkTensors(context_->GetInputIds(), AccessType::READ);
    outputs_ = LinkTensors(context_->GetOutputIds(), AccessType::WRITE);
    return absl::OkStatus();
  }

  std::vector<TensorObjectDef> inputs() const override {
    return GetExternalDefinitions(inputs_);
  }

  std::vector<TensorObjectDef> outputs() const override {
    return GetExternalDefinitions(outputs_);
  }

  absl::Status SetInputShape(int index, const Dimensions& dimensions) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Index is out of range");
    }
    return absl::UnimplementedError("Changing input shapes is not supported");
  }

  absl::Status SetInputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= inputs_.size()) {
      return absl::OutOfRangeError("Input index is out of range");
    }
    auto def = inputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New input object definition is not supported.");
    }
    inputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status SetOutputObjectDef(int index, ObjectDef new_def) override {
    if (index < 0 || index >= outputs_.size()) {
      return absl::OutOfRangeError("Output index is out of range");
    }
    auto def = outputs_[index];
    def.external_def.object_def = new_def;
    if (!tie_factory_->IsSupported(def)) {
      return absl::InvalidArgumentError(
          "New output object definition is not supported.");
    }
    outputs_[index] = def;
    return absl::OkStatus();
  }

  absl::Status Build(std::unique_ptr<InferenceRunner>* runner) override {
#ifdef CL_DELEGATE_ALLOW_GL
    if (gl_interop_fabric_ && !HasGlObjects()) {
      // Destroy the interop layer when there are no GL objects to avoid extra
      // synchronization cost.
      gl_interop_fabric_.reset(nullptr);
    }
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_), std::move(gl_interop_fabric_));
#else
    auto runner_impl = absl::make_unique<InferenceRunnerImpl>(
        environment_, std::move(context_));
#endif
    RETURN_IF_ERROR(
        runner_impl->Initialize(inputs_, outputs_, tie_factory_.get()));
    *runner = std::move(runner_impl);
    return absl::OkStatus();
  }

 private:
  // Links internal tensors with external user-facing objects.
  std::vector<TensorTieDef> LinkTensors(const std::vector<ValueId>& ids,
                                        AccessType access) {
    std::vector<TensorTieDef> links;
    links.reserve(ids.size());
    for (const auto& id : ids) {
      TensorObjectDef def = TensorToDef(*context_->GetTensor(id));
      links.push_back({id, access, def, def});
    }
    return links;
  }

  bool HasGlObjects() const {
#ifdef CL_DELEGATE_ALLOW_GL
    auto is_gl = [](ObjectType t) {
      return t == ObjectType::OPENGL_SSBO || t == ObjectType::OPENGL_TEXTURE;
    };
    for (const TensorTieDef& def : inputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
    for (const TensorTieDef& def : outputs_) {
      if (is_gl(def.external_def.object_def.object_type)) {
        return true;
      }
    }
#endif
    return false;
  }

  static std::vector<TensorObjectDef> GetExternalDefinitions(
      const std::vector<TensorTieDef>& links) {
    std::vector<TensorObjectDef> defs;
    defs.reserve(links.size());
    for (auto& desc : links) {
      defs.push_back(desc.external_def);
    }
    return defs;
  }

  std::unique_ptr<InferenceContext> context_;
#ifdef CL_DELEGATE_ALLOW_GL
  std::unique_ptr<GlInteropFabric> gl_interop_fabric_;
#endif
  Environment* environment_;

  std::vector<TensorTieDef> inputs_;
  std::vector<TensorTieDef> outputs_;
  std::unique_ptr<TensorTieFactory> tie_factory_;
};

class InferenceEnvironmentImpl : public InferenceEnvironment {
 public:
  explicit InferenceEnvironmentImpl(const InferenceEnvironmentOptions& options)
      : options_(options) {}

  absl::Status Init() {
    RETURN_IF_ERROR(LoadOpenCL());
    properties_.is_opencl_available = true;

    CLDevice device;
    if (options_.device) {
      cl_platform_id platform;
      RETURN_IF_ERROR(GetDeviceInfo<cl_platform_id>(
          options_.device, CL_DEVICE_PLATFORM, &platform));
      device = CLDevice(options_.device, platform);
    } else {
      RETURN_IF_ERROR(CreateDefaultGPUDevice(&device));
    }

#ifdef CL_DELEGATE_ALLOW_GL
    properties_.is_gl_sharing_supported = IsGlSharingSupported(device);
    properties_.is_gl_to_cl_fast_sync_supported =
        IsClEventFromEglSyncSupported(device);
    properties_.is_cl_to_gl_fast_sync_supported =
        IsEglSyncFromClEventSupported();
#endif

    CLContext context;
    if (options_.context) {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware()) {
        return absl::InvalidArgumentError(
            "OpenCL context and EGL parameters are set at the same time.");
      }
#endif
      context = CLContext(options_.context, /* has_ownership = */ false);
    } else {
#ifdef CL_DELEGATE_ALLOW_GL
      if (options_.IsGlAware() && properties_.is_gl_sharing_supported) {
        RETURN_IF_ERROR(CreateCLGLContext(
            device,
            reinterpret_cast<cl_context_properties>(options_.egl_context),
            reinterpret_cast<cl_context_properties>(options_.egl_display),
            &context));
      } else {
        RETURN_IF_ERROR(CreateCLContext(device, &context));
      }
#else
      RETURN_IF_ERROR(CreateCLContext(device, &context));
#endif
    }

    CLCommandQueue queue;
    if (options_.command_queue) {
      queue =
          CLCommandQueue(options_.command_queue, /* has_ownership = */ false);
    } else {
      RETURN_IF_ERROR(CreateCLCommandQueue(device, context, &queue));
    }
    // Profiling queue is used for workgroup size tuning.
    ProfilingCommandQueue profiling_queue;
    RETURN_IF_ERROR(
        CreateProfilingCommandQueue(device, context, &profiling_queue));
    environment_ = Environment(std::move(device), std::move(context),
                               std::move(queue), std::move(profiling_queue));
    return environment_.Init();
  }

  absl::Status BuildSerializedModel(
      const InferenceOptions& options, GraphFloat32 model,
      std::vector<uint8_t>* serialized_model) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    InferenceContext context;
    InferenceContext::CreateInferenceInfo create_info;
    create_info.precision = GetPrecision(environment_, options);
    create_info.storage_type = GetStorageTypeFromOptions(environment_, options);
    if (options.usage == InferenceUsage::FAST_SINGLE_ANSWER) {
      create_info.hints.Add(ModelHints::kReduceKernelsCount);
      create_info.hints.Add(ModelHints::kFastTuning);
    } else if (options.usage == InferenceUsage::SUSTAINED_SPEED) {
      create_info.hints.Add(ModelHints::kAllowSpecialKernels);
    }
    RETURN_IF_ERROR(context.InitFromGraph(create_info, model, &environment_,
                                          serialized_model));
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const InferenceOptions& options, GraphFloat32 model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (!IsValid(options)) {
      return absl::InvalidArgumentError("InferenceOptions are invalid.");
    }
    InferenceOptions resolved_options = options;
    ResolveAutoPriority(&resolved_options);
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    RETURN_IF_ERROR(RunGraphTransforms(&model));
    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(
        builder_impl->Initialize(resolved_options, options_, model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  absl::Status NewInferenceBuilder(
      const absl::Span<const uint8_t> serialized_model,
      std::unique_ptr<InferenceBuilder>* builder) final {
    if (environment_.program_cache() &&
        !options_.serialized_binary_cache.empty()) {
      // Ignore returned error. Cache is discarded.
      environment_.program_cache()
          ->AddSerializedCache(environment_.context(), environment_.device(),
                               options_.serialized_binary_cache)
          .IgnoreError();
    }

    auto builder_impl = absl::make_unique<InferenceBuilderImpl>(&environment_);
    RETURN_IF_ERROR(builder_impl->Initialize(options_, serialized_model));
    *builder = std::move(builder_impl);
    return absl::OkStatus();
  }

  std::vector<uint8_t> GetSerializedBinaryCache() const final {
    std::vector<uint8_t> data;
    // If there was a problem, data will be empty.
    environment_.program_cache()
        ->GetSerializedCache(environment_.device(), &data)
        .IgnoreError();
    return data;
  }

  const InferenceEnvironmentProperties& properties() const {
    return properties_;
  }

 private:
  const InferenceEnvironmentOptions options_;
  Environment environment_;
  InferenceEnvironmentProperties properties_;
};

}  // namespace
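
// A minimal usage sketch of the entry point below (a hypothetical caller;
// assumes default-constructed options and a GraphFloat32 named `graph` built
// elsewhere):
//
//   std::unique_ptr<InferenceEnvironment> env;
//   InferenceEnvironmentProperties properties;
//   RETURN_IF_ERROR(NewInferenceEnvironment(InferenceEnvironmentOptions(),
//                                           &env, &properties));
//   std::unique_ptr<InferenceBuilder> builder;
//   RETURN_IF_ERROR(env->NewInferenceBuilder(InferenceOptions(),
//                                            std::move(graph), &builder));
//   std::unique_ptr<InferenceRunner> runner;
//   RETURN_IF_ERROR(builder->Build(&runner));
//   RETURN_IF_ERROR(runner->Run());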
absl::Status NewInferenceEnvironment(
    const InferenceEnvironmentOptions& options,
    std::unique_ptr<InferenceEnvironment>* environment,
    InferenceEnvironmentProperties* properties) {
  auto env_impl = absl::make_unique<InferenceEnvironmentImpl>(options);
  absl::Status status = env_impl->Init();
  if (properties) {
    *properties = env_impl->properties();
  }
  RETURN_IF_ERROR(status);
  *environment = std::move(env_impl);
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite