/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/tensor.h"

#include <cstdint>
#include <cstring>
#include <memory>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_image_format.h"
#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {
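// Allocates device memory for a tensor according to its storage type: a
// cl_mem buffer for BUFFER/IMAGE_BUFFER, or a 2D/3D image or image array for
// the texture-based storage types. If `data_ptr` is non-null, the allocation
// is initialized from it via CL_MEM_COPY_HOST_PTR.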
absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  const void* data_ptr, CLMemory* result) {
  const int slices = DivideRoundUp(shape.c, 4);
  cl_mem_flags mem_flags = CL_MEM_READ_WRITE;
  if (data_ptr) {
    mem_flags |= CL_MEM_COPY_HOST_PTR;
  }
  switch (descriptor.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER: {
      const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
                               4 * SizeOf(descriptor.data_type);
      cl_int error_code;
      cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size,
                                     const_cast<void*>(data_ptr), &error_code);
      if (!memory) {
        return absl::UnknownError(
            absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
                         CLErrorCodeToString(error_code)));
      }
      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_2D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h * slices;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_3D: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE3D;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          CreateImage3DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 3D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }
    case TensorStorageType::TEXTURE_ARRAY: {
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
      desc.image_width = shape.w * shape.b;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_array_size = slices * shape.d;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      format.image_channel_order = CL_RGBA;
      format.image_channel_data_type =
          DataTypeToChannelType(descriptor.data_type);

      cl_int error_code;
      cl_mem memory =
          clCreateImage(context.context(), mem_flags, &format, &desc,
                        const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create 2D texture array (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    case TensorStorageType::SINGLE_TEXTURE_2D: {
      if (slices != 1) {
        return absl::InvalidArgumentError(absl::StrCat(
            "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ",
            shape.c, " was provided"));
      }
      cl_image_desc desc;
      desc.image_type = CL_MEM_OBJECT_IMAGE2D;
      desc.image_width = shape.w * shape.b * shape.d;
      desc.image_height = shape.h;
      desc.image_depth = 0;
      desc.image_row_pitch = 0;
      desc.image_slice_pitch = 0;
      desc.num_mip_levels = 0;
      desc.num_samples = 0;
      desc.buffer = nullptr;

      cl_image_format format;
      if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type)) {
        format.image_channel_order = ToChannelOrder(shape.c);
        format.image_channel_data_type =
            DataTypeToChannelType(descriptor.data_type);
      } else {
        return absl::InvalidArgumentError(absl::StrCat(
            "This device doesn't support ", shape.c, "-channel textures."));
      }

      cl_int error_code;
      cl_mem memory =
          CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
                              const_cast<void*>(data_ptr), &error_code);
      if (error_code != CL_SUCCESS) {
        return absl::UnknownError(
            absl::StrCat("Failed to create single 2D texture (clCreateImage): ",
                         CLErrorCodeToString(error_code)));
      }

      *result = CLMemory(memory, true);
      return absl::OkStatus();
    }

    default:
      return absl::InternalError("Unsupported tensor storage type");
  }
}

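// Creates a 1D image-buffer view (CL_MEM_OBJECT_IMAGE1D_BUFFER) over an
// existing cl_mem buffer so its contents can be read through the image path.
// `width` is the image width in RGBA texels.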
absl::Status CreateImageBufferFromBuffer(const CLContext& context,
                                         cl_mem memory, DataType data_type,
                                         int width, cl_mem* result) {
  cl_image_format format;
  cl_image_desc desc;
  std::memset(&desc, 0, sizeof(desc));
  desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
  desc.image_width = width;
  desc.mem_object = memory;

  format.image_channel_data_type = DataTypeToChannelType(data_type);
  format.image_channel_order = CL_RGBA;

  cl_int error_code;
  *result = clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc,
                          nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}

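// Creates a 2D image that aliases an existing cl_mem buffer. A non-zero
// `row_bytes_alignment` pads the image row pitch to a multiple of that many
// bytes.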
absl::Status CreateImage2DFromBuffer(const CLContext& context, cl_mem memory,
                                     DataType data_type, int width, int height,
                                     int channels, int row_bytes_alignment,
                                     cl_mem* result) {
  if (!context.IsFloatTexture2DSupported(channels, data_type)) {
    return absl::InvalidArgumentError(absl::StrCat(
        "This device doesn't support ", channels, "-channel textures."));
  }

  cl_image_desc desc;
  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
  desc.image_width = width;
  desc.image_height = height;
  desc.image_depth = 0;
  if (row_bytes_alignment == 0) {
    desc.image_row_pitch = 0;
  } else {
    const size_t bytes_per_row = width * channels * SizeOf(data_type);
    desc.image_row_pitch = AlignByN(bytes_per_row, row_bytes_alignment);
  }
  desc.image_slice_pitch = 0;
  desc.num_mip_levels = 0;
  desc.num_samples = 0;
  desc.mem_object = memory;

  cl_image_format format;
  format.image_channel_order = ToChannelOrder(channels);
  format.image_channel_data_type = DataTypeToChannelType(data_type);

  cl_int error_code;
  *result = CreateImage2DLegacy(context.context(), CL_MEM_READ_WRITE, &format,
                                &desc, nullptr, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to create Image2D from Buffer (clCreateImage): ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}

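// Builds a Tensor over the given memory. If `memory` is null, new device
// memory is allocated and owned by the resulting tensor. For IMAGE_BUFFER
// storage, an additional 1D image view over the buffer is created as well.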
absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, cl_mem memory,
                          Tensor* result) {
  const bool memory_owner = memory == nullptr;
  if (memory_owner) {
    CLMemory mem;
    RETURN_IF_ERROR(
        AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
    memory = mem.Release();
  }
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

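// Same as CreateTensor above, but the resulting tensor never takes ownership
// of `memory`; the caller remains responsible for releasing it.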
absl::Status CreateTensorShared(const CLContext& context, const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                cl_mem memory, Tensor* result) {
  const bool memory_owner = false;
  if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
    cl_mem image_memory;
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        context, memory, descriptor.data_type,
        shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4),
        &image_memory));
    *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
  } else {
    *result = Tensor(memory, memory_owner, shape, descriptor);
  }
  return absl::OkStatus();
}

}  // namespace

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC& shape,
               const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(nullptr),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape.b, shape.h, shape.w, 1, shape.c),
      descriptor_(descriptor) {
  if (image_buffer_memory &&
      (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
       descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)) {
    buffer_based_ = true;
  }
}

Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory,
               const BHWDC& shape, const TensorDescriptor& descriptor)
    : memory_(memory),
      image_buffer_memory_(image_buffer_memory),
      memory_owner_(memory_owner),
      shape_(shape),
      descriptor_(descriptor) {
  if (image_buffer_memory &&
      (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
       descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D)) {
    buffer_based_ = true;
  }
}

Tensor::Tensor(Tensor&& tensor)
    : memory_(tensor.memory_),
      image_buffer_memory_(tensor.image_buffer_memory_),
      memory_owner_(tensor.memory_owner_),
      buffer_based_(tensor.buffer_based_),
      shape_(tensor.shape_),
      descriptor_(tensor.descriptor_) {
  tensor.memory_ = nullptr;
  tensor.image_buffer_memory_ = nullptr;
}

Tensor& Tensor::operator=(Tensor&& tensor) {
  if (this != &tensor) {
    Release();
    std::swap(memory_, tensor.memory_);
    std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
    std::swap(memory_owner_, tensor.memory_owner_);
    std::swap(buffer_based_, tensor.buffer_based_);
    std::swap(shape_, tensor.shape_);
    std::swap(descriptor_, tensor.descriptor_);
  }
  return *this;
}

void Tensor::Release() {
  // image_buffer_memory_ is always owned by this object.
  if (image_buffer_memory_) {
    clReleaseMemObject(image_buffer_memory_);
    image_buffer_memory_ = nullptr;
  }
  if (memory_owner_ && memory_) {
    clReleaseMemObject(memory_);
    memory_ = nullptr;
  }
}

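// Populates `resources` with the handles and integer sizes a kernel needs,
// depending on which descriptor type was passed: a raw buffer, a standalone
// 2D texture, or a full TensorDescriptor.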
absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor* obj_ptr,
                                     GPUResourcesWithValue* resources) const {
  const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(obj_ptr);
  if (buffer_desc) {
    if (descriptor_.storage_type != TensorStorageType::BUFFER) {
      return absl::InvalidArgumentError(
          "Tensor can be used with BufferDescriptor only with "
          "TensorStorageType::BUFFER.");
    }
    resources->buffers.push_back({"buffer", memory_});
    return absl::OkStatus();
  }
  const auto* texture2d_desc =
      dynamic_cast<const Texture2DDescriptor*>(obj_ptr);
  if (texture2d_desc) {
    if (descriptor_.storage_type != TensorStorageType::TEXTURE_2D) {
      return absl::InvalidArgumentError(
          "Tensor can be used with Texture2DDescriptor only with "
          "TensorStorageType::TEXTURE_2D.");
    }
    cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
    resources->images2d.push_back({"tex2d", mem});
    return absl::OkStatus();
  }
  const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(obj_ptr);
  if (!tensor_desc) {
    return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
  }
  resources->ints.push_back(
      {"slice_stride", tensor_desc->GetSliceStrideSize(shape_)});
  if (descriptor_.HasAxis(Axis::WIDTH)) {
    resources->ints.push_back({"width", tensor_desc->GetWidthSize(shape_)});
  }
  if (descriptor_.HasAxis(Axis::HEIGHT)) {
    resources->ints.push_back({"height", Height()});
  }
  if (descriptor_.HasAxis(Axis::CHANNELS)) {
    resources->ints.push_back({"slices", Slices()});
    resources->ints.push_back({"channels", Channels()});
  }
  if (descriptor_.HasAxis(Axis::BATCH)) {
    resources->ints.push_back({"batch", Batch()});
  }
  if (descriptor_.HasAxis(Axis::DEPTH)) {
    resources->ints.push_back({"depth", Depth()});
  }

  if (descriptor_.storage_type == TensorStorageType::BUFFER) {
    resources->buffers.push_back({"buffer", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D ||
             descriptor_.storage_type ==
                 TensorStorageType::SINGLE_TEXTURE_2D) {
    cl_mem mem = buffer_based_ ? image_buffer_memory_ : memory_;
    resources->images2d.push_back({"image2d", mem});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) {
    resources->image2d_arrays.push_back({"image2d_array", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) {
    resources->images3d.push_back({"image3d", memory_});
  } else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) {
    if (obj_ptr->GetAccess() == AccessType::READ) {
      resources->image_buffers.push_back(
          {"image_buffer", image_buffer_memory_});
    } else {
      resources->buffers.push_back({"buffer", memory_});
    }
  }

  return absl::OkStatus();
}

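// Returns the extent of the tensor as laid out for its storage type;
// {-1, -1, -1} for UNKNOWN storage.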
int3 Tensor::GetFullTensorRegion() const {
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_3D:
    case TensorStorageType::IMAGE_BUFFER:
      return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()};
    case TensorStorageType::TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1};
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return {shape_.w * shape_.b * shape_.d, shape_.h, 1};
    case TensorStorageType::UNKNOWN:
      return {-1, -1, -1};
  }
}

absl::Status Tensor::IsValid(const BHWC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

absl::Status Tensor::IsValid(const BHWDC& shape) const {
  if (shape.b != shape_.b) {
    return absl::InvalidArgumentError(
        "Shape batch does not match tensor batch");
  }
  if (shape.w != shape_.w) {
    return absl::InvalidArgumentError(
        "Shape width does not match tensor width");
  }
  if (shape.h != shape_.h) {
    return absl::InvalidArgumentError(
        "Shape height does not match tensor height");
  }
  if (shape.d != shape_.d) {
    return absl::InvalidArgumentError(
        "Shape depth does not match tensor depth");
  }
  if (shape.c != shape_.c) {
    return absl::InvalidArgumentError(
        "Shape channels does not match tensor channels");
  }
  return absl::OkStatus();
}

int Tensor::GetAlignedChannels() const {
  return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
             ? shape_.c
             : AlignByN(shape_.c, 4);
}

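// Size of the backing allocation in bytes. All storage types except
// SINGLE_TEXTURE_2D pad the channel dimension to whole 4-element slices.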
uint64_t Tensor::GetMemorySizeInBytes() const {
  const int flt_size = SizeOf(descriptor_.data_type);
  const int flt4_size = 4 * flt_size;
  switch (descriptor_.storage_type) {
    case TensorStorageType::BUFFER:
    case TensorStorageType::IMAGE_BUFFER:
    case TensorStorageType::TEXTURE_ARRAY:
    case TensorStorageType::TEXTURE_2D:
    case TensorStorageType::TEXTURE_3D:
      return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices();
    case TensorStorageType::SINGLE_TEXTURE_2D:
      return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d;
    default:
      return 0;
  }
}

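// For reading, IMAGE_BUFFER and buffer-based 2D textures expose the image
// view; writes to IMAGE_BUFFER storage go through the underlying buffer
// instead.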
cl_mem Tensor::GetMemoryPtr() const {
  if (buffer_based_) {
    return image_buffer_memory_;
  } else {
    return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER
               ? image_buffer_memory_
               : memory_;
  }
}

cl_mem Tensor::GetMemoryPtrForWriting() const {
  if (buffer_based_) {
    return image_buffer_memory_;
  } else {
    return memory_;
  }
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

absl::Status Tensor::WriteData(
    CLCommandQueue* queue,
    const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
  return WriteDataBHWDC(src.data.data(), queue);
}

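// Allocates this tensor from a TensorDescriptor that fully specifies shape,
// data type, and storage type. If the descriptor carries constant data, the
// allocation is initialized from it.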
absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor& desc,
                                          CLContext* context) {
  shape_ = desc.shape;
  descriptor_.data_type = desc.data_type;
  descriptor_.storage_type = desc.storage_type;
  descriptor_.layout = desc.layout;
  memory_owner_ = true;
  CLMemory memory;
  uint8_t* data_ptr = desc.data.empty()
                          ? nullptr
                          : const_cast<unsigned char*>(desc.data.data());
  RETURN_IF_ERROR(
      AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory));
  memory_ = memory.Release();
  if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) {
    RETURN_IF_ERROR(CreateImageBufferFromBuffer(
        *context, memory_, desc.data_type,
        shape_.b * shape_.w * shape_.h * shape_.d * DivideRoundUp(shape_.c, 4),
        &image_buffer_memory_));
  }
  return absl::OkStatus();
}

absl::Status CreateTensor(const CLContext& context, const BHWC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensor(context, shape5D, descriptor, nullptr, result);
}

absl::Status CreateTensor(const CLContext& context, const BHWDC& shape,
                          const TensorDescriptor& descriptor, Tensor* result) {
  return CreateTensor(context, shape, descriptor, nullptr, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CreateTensorShared(context, shape5D, descriptor, memory, result);
}

absl::Status CreateSharedTensor(const CLContext& context, cl_mem memory,
                                const BHWDC& shape,
                                const TensorDescriptor& descriptor,
                                Tensor* result) {
  return CreateTensorShared(context, shape, descriptor, memory, result);
}

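// Creates a non-owning tensor over `memory` together with a 2D image view of
// it, sized for either SINGLE_TEXTURE_2D or slice-packed TEXTURE_2D layout.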
absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
                                             cl_mem memory, const BHWC& shape,
                                             const TensorDescriptor& descriptor,
                                             int row_bytes_alignment,
                                             Tensor* result) {
  const int width = shape.b * shape.w;
  const int height =
      descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
          ? shape.h
          : shape.h * DivideRoundUp(shape.c, 4);
  const int channels =
      descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape.c
                                                                      : 4;
  cl_mem image_memory;
  RETURN_IF_ERROR(CreateImage2DFromBuffer(
      context, memory, descriptor.data_type, width, height, channels,
      row_bytes_alignment, &image_memory));
  *result = Tensor(memory, false, image_memory, shape, descriptor);
  return absl::OkStatus();
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result);
}

absl::Status AllocateTensorMemory(const CLContext& context, const BHWDC& shape,
                                  const TensorDescriptor& descriptor,
                                  CLMemory* result) {
  return AllocateTensorMemory(context, shape, descriptor, nullptr, result);
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite