1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
17
18 #include <cstdint>
19 #include <string>
20 #include <utility>
21 #include <vector>
22
23 #include "absl/strings/str_cat.h"
24 #include "absl/strings/substitute.h"
25 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
26 #include "tensorflow/lite/delegates/gpu/common/shape.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/util.h"
28 #include "tensorflow/lite/delegates/gpu/common/util.h"
29
30 namespace tflite {
31 namespace gpu {
32 namespace {
33 std::string GetReadImageFromDataType(DataType data_type) {
34 if (data_type == DataType::FLOAT32) {
35 return "read_imagef";
36 } else if (data_type == DataType::FLOAT16) {
37 return "read_imageh";
38 } else if (data_type == DataType::INT8 || data_type == DataType::INT16 ||
39 data_type == DataType::INT32) {
40 return "read_imagei";
41 } else if (data_type == DataType::UINT8 || data_type == DataType::UINT16 ||
42 data_type == DataType::UINT32 || data_type == DataType::BOOL) {
43 return "read_imageui";
44 } else {
45 return "error";
46 }
47 }
48
49 DataType ToClTextureType(DataType data_type) {
50 switch (data_type) {
51 case DataType::FLOAT32:
52 case DataType::FLOAT16:
53 case DataType::INT32:
54 case DataType::UINT32:
55 return data_type;
56 case DataType::INT16:
57 case DataType::INT8:
58 return DataType::INT32;
59 case DataType::BOOL:
60 case DataType::UINT16:
61 case DataType::UINT8:
62 return DataType::UINT32;
63 default:
64 return DataType::UNKNOWN;
65 }
66 }
67
68 std::string GetWriteImageFromDataType(DataType data_type) {
69 if (data_type == DataType::FLOAT32) {
70 return "write_imagef";
71 } else if (data_type == DataType::FLOAT16) {
72 return "write_imageh";
73 } else if (data_type == DataType::INT8 || data_type == DataType::INT16 ||
74 data_type == DataType::INT32) {
75 return "write_imagei";
76 } else if (data_type == DataType::UINT8 || data_type == DataType::UINT16 ||
77 data_type == DataType::UINT32 || data_type == DataType::BOOL) {
78 return "write_imageui";
79 } else {
80 return "error";
81 }
82 }
83
84 std::string GetConversionForImage(const GpuInfo& gpu_info, DataType src_type,
85 DataType dst_type) {
86 DataType interm_type = src_type;
87 if (gpu_info.IsApiOpenCl()) {
88 if (src_type == DataType::FLOAT16 && dst_type == DataType::FLOAT32) {
89 return "$0";
90 }
91 interm_type = ToClTextureType(src_type);
92 } else if (gpu_info.IsApiMetal()) {
93 interm_type = ToMetalTextureType(src_type);
94 }
95 return GetTypeConversion(gpu_info, interm_type, dst_type, 4);
96 }
97
98 std::string GetConversion(const GpuInfo& gpu_info,
99 TensorStorageType storage_type, DataType src_type,
100 DataType dst_type) {
101 if (src_type == DataType::BOOL) {
102     // DataType::BOOL is stored as DataType::UINT8.
103 src_type = DataType::UINT8;
104 }
105 if (storage_type == TensorStorageType::BUFFER) {
106 return GetTypeConversion(gpu_info, src_type, dst_type, 4);
107 } else {
108 return GetConversionForImage(gpu_info, src_type, dst_type);
109 }
110 }
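// The strings returned by GetConversion()/GetConversionForImage() are
// absl::Substitute patterns in which "$0" stands for the expression being
// converted; MayBeAddConversion() below wraps an already generated read
// expression in such a pattern. Illustrative (hypothetical) example: a
// conversion "convert_float4($0)" applied to "buffer[idx]" produces
// "convert_float4(buffer[idx])".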
111
112 void MayBeAddConversion(const std::string& conversion, std::string* result) {
113 *result = absl::Substitute(conversion, *result);
114 }
115
116 } // namespace
117
118 std::string ToString(TensorStorageType type) {
119 switch (type) {
120 case TensorStorageType::UNKNOWN:
121 return "TensorStorageType::UNKNOWN";
122 case TensorStorageType::BUFFER:
123 return "TensorStorageType::BUFFER";
124 case TensorStorageType::TEXTURE_ARRAY:
125 return "TensorStorageType::TEXTURE_ARRAY";
126 case TensorStorageType::TEXTURE_2D:
127 return "TensorStorageType::TEXTURE_2D";
128 case TensorStorageType::TEXTURE_3D:
129 return "TensorStorageType::TEXTURE_3D";
130 case TensorStorageType::SINGLE_TEXTURE_2D:
131 return "TensorStorageType::SINGLE_TEXTURE_2D";
132 case TensorStorageType::IMAGE_BUFFER:
133 return "TensorStorageType::IMAGE_BUFFER";
134 }
135 }
136
137 TensorDescriptor::TensorDescriptor(TensorDescriptor&& desc)
138 : GPUObjectDescriptor(std::move(desc)),
139 data_type_(desc.data_type_),
140 storage_type_(desc.storage_type_),
141 layout_(desc.layout_),
142 use_buffer_for_write_only_2d_texture_(
143 desc.use_buffer_for_write_only_2d_texture_),
144 use_buffer_for_write_only_image_buffer_(
145 desc.use_buffer_for_write_only_image_buffer_),
146 shape_(desc.shape_),
147 data_(std::move(desc.data_)) {}
148 TensorDescriptor& TensorDescriptor::operator=(TensorDescriptor&& desc) {
149 if (this != &desc) {
150 std::swap(data_type_, desc.data_type_);
151 std::swap(storage_type_, desc.storage_type_);
152 std::swap(layout_, desc.layout_);
153 std::swap(use_buffer_for_write_only_2d_texture_,
154 desc.use_buffer_for_write_only_2d_texture_);
155 std::swap(use_buffer_for_write_only_image_buffer_,
156 desc.use_buffer_for_write_only_image_buffer_);
157 std::swap(shape_, desc.shape_);
158 data_ = std::move(desc.data_);
159 GPUObjectDescriptor::operator=(std::move(desc));
160 }
161 return *this;
162 }
163
164 void TensorDescriptor::CopyWithoutData(TensorDescriptor* desc) const {
165 desc->data_type_ = data_type_;
166 desc->storage_type_ = storage_type_;
167 desc->layout_ = layout_;
168 desc->use_buffer_for_write_only_2d_texture_ =
169 use_buffer_for_write_only_2d_texture_;
170 desc->use_buffer_for_write_only_image_buffer_ =
171 use_buffer_for_write_only_image_buffer_;
172 desc->shape_ = shape_;
173 }
174
175 std::vector<uint64_t> TensorDescriptor::GetStorageDims() const {
176 const int slices = DivideRoundUp(shape_.c, 4);
177 if (layout_ == Layout::LINEAR) {
178 switch (storage_type_) {
179 case TensorStorageType::BUFFER:
180 case TensorStorageType::IMAGE_BUFFER:
181 return {static_cast<uint64_t>(slices)};
182 case TensorStorageType::TEXTURE_ARRAY:
183 case TensorStorageType::TEXTURE_3D:
184 return {static_cast<uint64_t>(slices), 1u, 1u};
185 case TensorStorageType::TEXTURE_2D:
186 case TensorStorageType::SINGLE_TEXTURE_2D:
187 return {static_cast<uint64_t>(slices), 1u};
188 case TensorStorageType::UNKNOWN:
189 return {};
190 }
191 } else if (layout_ == Layout::HW) {
192 switch (storage_type_) {
193 case TensorStorageType::BUFFER:
194 case TensorStorageType::IMAGE_BUFFER:
195 return {static_cast<uint64_t>(shape_.w * shape_.h)};
196 case TensorStorageType::TEXTURE_ARRAY:
197 case TensorStorageType::TEXTURE_3D:
198 return {static_cast<uint64_t>(shape_.w),
199 static_cast<uint64_t>(shape_.h), 1u};
200 case TensorStorageType::TEXTURE_2D:
201 case TensorStorageType::SINGLE_TEXTURE_2D:
202 return {static_cast<uint64_t>(shape_.w),
203 static_cast<uint64_t>(shape_.h)};
204 case TensorStorageType::UNKNOWN:
205 return {};
206 }
207 }
208 // HWC/BHWC/HWDC/BHWDC
209 switch (storage_type_) {
210 case TensorStorageType::BUFFER:
211 case TensorStorageType::IMAGE_BUFFER:
212 return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.h * shape_.d *
213 slices)};
214 case TensorStorageType::TEXTURE_ARRAY:
215 case TensorStorageType::TEXTURE_3D:
216 return {static_cast<uint64_t>(shape_.w * shape_.b),
217 static_cast<uint64_t>(shape_.h),
218 static_cast<uint64_t>(shape_.d * slices)};
219 case TensorStorageType::TEXTURE_2D:
220 return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.d),
221 static_cast<uint64_t>(shape_.h * slices)};
222 case TensorStorageType::SINGLE_TEXTURE_2D:
223 return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.d),
224 static_cast<uint64_t>(shape_.h)};
225 case TensorStorageType::UNKNOWN:
226 return {};
227 }
228 }
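// Illustrative example for GetStorageDims() (shape chosen only for
// illustration): a BHWC tensor with b=1, w=8, h=8, c=7 gives
// slices = DivideRoundUp(7, 4) = 2, so BUFFER/IMAGE_BUFFER storage uses
// {w*b*h*d*slices} = {128} and TEXTURE_2D storage uses
// {w*b*d, h*slices} = {8, 16}.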
229
230 int3 TensorDescriptor::GetFullTensorRegion() const {
231 std::vector<uint64_t> storage_dims = GetStorageDims();
232 if (layout_ == Layout::LINEAR) {
233 return int3(static_cast<int>(storage_dims[0]), 1, 1);
234 } else if (layout_ == Layout::HW) {
235 switch (storage_type_) {
236 case TensorStorageType::BUFFER:
237 case TensorStorageType::IMAGE_BUFFER:
238 return int3(static_cast<int>(storage_dims[0]), 1, 1);
239 case TensorStorageType::TEXTURE_2D:
240 case TensorStorageType::SINGLE_TEXTURE_2D:
241 case TensorStorageType::TEXTURE_ARRAY:
242 case TensorStorageType::TEXTURE_3D:
243 return int3(static_cast<int>(storage_dims[0]),
244 static_cast<int>(storage_dims[1]), 1);
245 case TensorStorageType::UNKNOWN:
246 return {-1, -1, -1};
247 }
248 }
249 // HWC/BHWC/HWDC/BHWDC
250 switch (storage_type_) {
251 case TensorStorageType::BUFFER:
252 case TensorStorageType::IMAGE_BUFFER:
253 // 1D resources
254 return int3(static_cast<int>(storage_dims[0]), 1, 1);
255 case TensorStorageType::TEXTURE_2D:
256 case TensorStorageType::SINGLE_TEXTURE_2D:
257 // 2D resources
258 return int3(static_cast<int>(storage_dims[0]),
259 static_cast<int>(storage_dims[1]), 1);
260 case TensorStorageType::TEXTURE_ARRAY:
261 case TensorStorageType::TEXTURE_3D:
262 // 3D resources
263 return int3(static_cast<int>(storage_dims[0]),
264 static_cast<int>(storage_dims[1]),
265 static_cast<int>(storage_dims[2]));
266 case TensorStorageType::UNKNOWN:
267 return {-1, -1, -1};
268 }
269 }
270 uint64_t TensorDescriptor::GetMemorySizeInBytes() const {
271 std::vector<uint64_t> storage_dims = GetStorageDims();
272 uint64_t total_size = 1;
273 for (int i = 0; i < storage_dims.size(); ++i) {
274 total_size *= storage_dims[i];
275 }
276 const int element_size = GetElementSize() * SizeOf(data_type_);
277 return total_size * element_size;
278 }
279
280 int TensorDescriptor::GetElementSize() const {
281 if (storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D) {
282 return shape_.c;
283 } else {
284 return 4;
285 }
286 }
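// Continuing the illustrative shape above (b=1, w=8, h=8, c=7): with
// TEXTURE_2D storage and FLOAT16 data, GetMemorySizeInBytes() is
// 8 * 16 texels * 4 elements per texel * 2 bytes = 1024 bytes. Only
// SINGLE_TEXTURE_2D keeps the raw channel count as the element size; all
// other storages use 4.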
287
288 GPUResources TensorDescriptor::GetGPUResources(const GpuInfo& gpu_info) const {
289 GPUResources resources;
290 resources.ints.push_back("slice_stride");
291 if (HasAxis(Axis::WIDTH)) {
292 resources.ints.push_back("width");
293 }
294 if (HasAxis(Axis::HEIGHT)) {
295 resources.ints.push_back("height");
296 }
297 if (HasAxis(Axis::CHANNELS)) {
298 resources.ints.push_back("slices");
299 resources.ints.push_back("channels");
300 }
301 if (HasAxis(Axis::BATCH)) {
302 resources.ints.push_back("batch");
303 }
304 if (HasAxis(Axis::DEPTH)) {
305 resources.ints.push_back("depth");
306 }
307 if (storage_type_ == TensorStorageType::BUFFER) {
308 GPUBufferDescriptor desc;
309 desc.data_type = data_type_;
310 desc.access_type = access_type_;
311 desc.element_size = 4;
312 resources.buffers.push_back({"buffer", desc});
313 } else if (storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D ||
314 storage_type_ == TensorStorageType::TEXTURE_2D) {
315 if (access_type_ == AccessType::WRITE &&
316 use_buffer_for_write_only_2d_texture_) {
317 resources.ints.push_back("aligned_texture_width");
318 GPUBufferDescriptor desc;
319 desc.data_type = data_type_;
320 desc.access_type = access_type_;
321 desc.element_size = 4;
322 resources.buffers.push_back({"buffer", desc});
323 } else {
324 GPUImage2DDescriptor desc;
325 desc.data_type = data_type_;
326 desc.normalized = false;
327 desc.access_type = access_type_;
328 resources.images2d.push_back({"image2d", desc});
329 }
330 } else if (storage_type_ == TensorStorageType::TEXTURE_ARRAY) {
331 GPUImage2DArrayDescriptor desc;
332 desc.data_type = data_type_;
333 desc.access_type = access_type_;
334 resources.image2d_arrays.push_back({"image2d_array", desc});
335 } else if (storage_type_ == TensorStorageType::TEXTURE_3D) {
336 GPUImage3DDescriptor desc;
337 desc.data_type = data_type_;
338 desc.access_type = access_type_;
339 resources.images3d.push_back({"image3d", desc});
340 } else if (storage_type_ == TensorStorageType::IMAGE_BUFFER) {
341 if (access_type_ == AccessType::WRITE &&
342 use_buffer_for_write_only_image_buffer_) {
343 GPUBufferDescriptor desc;
344 desc.data_type = data_type_;
345 desc.access_type = access_type_;
346 desc.element_size = 4;
347 resources.buffers.push_back({"buffer", desc});
348 } else {
349 GPUImageBufferDescriptor desc;
350 desc.data_type = data_type_;
351 desc.access_type = access_type_;
352 resources.image_buffers.push_back({"image_buffer", desc});
353 }
354 }
355 return resources;
356 }
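// The resource names registered here ("buffer", "image2d", "image2d_array",
// "image3d", "image_buffer") are the same handles that Read()/Write() and
// PerformGetHandleSelector() emit into generated kernel code. Note that a
// write-only TEXTURE_2D or IMAGE_BUFFER is exposed as a plain "buffer" when
// the corresponding use_buffer_for_write_only_* flag is set.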
357
358 void TensorDescriptor::GetGpuResources(
359 const BHWDC& tensor_shape, GenericGPUResourcesWithValue* resources) const {
360 if (HasAxis(Axis::BATCH)) {
361 resources->AddInt("slice_stride",
362 tensor_shape.w * tensor_shape.h * tensor_shape.b);
363 } else {
364 resources->AddInt("slice_stride", tensor_shape.w * tensor_shape.h);
365 }
366 if (HasAxis(Axis::WIDTH)) {
367 resources->AddInt("width", tensor_shape.w);
368 }
369 if (HasAxis(Axis::HEIGHT)) {
370 resources->AddInt("height", tensor_shape.h);
371 }
372 if (HasAxis(Axis::CHANNELS)) {
373 resources->AddInt("slices", DivideRoundUp(tensor_shape.c, 4));
374 resources->AddInt("channels", tensor_shape.c);
375 }
376 if (HasAxis(Axis::BATCH)) {
377 resources->AddInt("batch", tensor_shape.b);
378 }
379 if (HasAxis(Axis::DEPTH)) {
380 resources->AddInt("depth", tensor_shape.d);
381 }
382 }
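// Illustrative binding for GetGpuResources() (shape chosen only for
// illustration): with a BHWC layout and tensor_shape b=2, w=10, h=6, c=7,
// the integers are slice_stride = 10*6*2 = 120, width = 10, height = 6,
// slices = 2, channels = 7, batch = 2.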
383
384 absl::Status TensorDescriptor::PerformConstExpr(const GpuInfo& gpu_info,
385 const std::string& const_expr,
386 std::string* result) const {
387 if (const_expr == "type" || const_expr == "scalar_type") {
388 const int vec_size = const_expr == "scalar_type" ? 1 : 4;
389 *result = GetTypeDeclaration(gpu_info, data_type_, vec_size);
390 return absl::OkStatus();
391 } else if (const_expr == "zero_value" || const_expr == "scalar_zero_value") {
392 const int vec_size = const_expr == "scalar_zero_value" ? 1 : 4;
393 *result = GetZeroValue(gpu_info, data_type_, vec_size);
394 return absl::OkStatus();
395 } else {
396 return absl::UnimplementedError(
397 absl::StrCat("Can not resolve constant expression - ", const_expr));
398 }
399 }
400
401 absl::Status TensorDescriptor::PerformSelector(
402 const GpuInfo& gpu_info, const std::string& selector,
403 const std::vector<std::string>& args,
404 const std::vector<std::string>& template_args, std::string* result) const {
405 if (selector == "Width") {
406 *result = "width";
407 return absl::OkStatus();
408 } else if (selector == "Height") {
409 *result = "height";
410 return absl::OkStatus();
411 } else if (selector == "Slices") {
412 *result = "slices";
413 return absl::OkStatus();
414 } else if (selector == "SliceStride") {
415 *result = "slice_stride";
416 return absl::OkStatus();
417 } else if (selector == "Channels") {
418 *result = "channels";
419 return absl::OkStatus();
420 } else if (selector == "Batch") {
421 if (HasAxis(Axis::BATCH)) {
422 *result = "batch";
423 } else {
424 *result = "1";
425 }
426 return absl::OkStatus();
427 } else if (selector == "Depth") {
428 *result = "depth";
429 return absl::OkStatus();
430 } else if (selector == "SetBatchRef") {
431 if (args.size() != 1) {
432 return absl::InvalidArgumentError(
433 "Unsupported arguments in SetBatchRef selector");
434 }
435 state_vars_["batch_id"] = args[0];
436 *result = "";
437 return absl::OkStatus();
438 } else if (selector == "Read") {
439 return PerformReadSelector(gpu_info, args, template_args, result);
440 } else if (selector == "ReadNearest") {
441 return PerformReadNearestSelector(gpu_info, args, result);
442 } else if (selector == "ReadBilinear") {
443 return PerformReadBilinearSelector(gpu_info, args, result);
444 } else if (selector == "ReadPerChannel") {
445 return PerformReadPerChannelSelector(gpu_info, args, template_args, result);
446 } else if (selector == "Write") {
447 return PerformWriteSelector(gpu_info, args, template_args, result);
448 } else if (selector == "WriteLinear") {
449 return PerformWriteLinearSelector(gpu_info, args, template_args, result);
450 } else if (selector == "Write2D") {
451 return PerformWrite2DSelector(gpu_info, args, template_args, result);
452 } else if (selector == "GetAddress") {
453 return PerformGetAddressSelector(args, result);
454 } else if (selector == "GetHandle") {
455 return PerformGetHandleSelector(args, result);
456 } else {
457 return absl::NotFoundError(absl::StrCat(
458         "TensorDescriptor doesn't have a selector with name - ", selector));
459 }
460 }
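// These selectors are dispatched by name from templated kernel source; a
// read written as, e.g., args.src_tensor.Read(x, y, s) (the "src_tensor"
// name is whatever the operation registered and is used here only for
// illustration) resolves through PerformReadSelector() below, while
// Width/Height/Slices/etc. expand to the integer resources declared in
// GetGPUResources().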
461
462 absl::Status TensorDescriptor::PerformReadSelector(
463 const GpuInfo& gpu_info, const std::vector<std::string>& args,
464 const std::vector<std::string>& template_args, std::string* result) const {
465 DataType read_as_type = data_type_;
466 RETURN_IF_ERROR(
467 MaybeGetDataTypeFromTemplateArgs(template_args, &read_as_type));
468 if (layout_ == Layout::LINEAR) {
469 if (args.size() != 1) {
470 return absl::InvalidArgumentError(
471         "Read selector for LINEAR tensor requires a single argument");
472 }
473 *result = Read(gpu_info, read_as_type, GetPhysicalCoordsLinear(args[0]));
474 return absl::OkStatus();
475 }
476 if (layout_ == Layout::HW) {
477 if (args.size() != 2) {
478 return absl::InvalidArgumentError(
479         "Read selector for HW tensor requires two arguments");
480 }
481 *result =
482 Read(gpu_info, read_as_type, GetPhysicalCoordsHW(args[0], args[1]));
483 return absl::OkStatus();
484 }
485 if (args.size() == 1) { // function overload for 1D linear types.
486 if (storage_type_ == TensorStorageType::BUFFER ||
487 storage_type_ == TensorStorageType::IMAGE_BUFFER) {
488 *result = Read(gpu_info, read_as_type, {args[0]});
489 return absl::OkStatus();
490 } else {
491 return absl::InvalidArgumentError(
492 "Read selector with single argument can be used only with linear "
493           "storage types (BUFFER or IMAGE_BUFFER)");
494 }
495 }
496 std::string xc;
497 std::string yc;
498 std::string zc;
499 std::string sc;
500 std::string bc;
501 bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
502 if (args.size() < 2 || !parsed) {
503 return absl::NotFoundError("Unrecognized Read selector");
504 }
505
506 *result = Read(gpu_info, read_as_type, GetPhysicalCoords(xc, yc, zc, sc, bc));
507 return absl::OkStatus();
508 }
509
510 absl::Status TensorDescriptor::PerformReadNearestSelector(
511 const GpuInfo& gpu_info, const std::vector<std::string>& args,
512 std::string* result) const {
513 // ReadNearest(result, fc_x, fc_y, {fc_z}, slice);
514 if (!((args.size() == 5 && HasAxis(Axis::DEPTH)) || args.size() == 4)) {
515 return absl::NotFoundError("Unrecognized ReadNearest selector");
516 }
517 std::vector<std::string> coord_args =
518 std::vector<std::string>(args.begin() + 1, args.end());
519 std::string c;
520 c += " {\n";
521 c += " int coord_x_TMP = INIT_INT(" + coord_args[0] + ");\n";
522 c += " coord_x_TMP = max(coord_x_TMP, 0);\n";
523 c += " coord_x_TMP = min(coord_x_TMP, width - 1);\n";
524 coord_args[0] = "coord_x_TMP";
525 c += " int coord_y_TMP = INIT_INT(" + coord_args[1] + ");\n";
526 c += " coord_y_TMP = max(coord_y_TMP, 0);\n";
527 c += " coord_y_TMP = min(coord_y_TMP, height - 1);\n";
528 coord_args[1] = "coord_y_TMP";
529 if (HasAxis(Axis::DEPTH)) {
530 c += " int coord_z_TMP = INIT_INT(" + coord_args[2] + ");\n";
531 c += " coord_z_TMP = max(coord_z_TMP, 0);\n";
532 c += " coord_z_TMP = min(coord_z_TMP, depth - 1);\n";
533 coord_args[2] = "coord_z_TMP";
534 }
535 std::string src_value;
536 RETURN_IF_ERROR(PerformReadSelector(gpu_info, coord_args, {}, &src_value));
537 c += " " + args[0] + " = " + src_value + ";\n";
538 c += " }";
539 *result = c;
540 return absl::OkStatus();
541 }
542
543 absl::Status TensorDescriptor::PerformReadBilinearSelector(
544 const GpuInfo& gpu_info, const std::vector<std::string>& args,
545 std::string* result) const {
546 // ReadBilinear(result, fc_x, fc_y, {fc_z}, slice);
547 if (!((args.size() == 5 && HasAxis(Axis::DEPTH)) || args.size() == 4)) {
548 return absl::NotFoundError("Unrecognized ReadBilinear selector");
549 }
550 std::vector<std::string> coord_args =
551 std::vector<std::string>(args.begin() + 1, args.end());
552 std::string c;
553 c += " {\n";
554 c += " float f_x_TMP = floor(" + coord_args[0] + ");\n";
555 c += " float x_scale_TMP = (" + coord_args[0] + ") - f_x_TMP;\n";
556 c += " int i_x_TMP = INIT_INT(f_x_TMP);\n";
557 c += " int start_x_TMP = max(i_x_TMP, 0);\n";
558 c += " int end_x_TMP = min(i_x_TMP + 1, width - 1);\n";
559 c += " float f_y_TMP = floor(" + coord_args[1] + ");\n";
560 c += " float y_scale_TMP = (" + coord_args[1] + ") - f_y_TMP;\n";
561 c += " int i_y_TMP = INIT_INT(f_y_TMP);\n";
562 c += " int start_y_TMP = max(i_y_TMP, 0);\n";
563 c += " int end_y_TMP = min(i_y_TMP + 1, height - 1);\n";
564 if (HasAxis(Axis::DEPTH)) {
565 // 3d bilinear read, x, y, z
566 c += " float f_z_TMP = floor(" + coord_args[2] + ");\n";
567 c += " float z_scale_TMP = (" + coord_args[2] + ") - f_z_TMP;\n";
568 c += " int i_z_TMP = INIT_INT(f_z_TMP);\n";
569 c += " int start_z_TMP = max(i_z_TMP, 0);\n";
570 c += " int end_z_TMP = min(i_z_TMP + 1, depth - 1);\n";
571 int index = 0;
572 for (const auto& src_z : {"start_z_TMP", "end_z_TMP"}) {
573 for (const auto& src_y : {"start_y_TMP", "end_y_TMP"}) {
574 for (const auto& src_x : {"start_x_TMP", "end_x_TMP"}) {
575 coord_args[0] = src_x;
576 coord_args[1] = src_y;
577 coord_args[2] = src_z;
578 std::string src_value;
579 RETURN_IF_ERROR(
580 PerformReadSelector(gpu_info, coord_args, {"float"}, &src_value));
581 c += " float4 src" + std::to_string(index) + "_TMP = " + src_value +
582 ";\n";
583 index++;
584 }
585 }
586 }
587 c += " float4 t0_TMP = mix(mix(src0_TMP, src1_TMP, x_scale_TMP), "
588 "mix(src2_TMP, src3_TMP, x_scale_TMP), y_scale_TMP);\n";
589 c += " float4 t1_TMP = mix(mix(src4_TMP, src5_TMP, x_scale_TMP), "
590 "mix(src6_TMP, src7_TMP, x_scale_TMP), y_scale_TMP);\n";
591 c += " " + args[0] + " = TO_FLT4(mix(t0_TMP, t1_TMP, z_scale_TMP));\n";
592 } else {
593 // 2d bilinear read, x, y
594 int index = 0;
595 for (const auto& src_y : {"start_y_TMP", "end_y_TMP"}) {
596 for (const auto& src_x : {"start_x_TMP", "end_x_TMP"}) {
597 coord_args[0] = src_x;
598 coord_args[1] = src_y;
599 std::string src_value;
600 RETURN_IF_ERROR(
601 PerformReadSelector(gpu_info, coord_args, {"float"}, &src_value));
602 c += " float4 src" + std::to_string(index) + "_TMP = " + src_value +
603 ";\n";
604 index++;
605 }
606 }
607 c += " " + args[0] +
608 " = TO_FLT4(mix(mix(src0_TMP, src1_TMP, x_scale_TMP), mix(src2_TMP, "
609 "src3_TMP, x_scale_TMP), y_scale_TMP));\n";
610 }
611 c += " }";
612 *result = c;
613 return absl::OkStatus();
614 }
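// Sample ordering in the generated bilinear code: the inner loop walks x
// fastest, so src0..src3 correspond to (start_x,start_y), (end_x,start_y),
// (start_x,end_y), (end_x,end_y); the mix() tree then interpolates along x
// first, then y (and finally z in the 3D case).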
615
616 absl::Status TensorDescriptor::PerformReadPerChannelSelector(
617 const GpuInfo& gpu_info, const std::vector<std::string>& args,
618 const std::vector<std::string>& template_args, std::string* result) const {
619 std::vector<std::string> coord_args =
620 std::vector<std::string>(args.begin() + 1, args.end());
621 int channels_index = 0;
622 if (HasAxis(Axis::WIDTH)) {
623 channels_index++;
624 }
625 if (HasAxis(Axis::HEIGHT)) {
626 channels_index++;
627 }
628 if (HasAxis(Axis::DEPTH)) {
629 channels_index++;
630 }
631 if (channels_index >= coord_args.size()) {
632 return absl::NotFoundError(
633 "Wrong number of coordinates in ReadPerChannel.");
634 }
635 std::string c = " {\n";
636 c += " int slice_coord_TMP = (" + coord_args[channels_index] + ") / 4;\n";
637 c += " int sub_ch_coord_TMP = (" + coord_args[channels_index] + ") % 4;\n";
638 coord_args[channels_index] = "slice_coord_TMP";
639 std::string src_value;
640 RETURN_IF_ERROR(
641 PerformReadSelector(gpu_info, coord_args, template_args, &src_value));
642 if (gpu_info.IsApiOpenCl()) {
643 DataType dst_type = data_type_;
644 RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &dst_type));
645 c += " " + GetTypeDeclaration(gpu_info, dst_type, 4) +
646 " src_TMP = " + src_value + ";\n";
647 c +=
648 " " + args[0] + " = (" + ToCLDataType(dst_type, 1) +
649 "[4]){src_TMP.x, src_TMP.y, src_TMP.z, src_TMP.w}[sub_ch_coord_TMP];\n";
650 } else {
651 if (gpu_info.IsAdreno() && gpu_info.IsApiVulkan()) {
652 DataType dst_type = data_type_;
653 RETURN_IF_ERROR(
654 MaybeGetDataTypeFromTemplateArgs(template_args, &dst_type));
655 c += " " + GetTypeDeclaration(gpu_info, dst_type, 4) +
656 " src_TMP = " + src_value + ";\n";
657 c += " " + args[0] + " = " +
658 ToGlslShaderDataType(dst_type, 1, /*add_precision*/ false,
659 gpu_info.vulkan_info.SupportsExplicitFp16()) +
660 "[4](src_TMP.x, src_TMP.y, src_TMP.z, "
661 "src_TMP.w)[sub_ch_coord_TMP];\n";
662 } else {
663 c += " " + args[0] + " = " + src_value + "[sub_ch_coord_TMP];\n";
664 }
665 }
666
667 c += " }";
668 *result = c;
669 return absl::OkStatus();
670 }
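// ReadPerChannel splits the channel coordinate into a slice (c / 4) and a
// lane (c % 4), performs a regular slice read, and then extracts one lane:
// via a compound-literal array on OpenCL, a GLSL array constructor on
// Adreno/Vulkan, and plain vector indexing otherwise. Example: channel 6
// reads slice 1 and returns lane 2 (the .z component).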
671
672 absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector(
673 const std::vector<std::string>& args, std::string* value_name,
674 std::string* x_coord, std::string* y_coord, std::string* z_coord,
675 std::string* s_coord, std::string* b_coord) const {
676 std::string xc;
677 std::string yc;
678 std::string zc;
679 std::string sc;
680 std::string bc;
681 bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
682 if (args.size() < 2 || !parsed) {
683 return absl::NotFoundError("Unrecognized Write selector");
684 }
685 *value_name = args[0];
686 *b_coord = absl::StrCat("(", bc, ")");
687 *x_coord = absl::StrCat("(", xc, ")");
688 *y_coord = absl::StrCat("(", yc, ")");
689 *z_coord = absl::StrCat("(", zc, ")");
690 *s_coord = absl::StrCat("(", sc, ")");
691 return absl::OkStatus();
692 }
693
694 absl::Status TensorDescriptor::PerformWriteSelector(
695 const GpuInfo& gpu_info, const std::vector<std::string>& args,
696 const std::vector<std::string>& template_args, std::string* result) const {
697 std::string xc;
698 std::string yc;
699 std::string zc;
700 std::string sc;
701 std::string bc;
702 bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
703 if (args.size() < 2 || !parsed) {
704 return absl::NotFoundError("Unrecognized Write selector");
705 }
706 DataType write_type = data_type_;
707 RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
708 *result = Write(gpu_info, write_type, args[0],
709 GetPhysicalCoords(xc, yc, zc, sc, bc));
710 return absl::OkStatus();
711 }
712
713 absl::Status TensorDescriptor::PerformWriteLinearSelector(
714 const GpuInfo& gpu_info, const std::vector<std::string>& args,
715 const std::vector<std::string>& template_args, std::string* result) const {
716 if (storage_type_ != TensorStorageType::BUFFER &&
717 storage_type_ != TensorStorageType::IMAGE_BUFFER) {
718 return absl::InvalidArgumentError(
719 "WriteLinear selector can be used only with linear "
720         "storages (BUFFER/IMAGE_BUFFER)");
721 }
722 if (args.size() != 2) {
723 return absl::NotFoundError("Unrecognized WriteLinear selector");
724 }
725 DataType write_type = data_type_;
726 RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
727 *result = Write(gpu_info, write_type, args[0], {args[1]});
728 return absl::OkStatus();
729 }
730
731 absl::Status TensorDescriptor::PerformWrite2DSelector(
732 const GpuInfo& gpu_info, const std::vector<std::string>& args,
733 const std::vector<std::string>& template_args, std::string* result) const {
734 if (storage_type_ != TensorStorageType::TEXTURE_2D) {
735 return absl::InvalidArgumentError(
736 "Write2D selector can be used only with 2d "
737         "storages (TEXTURE_2D)");
738 }
739 if (args.size() != 3) {
740 return absl::NotFoundError("Unrecognized Write2D selector");
741 }
742 DataType write_type = data_type_;
743 RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
744 *result = Write(gpu_info, write_type, args[0], {args[1], args[2]});
745 return absl::OkStatus();
746 }
747
748 std::string TensorDescriptor::Read(
749 const GpuInfo& gpu_info, DataType read_as_type,
750 const std::vector<std::string>& coords) const {
751 const std::string conversion =
752 GetConversion(gpu_info, storage_type_, data_type_, read_as_type);
753 if (gpu_info.IsApiOpenCl() &&
754 !(data_type_ == DataType::FLOAT16 && read_as_type == DataType::FLOAT32)) {
755 read_as_type = data_type_;
756 }
757 switch (storage_type_) {
758 case TensorStorageType::BUFFER: {
759 std::string result;
760 if (gpu_info.IsGlsl() && data_type_ == DataType::FLOAT16 &&
761 !gpu_info.IsGlslSupportsExplicitFp16()) {
762 result =
763 absl::StrCat("vec4(unpackHalf2x16(buffer[", coords[0],
764 "].x), unpackHalf2x16(buffer[", coords[0], "].y))");
765 } else {
766 result = absl::StrCat("buffer[", coords[0], "]");
767 }
768 MayBeAddConversion(conversion, &result);
769 return result;
770 }
771 case TensorStorageType::TEXTURE_2D:
772 case TensorStorageType::SINGLE_TEXTURE_2D: {
773 std::string result;
774 if (gpu_info.IsApiOpenCl()) {
775 result = absl::Substitute("$0(image2d, smp_zero, (int2)($1, $2))",
776 GetReadImageFromDataType(read_as_type),
777 coords[0], coords[1]);
778 } else if (gpu_info.IsApiMetal()) {
779 result = absl::Substitute("image2d.read(ushort2($0, $1))", coords[0],
780 coords[1]);
781 } else if (gpu_info.IsGlsl()) {
782 result = "texelFetch(image2d, ivec2(" + coords[0] + ", " + coords[1] +
783 "), 0)";
784 if (data_type_ == DataType::FLOAT16 &&
785 gpu_info.IsGlslSupportsExplicitFp16()) {
786 result = "f16vec4(" + result + ")";
787 }
788 }
789 MayBeAddConversion(conversion, &result);
790 return result;
791 }
792 case TensorStorageType::TEXTURE_3D: {
793 std::string result;
794 if (gpu_info.IsApiOpenCl()) {
795 result =
796 absl::Substitute("$0(image3d, smp_zero, (int4)($1, $2, $3, 0))",
797 GetReadImageFromDataType(read_as_type), coords[0],
798 coords[1], coords[2]);
799 } else if (gpu_info.IsApiMetal()) {
800 result = absl::Substitute("image3d.read(ushort3($0, $1, $2))",
801 coords[0], coords[1], coords[2]);
802 } else if (gpu_info.IsGlsl()) {
803 result = "texelFetch(image3d, ivec3(" + coords[0] + ", " + coords[1] +
804 ", " + coords[2] + "), 0)";
805 if (data_type_ == DataType::FLOAT16 &&
806 gpu_info.IsGlslSupportsExplicitFp16()) {
807 result = "f16vec4(" + result + ")";
808 }
809 }
810 MayBeAddConversion(conversion, &result);
811 return result;
812 }
813 case TensorStorageType::TEXTURE_ARRAY: {
814 std::string result;
815 if (gpu_info.IsApiOpenCl()) {
816 result = absl::Substitute(
817 "$0(image2d_array, smp_zero, (int4)($1, $2, $3, 0))",
818 GetReadImageFromDataType(read_as_type), coords[0], coords[1],
819 coords[2]);
820 } else if (gpu_info.IsApiMetal()) {
821 result = absl::Substitute("image2d_array.read(ushort2($0, $1), $2)",
822 coords[0], coords[1], coords[2]);
823 } else if (gpu_info.IsGlsl()) {
824 result = "texelFetch(image2d_array, ivec3(" + coords[0] + ", " +
825 coords[1] + ", " + coords[2] + "), 0)";
826 if (data_type_ == DataType::FLOAT16 &&
827 gpu_info.IsGlslSupportsExplicitFp16()) {
828 result = "f16vec4(" + result + ")";
829 }
830 }
831 MayBeAddConversion(conversion, &result);
832 return result;
833 }
834 case TensorStorageType::IMAGE_BUFFER: {
835 std::string result;
836 if (gpu_info.IsApiOpenCl()) {
837 result = absl::StrCat(GetReadImageFromDataType(read_as_type),
838 "(image_buffer, ", coords[0], ")");
839 } else if (gpu_info.IsApiMetal()) {
840 result = absl::Substitute("image_buffer.read(uint($0))", coords[0]);
841 } else if (gpu_info.IsGlsl()) {
842 result = "texelFetch(image_buffer, " + coords[0] + ")";
843 if (data_type_ == DataType::FLOAT16 &&
844 gpu_info.IsGlslSupportsExplicitFp16()) {
845 result = "f16vec4(" + result + ")";
846 }
847 }
848 MayBeAddConversion(conversion, &result);
849 return result;
850 }
851 case TensorStorageType::UNKNOWN:
852 return "";
853 }
854 }
855
856 std::string TensorDescriptor::Write(
857 const GpuInfo& gpu_info, DataType write_type, const std::string& var_name,
858 const std::vector<std::string>& coords) const {
859 bool is_texture_write = storage_type_ == TensorStorageType::IMAGE_BUFFER ||
860 storage_type_ == TensorStorageType::TEXTURE_2D ||
861 storage_type_ == TensorStorageType::TEXTURE_ARRAY ||
862 storage_type_ == TensorStorageType::TEXTURE_3D;
863 if (storage_type_ == TensorStorageType::IMAGE_BUFFER &&
864 use_buffer_for_write_only_image_buffer_) {
865 is_texture_write = false;
866 }
867 if (storage_type_ == TensorStorageType::TEXTURE_2D &&
868 use_buffer_for_write_only_2d_texture_) {
869 is_texture_write = false;
870 }
871 std::string write_expr = var_name;
872 DataType write_required_type = data_type_;
873 if (data_type_ == DataType::BOOL) {
874     // DataType::BOOL is stored as DataType::UINT8.
875 const std::string conversion =
876 GetTypeConversion(gpu_info, DataType::BOOL, DataType::UINT8, 4);
877 write_expr = absl::Substitute(conversion, write_expr);
878 write_required_type = DataType::UINT8;
879 }
880 if (is_texture_write) {
881 if (gpu_info.IsApiOpenCl()) {
882 write_required_type = ToClTextureType(write_required_type);
883 } else if (gpu_info.IsApiMetal()) {
884 write_required_type = ToMetalTextureType(write_required_type);
885 }
886 }
887 if (write_type != write_required_type) {
888 const std::string conversion =
889 GetTypeConversion(gpu_info, write_type, write_required_type, 4);
890 write_expr = absl::Substitute(conversion, write_expr);
891 }
892 switch (storage_type_) {
893 case TensorStorageType::BUFFER:
894 case TensorStorageType::IMAGE_BUFFER:
895 if (gpu_info.IsApiOpenCl()) {
896 if (use_buffer_for_write_only_image_buffer_) {
897 return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
898 } else {
899 return absl::Substitute("$0(image_buffer, $1, $2)",
900 GetWriteImageFromDataType(data_type_),
901 coords[0], write_expr);
902 }
903 } else if (gpu_info.IsApiMetal()) {
904 if (use_buffer_for_write_only_image_buffer_) {
905 return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
906 } else {
907 return absl::Substitute("image_buffer.write($0, uint($1))",
908 write_expr, coords[0]);
909 }
910 } else if (gpu_info.IsGlsl()) {
911 if (data_type_ == DataType::FLOAT16 &&
912 !gpu_info.IsGlslSupportsExplicitFp16()) {
913 return absl::StrCat("buffer[", coords[0], "] = uvec2(packHalf2x16(",
914 write_expr, ".xy), packHalf2x16(", write_expr,
915 ".zw))");
916 } else {
917 return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
918 }
919 } else {
920 return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
921 }
922 case TensorStorageType::SINGLE_TEXTURE_2D:
923 case TensorStorageType::TEXTURE_2D:
924 if (gpu_info.IsApiOpenCl()) {
925 if (use_buffer_for_write_only_2d_texture_) {
926 return absl::Substitute(
927 "buffer[($2) * aligned_texture_width + ($1)] = $0", write_expr,
928 coords[0], coords[1]);
929 } else {
930 return absl::Substitute("$0(image2d, (int2)($1, $2), $3)",
931 GetWriteImageFromDataType(data_type_),
932 coords[0], coords[1], write_expr);
933 }
934 } else if (gpu_info.IsApiMetal()) {
935 if (use_buffer_for_write_only_2d_texture_) {
936 return absl::Substitute(
937 "buffer[($2) * aligned_texture_width + ($1)] = $0", write_expr,
938 coords[0], coords[1]);
939 } else {
940 return absl::Substitute("image2d.write($0, ushort2($1, $2))",
941 write_expr, coords[0], coords[1]);
942 }
943 } else if (gpu_info.IsGlsl()) {
944 return absl::Substitute("imageStore(image2d, ivec2($0, $1), $2)",
945 coords[0], coords[1], write_expr);
946 } else {
947 return "";
948 }
949 case TensorStorageType::TEXTURE_3D:
950 if (gpu_info.IsApiOpenCl()) {
951 return absl::Substitute("$0(image3d, (int4)($1, $2, $3, 0), $4)",
952 GetWriteImageFromDataType(data_type_),
953 coords[0], coords[1], coords[2], write_expr);
954 } else if (gpu_info.IsApiMetal()) {
955 return absl::Substitute("image3d.write($0, ushort3($1, $2, $3))",
956 write_expr, coords[0], coords[1], coords[2]);
957 } else if (gpu_info.IsGlsl()) {
958 return absl::Substitute("imageStore(image3d, ivec3($0, $1, $2), $3)",
959 coords[0], coords[1], coords[2], write_expr);
960 } else {
961 return "";
962 }
963 case TensorStorageType::TEXTURE_ARRAY:
964 if (gpu_info.IsApiOpenCl()) {
965 return absl::Substitute("$0(image2d_array, (int4)($1, $2, $3, 0), $4)",
966 GetWriteImageFromDataType(data_type_),
967 coords[0], coords[1], coords[2], write_expr);
968 } else if (gpu_info.IsApiMetal()) {
969 return absl::Substitute("image2d_array.write($0, ushort2($1, $2), $3)",
970 write_expr, coords[0], coords[1], coords[2]);
971 } else if (gpu_info.IsGlsl()) {
972 return absl::Substitute(
973 "imageStore(image2d_array, ivec3($0, $1, $2), $3)", coords[0],
974 coords[1], coords[2], write_expr);
975 } else {
976 return "";
977 }
978 case TensorStorageType::UNKNOWN:
979 return "";
980 }
981 }
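// Summary of the conversion chain in Write(): BOOL values are first
// converted to UINT8 (the storage type for bools), texture writes then map
// the required type to the texture channel type (ToClTextureType /
// ToMetalTextureType), and a final type conversion is emitted only when the
// incoming write_type differs from that required type.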
982
983 absl::Status TensorDescriptor::PerformGetAddressSelector(
984 const std::vector<std::string>& args, std::string* result) const {
985 std::string xc, yc, zc, sc, bc;
986 bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
987 if (!parsed) {
988 return absl::NotFoundError("Unrecognized GetAddress selector");
989 }
990
991 *result = GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc);
992 return absl::OkStatus();
993 }
994
995 absl::Status TensorDescriptor::PerformGetHandleSelector(
996 const std::vector<std::string>& args, std::string* result) const {
997 if (!args.empty()) {
998 return absl::NotFoundError(
999 absl::StrCat("GetHandle does not require arguments, but ", args.size(),
1000 " was passed"));
1001 }
1002 switch (storage_type_) {
1003 case TensorStorageType::BUFFER:
1004 *result = "buffer";
1005 return absl::OkStatus();
1006 case TensorStorageType::IMAGE_BUFFER:
1007 if (access_type_ == AccessType::READ) {
1008 *result = "image_buffer";
1009 } else {
1010 *result = "buffer";
1011 }
1012 return absl::OkStatus();
1013 case TensorStorageType::TEXTURE_2D:
1014 case TensorStorageType::SINGLE_TEXTURE_2D:
1015 *result = "image2d";
1016 return absl::OkStatus();
1017 case TensorStorageType::TEXTURE_ARRAY:
1018 *result = "image2d_array";
1019 return absl::OkStatus();
1020 case TensorStorageType::TEXTURE_3D:
1021 *result = "image3d";
1022 return absl::OkStatus();
1023 case TensorStorageType::UNKNOWN:
1024 return absl::UnavailableError("Unknown type");
1025 }
1026 }
1027
1028 std::string TensorDescriptor::StorageTypeToAddressType() const {
1029 switch (storage_type_) {
1030 case TensorStorageType::BUFFER:
1031 case TensorStorageType::IMAGE_BUFFER:
1032 return "int";
1033 case TensorStorageType::TEXTURE_2D:
1034 case TensorStorageType::SINGLE_TEXTURE_2D:
1035 return "int2";
1036 case TensorStorageType::TEXTURE_ARRAY:
1037 case TensorStorageType::TEXTURE_3D:
1038 return "int4";
1039 case TensorStorageType::UNKNOWN:
1040 return "";
1041 }
1042 }
1043
1044 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsLinear(
1045 const std::string& x) const {
1046 switch (storage_type_) {
1047 case TensorStorageType::BUFFER:
1048 case TensorStorageType::IMAGE_BUFFER:
1049 return {absl::Substitute("($0)", x)};
1050 case TensorStorageType::TEXTURE_2D:
1051 case TensorStorageType::SINGLE_TEXTURE_2D:
1052 return {absl::Substitute("($0)", x), "0"};
1053 case TensorStorageType::TEXTURE_ARRAY:
1054 case TensorStorageType::TEXTURE_3D:
1055 return {absl::Substitute("($0)", x), "0", "0"};
1056 case TensorStorageType::UNKNOWN:
1057 return {""};
1058 default:
1059 return {""};
1060 }
1061 }
1062
1063 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsHW(
1064 const std::string& x, const std::string& y) const {
1065 switch (storage_type_) {
1066 case TensorStorageType::BUFFER:
1067 case TensorStorageType::IMAGE_BUFFER:
1068 return {absl::Substitute("(($1) * width + ($0))", x, y)};
1069 case TensorStorageType::TEXTURE_2D:
1070 case TensorStorageType::SINGLE_TEXTURE_2D:
1071 return {absl::Substitute("($0)", x), absl::Substitute("($0)", y)};
1072 case TensorStorageType::TEXTURE_ARRAY:
1073 case TensorStorageType::TEXTURE_3D:
1074 return {absl::Substitute("($0)", x), absl::Substitute("($0)", y), "0"};
1075 case TensorStorageType::UNKNOWN:
1076 return {""};
1077 default:
1078 return {""};
1079 }
1080 }
1081
1082 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHS(
1083 const std::string& x, const std::string& y, const std::string& s) const {
1084 switch (storage_type_) {
1085 case TensorStorageType::BUFFER:
1086 case TensorStorageType::IMAGE_BUFFER:
1087 return {
1088 absl::Substitute("((($2) * height + ($1)) * width + ($0))", x, y, s)};
1089 case TensorStorageType::TEXTURE_2D:
1090 return {absl::Substitute("($0)", x),
1091 absl::Substitute("(($0) * slices + ($1))", y, s)};
1092 case TensorStorageType::SINGLE_TEXTURE_2D:
1093 return {absl::Substitute("($0)", x), absl::Substitute("($0)", y)};
1094 case TensorStorageType::TEXTURE_ARRAY:
1095 case TensorStorageType::TEXTURE_3D:
1096 return {absl::Substitute("($0)", x), absl::Substitute("($0)", y),
1097 absl::Substitute("($0)", s)};
1098 case TensorStorageType::UNKNOWN:
1099 return {""};
1100 default:
1101 return {""};
1102 }
1103 }
1104
1105 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHSB(
1106 const std::string& x, const std::string& y, const std::string& s,
1107 const std::string& b) const {
1108 switch (storage_type_) {
1109 case TensorStorageType::BUFFER:
1110 case TensorStorageType::IMAGE_BUFFER:
1111 return {absl::Substitute(
1112 "(((($3) * height + $2) * width + ($1)) * batch + ($0))", b, x, y,
1113 s)};
1114 case TensorStorageType::TEXTURE_2D:
1115 return {absl::Substitute("(($0) * batch + ($1))", x, b),
1116 absl::Substitute("(($0) * slices + ($1))", y, s)};
1117 case TensorStorageType::SINGLE_TEXTURE_2D:
1118 return {absl::Substitute("(($0) * batch + ($1))", x, b),
1119 absl::Substitute("($0)", y)};
1120 case TensorStorageType::TEXTURE_ARRAY:
1121 case TensorStorageType::TEXTURE_3D:
1122 return {absl::Substitute("(($0) * batch + ($1))", x, b),
1123 absl::Substitute("($0)", y), absl::Substitute("($0)", s)};
1124 case TensorStorageType::UNKNOWN:
1125 return {""};
1126 default:
1127 return {""};
1128 }
1129 }
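// For BUFFER/IMAGE_BUFFER with a BHWC layout the generated index is
// ((s * height + y) * width + x) * batch + b, i.e. slice-major, then
// height, width, batch; this is the same (D)SHWBC4 ordering that
// GetLinearIndex() below uses on the host side (the host index additionally
// resolves the sub-channel within each group of 4).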
1130
1131 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHDS(
1132 const std::string& x, const std::string& y, const std::string& z,
1133 const std::string& s) const {
1134 switch (storage_type_) {
1135 case TensorStorageType::BUFFER:
1136 case TensorStorageType::IMAGE_BUFFER:
1137 return {absl::Substitute(
1138 "(((($3) * slices + ($2)) * height + ($1)) * width + ($0))", x, y, s,
1139 z)};
1140 case TensorStorageType::TEXTURE_2D:
1141 return {absl::Substitute("(($0) * depth + ($1))", x, z),
1142 absl::Substitute("(($0) * slices + ($1))", y, s)};
1143 case TensorStorageType::SINGLE_TEXTURE_2D:
1144 return {absl::Substitute("(($0) * depth + ($1))", x, z),
1145 absl::Substitute("($0)", y)};
1146 case TensorStorageType::TEXTURE_ARRAY:
1147 case TensorStorageType::TEXTURE_3D:
1148 return {absl::Substitute("($0)", x), absl::Substitute("($0)", y),
1149 absl::Substitute("(($0) * slices + ($1))", z, s)};
1150 case TensorStorageType::UNKNOWN:
1151 return {""};
1152 default:
1153 return {""};
1154 }
1155 }
1156
1157 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHDSB(
1158 const std::string& x, const std::string& y, const std::string& z,
1159 const std::string& s, const std::string& b) const {
1160 switch (storage_type_) {
1161 case TensorStorageType::BUFFER:
1162 case TensorStorageType::IMAGE_BUFFER:
1163 return {absl::Substitute(
1164 "((((($4) * slices + ($3)) * height + $2) * width + ($1)) * batch + "
1165 "($0))",
1166 b, x, y, s, z)};
1167 case TensorStorageType::TEXTURE_2D:
1168 return {absl::Substitute("((($0)*batch + ($1))*depth + ($2))", x, b, z),
1169 absl::Substitute("(($0) * slices + ($1))", y, s)};
1170 case TensorStorageType::SINGLE_TEXTURE_2D:
1171 return {absl::Substitute("((($0)*batch + ($1))*depth + ($2))", x, b, z),
1172 absl::Substitute("($0)", y)};
1173 case TensorStorageType::TEXTURE_ARRAY:
1174 case TensorStorageType::TEXTURE_3D:
1175 return {absl::Substitute("(($0) * batch + ($1))", x, b),
1176 absl::Substitute("($0)", y),
1177 absl::Substitute("(($0) * slices + ($1))", z, s)};
1178 case TensorStorageType::UNKNOWN:
1179 return {""};
1180 default:
1181 return {""};
1182 }
1183 }
1184
1185 std::string TensorDescriptor::GetGlobalAddressNoDeclaration(
1186 const std::string& xc, const std::string& yc, const std::string& zc,
1187 const std::string& sc, const std::string& bc) const {
1188 auto coords = GetPhysicalCoords(xc, yc, zc, sc, bc);
1189 switch (storage_type_) {
1190 case TensorStorageType::BUFFER:
1191 case TensorStorageType::IMAGE_BUFFER: {
1192 return coords[0];
1193 }
1194 case TensorStorageType::TEXTURE_2D:
1195 case TensorStorageType::SINGLE_TEXTURE_2D:
1196 return absl::Substitute("(int2)($0, $1)", coords[0], coords[1]);
1197 case TensorStorageType::TEXTURE_ARRAY:
1198 case TensorStorageType::TEXTURE_3D:
1199 return absl::Substitute("(int4)($0, $1, $2, 0)", coords[0], coords[1],
1200 coords[2]);
1201 case TensorStorageType::UNKNOWN:
1202 return "error";
1203 }
1204 }
1205
1206 std::vector<std::string> TensorDescriptor::GetPhysicalCoords(
1207 const std::string& xc, const std::string& yc, const std::string& zc,
1208 const std::string& sc, const std::string& bc) const {
1209 if (layout_ == Layout::HWC) {
1210 return GetPhysicalCoordsWHS(xc, yc, sc);
1211 } else if (layout_ == Layout::BHWC) {
1212 return GetPhysicalCoordsWHSB(xc, yc, sc, bc);
1213 } else if (layout_ == Layout::HWDC) {
1214 return GetPhysicalCoordsWHDS(xc, yc, zc, sc);
1215 } else if (layout_ == Layout::BHWDC) {
1216 return GetPhysicalCoordsWHDSB(xc, yc, zc, sc, bc);
1217 } else {
1218 return {""};
1219 }
1220 }
1221
1222 absl::Status TensorDescriptor::MaybeGetDataTypeFromTemplateArgs(
1223 const std::vector<std::string>& template_args, DataType* result) const {
1224 for (const auto& template_arg : template_args) {
1225 std::string read_type = template_arg;
1226 if (read_type == "half") {
1227 *result = DataType::FLOAT16;
1228 return absl::OkStatus();
1229 } else if (read_type == "float") {
1230 *result = DataType::FLOAT32;
1231 return absl::OkStatus();
1232 } else if (read_type == "int") {
1233 *result = DataType::INT32;
1234 return absl::OkStatus();
1235 } else if (read_type == "short") {
1236 *result = DataType::INT16;
1237 return absl::OkStatus();
1238 } else if (read_type == "char") {
1239 *result = DataType::INT8;
1240 return absl::OkStatus();
1241 } else if (read_type == "uint") {
1242 *result = DataType::UINT32;
1243 return absl::OkStatus();
1244 } else if (read_type == "ushort") {
1245 *result = DataType::UINT16;
1246 return absl::OkStatus();
1247 } else if (read_type == "uchar") {
1248 *result = DataType::UINT8;
1249 return absl::OkStatus();
1250 } else if (read_type == "bool") {
1251 *result = DataType::BOOL;
1252 return absl::OkStatus();
1253 }
1254 }
1255 return absl::OkStatus();
1256 }
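// Note: the loop stops at the first recognized scalar type name and silently
// ignores anything else, so when no template argument matches, *result keeps
// the default the caller passed in (typically data_type_).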
1257
1258 bool TensorDescriptor::HasAxis(Axis axis) const {
1259 if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS) {
1260 return true;
1261 }
1262 if (axis == Axis::BATCH &&
1263 (layout_ == Layout::BHWC || layout_ == Layout::BHWDC)) {
1264 return true;
1265 }
1266 if (axis == Axis::DEPTH &&
1267 (layout_ == Layout::HWDC || layout_ == Layout::BHWDC)) {
1268 return true;
1269 }
1270 return false;
1271 }
1272
1273 bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string>& args,
1274 int offset, std::string* xc,
1275 std::string* yc, std::string* zc,
1276 std::string* sc,
1277 std::string* bc) const {
1278 if (HasAxis(Axis::WIDTH)) {
1279 if (offset >= args.size()) return false;
1280 *xc = args[offset++];
1281 }
1282 if (HasAxis(Axis::HEIGHT)) {
1283 if (offset >= args.size()) return false;
1284 *yc = args[offset++];
1285 }
1286 if (HasAxis(Axis::DEPTH)) {
1287 if (offset >= args.size()) return false;
1288 *zc = args[offset++];
1289 }
1290 if (HasAxis(Axis::CHANNELS)) {
1291 if (offset >= args.size()) return false;
1292 *sc = args[offset++];
1293 }
1294 if (HasAxis(Axis::BATCH)) {
1295 if (offset >= args.size()) {
1296 auto it = state_vars_.find("batch_id");
1297 if (it == state_vars_.end()) {
1298 return false;
1299 } else {
1300 *bc = it->second;
1301 }
1302 } else {
1303 *bc = args[offset++];
1304 }
1305 }
1306 return true;
1307 }
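// When the tensor has a batch axis but the caller supplied one coordinate
// too few, the batch coordinate falls back to the "batch_id" state variable
// installed by the SetBatchRef selector; parsing fails if it was never set.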
1308
1309 size_t TensorDescriptor::GetSizeInBytesForShape(const BHWDC& shape5d) const {
1310 int aligned_channels = storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D
1311 ? shape5d.c
1312 : AlignByN(shape5d.c, 4);
1313 int elements_count =
1314 shape5d.b * shape5d.w * shape5d.h * shape5d.d * aligned_channels;
1315 return elements_count * SizeOf(data_type_);
1316 }
1317
1318 int TensorDescriptor::GetLinearIndex(const BHWDC& shape5d, int b, int x, int y,
1319 int d, int s, int sub_c) const {
1320 const int slices = DivideRoundUp(shape5d.c, 4);
1321 switch (storage_type_) {
1322 case TensorStorageType::BUFFER:
1323 case TensorStorageType::IMAGE_BUFFER:
1324 case TensorStorageType::TEXTURE_ARRAY:
1325 case TensorStorageType::TEXTURE_3D:
1326 return ((((d * slices + s) * shape5d.h + y) * shape5d.w + x) * shape5d.b +
1327 b) *
1328 4 +
1329 sub_c; // DSHWBC4
1330 case TensorStorageType::TEXTURE_2D:
1331 return ((((y * slices + s) * shape5d.w + x) * shape5d.b + b) * shape5d.d +
1332 d) *
1333 4 +
1334 sub_c; // HSWBDC4
1335 case TensorStorageType::SINGLE_TEXTURE_2D:
1336 return (((y * shape5d.w + x) * shape5d.b + b) * shape5d.d + d) *
1337 shape5d.c +
1338 sub_c; // HWBDC
1339 case TensorStorageType::UNKNOWN:
1340 return -1;
1341 }
1342 }
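// Illustrative example (shape chosen only for illustration): for a BHWDC
// shape with b=1, h=2, w=2, d=1, c=6 (slices = 2) and BUFFER storage, the
// element at (b=0, x=1, y=1, d=0, s=1, sub_c=2) has linear index
// ((((0*2 + 1)*2 + 1)*2 + 1)*1 + 0)*4 + 2 = 30.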
1343
1344 void TensorDescriptor::UploadData(
1345 const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
1346 shape_ = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
1347 UploadData(src.data.data());
1348 }
1349
1350 bool TensorDescriptor::SupportsZeroClamp(const Axis& axis,
1351 const GpuInfo& gpu_info) const {
1352 switch (storage_type_) {
1353 case TensorStorageType::UNKNOWN:
1354 return false;
1355 case TensorStorageType::BUFFER:
1356 case TensorStorageType::IMAGE_BUFFER:
1357 return false;
1358 case TensorStorageType::TEXTURE_ARRAY:
1359 return (axis == Axis::WIDTH || axis == Axis::HEIGHT) &&
1360 gpu_info.SupportsZeroClampForImages();
1361 case TensorStorageType::TEXTURE_2D:
1362 case TensorStorageType::SINGLE_TEXTURE_2D:
1363 return (axis == Axis::WIDTH || axis == Axis::HEIGHT) &&
1364 gpu_info.SupportsZeroClampForImages();
1365 case TensorStorageType::TEXTURE_3D:
1366 return (axis == Axis::WIDTH || axis == Axis::HEIGHT ||
1367 axis == Axis::DEPTH) &&
1368 gpu_info.SupportsZeroClampForImages();
1369 }
1370 }
1371
1372 bool TensorDescriptor::CanReadOutOfBorder(const Axis& axis) const {
1373 switch (storage_type_) {
1374 case TensorStorageType::UNKNOWN:
1375 return false;
1376 case TensorStorageType::BUFFER:
1377 return false;
1378 case TensorStorageType::IMAGE_BUFFER:
1379 case TensorStorageType::TEXTURE_2D:
1380 case TensorStorageType::TEXTURE_3D:
1381 case TensorStorageType::SINGLE_TEXTURE_2D:
1382 case TensorStorageType::TEXTURE_ARRAY:
1383 return true;
1384 }
1385 }
1386
1387 bool TensorDescriptor::IsLinear() const {
1388 return storage_type_ == TensorStorageType::BUFFER ||
1389 storage_type_ == TensorStorageType::IMAGE_BUFFER;
1390 }
1391
1392 bool TensorDescriptor::ReturnsZeroForNegOneRead(const GpuInfo& gpu_info) const {
1393 return storage_type_ == TensorStorageType::IMAGE_BUFFER &&
1394 gpu_info.SupportsZeroClampForImageBuffer();
1395 }
1396
1397 absl::Status TensorDescriptor::CanCreateTensorWithShape(
1398 const GpuInfo& gpu_info, const BHWDC& shape) const {
1399 const int slices = DivideRoundUp(shape.c, 4);
1400 const uint64_t allocation_size = GetSizeInBytesForShape(shape);
1401 const std::string common_desc = "Shape - " + ToString(shape) +
1402 ", data type - " + ToString(data_type_) + ".";
1403 if (allocation_size > gpu_info.GetMaxMemoryAllocationSize()) {
1404 return absl::ResourceExhaustedError(absl::StrCat(
1405 "Requested allocation size - ", allocation_size,
1406 " bytes. Max allocation size for this GPU - ",
1407 gpu_info.GetMaxMemoryAllocationSize(), " bytes. ", common_desc));
1408 }
1409 switch (storage_type_) {
1410 case TensorStorageType::BUFFER: {
1411 if (allocation_size > gpu_info.GetMaxBufferSize()) {
1412 return absl::ResourceExhaustedError(absl::StrCat(
1413 "Buffer with size - ", allocation_size,
1414 " bytes can not be created. Max buffer size for this GPU - ",
1415 gpu_info.GetMaxBufferSize(), " bytes. ", common_desc));
1416 } else {
1417 return absl::OkStatus();
1418 }
1419 }
1420 case TensorStorageType::IMAGE_BUFFER: {
1421 const uint64_t element_size = 4 * SizeOf(data_type_);
1422 const uint64_t image_width = allocation_size / element_size;
1423 if (image_width > gpu_info.GetMaxImageBufferWidth()) {
1424 return absl::ResourceExhaustedError(absl::StrCat(
1425 "Image buffer with width - ", image_width,
1426 " can not be created. Max image buffer width for this GPU - ",
1427 gpu_info.GetMaxImageBufferWidth(), ". ", common_desc));
1428 } else if (allocation_size > gpu_info.GetMaxBufferSize()) {
1429 return absl::ResourceExhaustedError(absl::StrCat(
1430 "Buffer with size - ", allocation_size,
1431 " bytes can not be created. Max buffer size for this GPU - ",
1432 gpu_info.GetMaxBufferSize(), " bytes. ", common_desc));
1433 } else {
1434 return absl::OkStatus();
1435 }
1436 }
    case TensorStorageType::TEXTURE_3D: {
      if (gpu_info.IsApiOpenCl() &&
          gpu_info.opencl_info.cl_version < OpenClVersion::kCl1_2 &&
          slices == 1) {
        return absl::InternalError(
            "clCreateImage3D (used in CL 1.0/1.1) can not create an image "
            "with depth = 1 per the specification.");
      }
      const int image_width = shape.w * shape.b;
      const int image_height = shape.h;
      const int image_depth = slices * shape.d;
      if (image_width > gpu_info.GetMaxImage3DWidth()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image3D with width - ", image_width,
            " can not be created. Max Image3D width for this GPU - ",
            gpu_info.GetMaxImage3DWidth(), ". ", common_desc));
      } else if (image_height > gpu_info.GetMaxImage3DHeight()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image3D with height - ", image_height,
            " can not be created. Max Image3D height for this GPU - ",
            gpu_info.GetMaxImage3DHeight(), ". ", common_desc));
      } else if (image_depth > gpu_info.GetMaxImage3DDepth()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image3D with depth - ", image_depth,
            " can not be created. Max Image3D depth for this GPU - ",
            gpu_info.GetMaxImage3DDepth(), ". ", common_desc));
      } else {
        return absl::OkStatus();
      }
    }
    case TensorStorageType::TEXTURE_ARRAY: {
      // Bug on some Adreno. b/131099086
      if (gpu_info.IsApiOpenCl() && slices == 1 && gpu_info.IsAdreno() &&
          !gpu_info.adreno_info.support_one_layer_texture_array) {
        return absl::InternalError(
            "Image2DArray with 1 layer works incorrectly on some Adreno GPUs "
            "in OpenCL. Can not be created.");
      }
      const int image_width = shape.w * shape.b;
      const int image_height = shape.h;
      const int image_layers = slices * shape.d;
      if (image_width > gpu_info.GetMaxImage2DWidth()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2DArray with width - ", image_width,
            " can not be created. Max Image2DArray width for this GPU - ",
            gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
      } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2DArray with height - ", image_height,
            " can not be created. Max Image2DArray height for this GPU - ",
            gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
      } else if (image_layers > gpu_info.GetMaxImage2DArrayLayers()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2DArray with layers - ", image_layers,
            " can not be created. Max Image2DArray layers for this GPU - ",
            gpu_info.GetMaxImage2DArrayLayers(), ". ", common_desc));
      } else {
        return absl::OkStatus();
      }
    }
    case TensorStorageType::TEXTURE_2D: {
      const int image_width = shape.w * shape.b * shape.d;
      const int image_height = shape.h * slices;
      if (image_width > gpu_info.GetMaxImage2DWidth()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2D with width - ", image_width,
            " can not be created. Max Image2D width for this GPU - ",
            gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
      } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2D with height - ", image_height,
            " can not be created. Max Image2D height for this GPU - ",
            gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
      } else {
        return absl::OkStatus();
      }
    }
    case TensorStorageType::SINGLE_TEXTURE_2D: {
      const int image_width = shape.w * shape.b * shape.d;
      const int image_height = shape.h;
      if (shape.c > 4) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2D with channels - ", shape.c, " can not be created."));
      } else if (!gpu_info.SupportsFloatImage2D(data_type_, shape.c)) {
        return absl::ResourceExhaustedError(
            "Image2D doesn't support this pixel layout.");
      } else if (image_width > gpu_info.GetMaxImage2DWidth()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2D with width - ", image_width,
            " can not be created. Max Image2D width for this GPU - ",
            gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
      } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
        return absl::ResourceExhaustedError(absl::StrCat(
            "Image2D with height - ", image_height,
            " can not be created. Max Image2D height for this GPU - ",
            gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
      } else {
        return absl::OkStatus();
      }
    }
    default:
      return absl::UnimplementedError(
          "Can not create resources for unknown storage type.");
  }
}

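// 4D (BHWC) overload; expands the shape to BHWDC with depth = 1.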
absl::Status TensorDescriptor::CanCreateTensorWithShape(
    const GpuInfo& gpu_info, const BHWC& shape) const {
  const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
  return CanCreateTensorWithShape(gpu_info, shape5D);
}

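// If the current storage type can not hold `shape`, falls back to
// IMAGE_BUFFER (when supported) and then to BUFFER; on Metal it falls back
// straight to BUFFER.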
absl::Status TensorDescriptor::UpdateToSupportedStorageType(
    const GpuInfo& gpu_info, const BHWC& shape) {
  if (CanCreateTensorWithShape(gpu_info, shape).ok()) {
    return absl::OkStatus();
  }
  if (gpu_info.IsApiMetal()) {
    storage_type_ = TensorStorageType::BUFFER;
    return CanCreateTensorWithShape(gpu_info, shape);
  }

  storage_type_ = TensorStorageType::IMAGE_BUFFER;
  if (gpu_info.SupportsImageBuffer() &&
      CanCreateTensorWithShape(gpu_info, shape).ok()) {
    return absl::OkStatus();
  }
  storage_type_ = TensorStorageType::BUFFER;
  return CanCreateTensorWithShape(gpu_info, shape);
}

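// Creates a BHWC-layout descriptor with the given shape; no data is uploaded.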
TensorDescriptor CreateBhwcTensorDescriptor(DataType data_type,
                                            TensorStorageType storage_type,
                                            const BHWC& shape) {
  TensorDescriptor tensor_desc =
      TensorDescriptor(data_type, storage_type, Layout::BHWC);
  tensor_desc.SetBHWCShape(shape);
  return tensor_desc;
}

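// Creates an HWC-layout descriptor; the shape is stored with batch = 1.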
TensorDescriptor CreateHwcTensorDescriptor(DataType data_type,
                                           TensorStorageType storage_type,
                                           const HWC& shape) {
  TensorDescriptor tensor_desc =
      TensorDescriptor(data_type, storage_type, Layout::HWC);
  tensor_desc.SetBHWCShape(BHWC(1, shape.h, shape.w, shape.c));
  return tensor_desc;
}

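// Picks a storage type for 1D (linear) constant tensors: 2D textures on
// A7/A8-generation Apple GPUs; buffers when images are unsupported or on
// Mali, other Apple, and AMD GPUs; 2D textures otherwise.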
TensorStorageType GetStorageTypeForLinearTensor(const GpuInfo& gpu_info,
                                                DataType data_type,
                                                const Linear& shape) {
  if (gpu_info.IsApple()) {
    if (gpu_info.apple_info.IsA7GenerationGpu() ||
        gpu_info.apple_info.IsA8GenerationGpu()) {
      return TensorStorageType::TEXTURE_2D;
    }
  }
  if (!gpu_info.SupportsImages() || gpu_info.IsMali() || gpu_info.IsApple() ||
      gpu_info.IsAMD()) {
    return TensorStorageType::BUFFER;
  } else {
    return TensorStorageType::TEXTURE_2D;
  }
}

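// Creates a LINEAR-layout constant descriptor and uploads the float data from
// `src`, converted to `data_type`.
//
// Illustrative sketch of a call site (the names below are placeholders, not
// part of this file):
//   tflite::gpu::Tensor<Linear, DataType::FLOAT32> bias;
//   bias.shape = Linear(8);
//   bias.data.resize(bias.shape.v, 0.0f);
//   TensorDescriptor desc = CreateConstantLinearTensorDescriptor(
//       DataType::FLOAT16, TensorStorageType::BUFFER, bias);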
TensorDescriptor CreateConstantLinearTensorDescriptor(
    DataType data_type, TensorStorageType storage_type,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  TensorDescriptor tensor_desc =
      TensorDescriptor(data_type, storage_type, Layout::LINEAR);
  tensor_desc.SetBHWDCShape(BHWDC(1, 1, 1, 1, src.shape.v));
  tensor_desc.UploadData(src.data.data());
  return tensor_desc;
}

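// Same as above, but the storage type is chosen automatically with
// GetStorageTypeForLinearTensor.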
TensorDescriptor CreateConstantLinearTensorDescriptor(
    const GpuInfo& gpu_info, DataType data_type,
    const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
  return CreateConstantLinearTensorDescriptor(
      data_type, GetStorageTypeForLinearTensor(gpu_info, data_type, src.shape),
      src);
}

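// Creates an HW-layout constant descriptor for width by height 4-channel
// texels; `data` must hold height * width * 4 elements of `data_type`.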
TensorDescriptor CreateConstantHWVec4TensorDescriptor(
    DataType data_type, TensorStorageType storage_type, int width, int height,
    const uint8_t* data) {
  TensorDescriptor tensor_desc =
      TensorDescriptor(data_type, storage_type, Layout::HW);
  tensor_desc.SetBHWDCShape(BHWDC(1, height, width, 1, 4));
  int data_size = height * width * 4 * SizeOf(data_type);
  tensor_desc.data_.resize(data_size);
  memcpy(tensor_desc.data_.data(), data, data_size);
  return tensor_desc;
}

}  // namespace gpu
}  // namespace tflite