• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
17 
18 #include <cstdint>
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 #include "absl/strings/str_cat.h"
24 #include "absl/strings/substitute.h"
25 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
26 #include "tensorflow/lite/delegates/gpu/common/shape.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/util.h"
28 #include "tensorflow/lite/delegates/gpu/common/util.h"
29 
30 namespace tflite {
31 namespace gpu {
32 namespace {
GetReadImageFromDataType(DataType data_type)33 std::string GetReadImageFromDataType(DataType data_type) {
34   if (data_type == DataType::FLOAT32) {
35     return "read_imagef";
36   } else if (data_type == DataType::FLOAT16) {
37     return "read_imageh";
38   } else if (data_type == DataType::INT8 || data_type == DataType::INT16 ||
39              data_type == DataType::INT32) {
40     return "read_imagei";
41   } else if (data_type == DataType::UINT8 || data_type == DataType::UINT16 ||
42              data_type == DataType::UINT32 || data_type == DataType::BOOL) {
43     return "read_imageui";
44   } else {
45     return "error";
46   }
47 }
48 
ToClTextureType(DataType data_type)49 DataType ToClTextureType(DataType data_type) {
50   switch (data_type) {
51     case DataType::FLOAT32:
52     case DataType::FLOAT16:
53     case DataType::INT32:
54     case DataType::UINT32:
55       return data_type;
56     case DataType::INT16:
57     case DataType::INT8:
58       return DataType::INT32;
59     case DataType::BOOL:
60     case DataType::UINT16:
61     case DataType::UINT8:
62       return DataType::UINT32;
63     default:
64       return DataType::UNKNOWN;
65   }
66 }
67 
GetWriteImageFromDataType(DataType data_type)68 std::string GetWriteImageFromDataType(DataType data_type) {
69   if (data_type == DataType::FLOAT32) {
70     return "write_imagef";
71   } else if (data_type == DataType::FLOAT16) {
72     return "write_imageh";
73   } else if (data_type == DataType::INT8 || data_type == DataType::INT16 ||
74              data_type == DataType::INT32) {
75     return "write_imagei";
76   } else if (data_type == DataType::UINT8 || data_type == DataType::UINT16 ||
77              data_type == DataType::UINT32 || data_type == DataType::BOOL) {
78     return "write_imageui";
79   } else {
80     return "error";
81   }
82 }
83 
GetConversionForImage(const GpuInfo & gpu_info,DataType src_type,DataType dst_type)84 std::string GetConversionForImage(const GpuInfo& gpu_info, DataType src_type,
85                                   DataType dst_type) {
86   DataType interm_type = src_type;
87   if (gpu_info.IsApiOpenCl()) {
88     if (src_type == DataType::FLOAT16 && dst_type == DataType::FLOAT32) {
89       return "$0";
90     }
91     interm_type = ToClTextureType(src_type);
92   } else if (gpu_info.IsApiMetal()) {
93     interm_type = ToMetalTextureType(src_type);
94   }
95   return GetTypeConversion(gpu_info, interm_type, dst_type, 4);
96 }
97 
GetConversion(const GpuInfo & gpu_info,TensorStorageType storage_type,DataType src_type,DataType dst_type)98 std::string GetConversion(const GpuInfo& gpu_info,
99                           TensorStorageType storage_type, DataType src_type,
100                           DataType dst_type) {
101   if (src_type == DataType::BOOL) {
102     // DataType::BOOL stored as DataType::UINT8
103     src_type = DataType::UINT8;
104   }
105   if (storage_type == TensorStorageType::BUFFER) {
106     return GetTypeConversion(gpu_info, src_type, dst_type, 4);
107   } else {
108     return GetConversionForImage(gpu_info, src_type, dst_type);
109   }
110 }
111 
MayBeAddConversion(const std::string & conversion,std::string * result)112 void MayBeAddConversion(const std::string& conversion, std::string* result) {
113   *result = absl::Substitute(conversion, *result);
114 }
115 
116 }  // namespace
117 
ToString(TensorStorageType type)118 std::string ToString(TensorStorageType type) {
119   switch (type) {
120     case TensorStorageType::UNKNOWN:
121       return "TensorStorageType::UNKNOWN";
122     case TensorStorageType::BUFFER:
123       return "TensorStorageType::BUFFER";
124     case TensorStorageType::TEXTURE_ARRAY:
125       return "TensorStorageType::TEXTURE_ARRAY";
126     case TensorStorageType::TEXTURE_2D:
127       return "TensorStorageType::TEXTURE_2D";
128     case TensorStorageType::TEXTURE_3D:
129       return "TensorStorageType::TEXTURE_3D";
130     case TensorStorageType::SINGLE_TEXTURE_2D:
131       return "TensorStorageType::SINGLE_TEXTURE_2D";
132     case TensorStorageType::IMAGE_BUFFER:
133       return "TensorStorageType::IMAGE_BUFFER";
134   }
135 }
136 
TensorDescriptor(TensorDescriptor && desc)137 TensorDescriptor::TensorDescriptor(TensorDescriptor&& desc)
138     : GPUObjectDescriptor(std::move(desc)),
139       data_type_(desc.data_type_),
140       storage_type_(desc.storage_type_),
141       layout_(desc.layout_),
142       use_buffer_for_write_only_2d_texture_(
143           desc.use_buffer_for_write_only_2d_texture_),
144       use_buffer_for_write_only_image_buffer_(
145           desc.use_buffer_for_write_only_image_buffer_),
146       shape_(desc.shape_),
147       data_(std::move(desc.data_)) {}
operator =(TensorDescriptor && desc)148 TensorDescriptor& TensorDescriptor::operator=(TensorDescriptor&& desc) {
149   if (this != &desc) {
150     std::swap(data_type_, desc.data_type_);
151     std::swap(storage_type_, desc.storage_type_);
152     std::swap(layout_, desc.layout_);
153     std::swap(use_buffer_for_write_only_2d_texture_,
154               desc.use_buffer_for_write_only_2d_texture_);
155     std::swap(use_buffer_for_write_only_image_buffer_,
156               desc.use_buffer_for_write_only_image_buffer_);
157     std::swap(shape_, desc.shape_);
158     data_ = std::move(desc.data_);
159     GPUObjectDescriptor::operator=(std::move(desc));
160   }
161   return *this;
162 }
163 
CopyWithoutData(TensorDescriptor * desc) const164 void TensorDescriptor::CopyWithoutData(TensorDescriptor* desc) const {
165   desc->data_type_ = data_type_;
166   desc->storage_type_ = storage_type_;
167   desc->layout_ = layout_;
168   desc->use_buffer_for_write_only_2d_texture_ =
169       use_buffer_for_write_only_2d_texture_;
170   desc->use_buffer_for_write_only_image_buffer_ =
171       use_buffer_for_write_only_image_buffer_;
172   desc->shape_ = shape_;
173 }
174 
GetStorageDims() const175 std::vector<uint64_t> TensorDescriptor::GetStorageDims() const {
176   const int slices = DivideRoundUp(shape_.c, 4);
177   if (layout_ == Layout::LINEAR) {
178     switch (storage_type_) {
179       case TensorStorageType::BUFFER:
180       case TensorStorageType::IMAGE_BUFFER:
181         return {static_cast<uint64_t>(slices)};
182       case TensorStorageType::TEXTURE_ARRAY:
183       case TensorStorageType::TEXTURE_3D:
184         return {static_cast<uint64_t>(slices), 1u, 1u};
185       case TensorStorageType::TEXTURE_2D:
186       case TensorStorageType::SINGLE_TEXTURE_2D:
187         return {static_cast<uint64_t>(slices), 1u};
188       case TensorStorageType::UNKNOWN:
189         return {};
190     }
191   } else if (layout_ == Layout::HW) {
192     switch (storage_type_) {
193       case TensorStorageType::BUFFER:
194       case TensorStorageType::IMAGE_BUFFER:
195         return {static_cast<uint64_t>(shape_.w * shape_.h)};
196       case TensorStorageType::TEXTURE_ARRAY:
197       case TensorStorageType::TEXTURE_3D:
198         return {static_cast<uint64_t>(shape_.w),
199                 static_cast<uint64_t>(shape_.h), 1u};
200       case TensorStorageType::TEXTURE_2D:
201       case TensorStorageType::SINGLE_TEXTURE_2D:
202         return {static_cast<uint64_t>(shape_.w),
203                 static_cast<uint64_t>(shape_.h)};
204       case TensorStorageType::UNKNOWN:
205         return {};
206     }
207   }
208   // HWC/BHWC/HWDC/BHWDC
209   switch (storage_type_) {
210     case TensorStorageType::BUFFER:
211     case TensorStorageType::IMAGE_BUFFER:
212       return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.h * shape_.d *
213                                     slices)};
214     case TensorStorageType::TEXTURE_ARRAY:
215     case TensorStorageType::TEXTURE_3D:
216       return {static_cast<uint64_t>(shape_.w * shape_.b),
217               static_cast<uint64_t>(shape_.h),
218               static_cast<uint64_t>(shape_.d * slices)};
219     case TensorStorageType::TEXTURE_2D:
220       return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.d),
221               static_cast<uint64_t>(shape_.h * slices)};
222     case TensorStorageType::SINGLE_TEXTURE_2D:
223       return {static_cast<uint64_t>(shape_.w * shape_.b * shape_.d),
224               static_cast<uint64_t>(shape_.h)};
225     case TensorStorageType::UNKNOWN:
226       return {};
227   }
228 }
229 
GetFullTensorRegion() const230 int3 TensorDescriptor::GetFullTensorRegion() const {
231   std::vector<uint64_t> storage_dims = GetStorageDims();
232   if (layout_ == Layout::LINEAR) {
233     return int3(static_cast<int>(storage_dims[0]), 1, 1);
234   } else if (layout_ == Layout::HW) {
235     switch (storage_type_) {
236       case TensorStorageType::BUFFER:
237       case TensorStorageType::IMAGE_BUFFER:
238         return int3(static_cast<int>(storage_dims[0]), 1, 1);
239       case TensorStorageType::TEXTURE_2D:
240       case TensorStorageType::SINGLE_TEXTURE_2D:
241       case TensorStorageType::TEXTURE_ARRAY:
242       case TensorStorageType::TEXTURE_3D:
243         return int3(static_cast<int>(storage_dims[0]),
244                     static_cast<int>(storage_dims[1]), 1);
245       case TensorStorageType::UNKNOWN:
246         return {-1, -1, -1};
247     }
248   }
249   // HWC/BHWC/HWDC/BHWDC
250   switch (storage_type_) {
251     case TensorStorageType::BUFFER:
252     case TensorStorageType::IMAGE_BUFFER:
253       // 1D resources
254       return int3(static_cast<int>(storage_dims[0]), 1, 1);
255     case TensorStorageType::TEXTURE_2D:
256     case TensorStorageType::SINGLE_TEXTURE_2D:
257       // 2D resources
258       return int3(static_cast<int>(storage_dims[0]),
259                   static_cast<int>(storage_dims[1]), 1);
260     case TensorStorageType::TEXTURE_ARRAY:
261     case TensorStorageType::TEXTURE_3D:
262       // 3D resources
263       return int3(static_cast<int>(storage_dims[0]),
264                   static_cast<int>(storage_dims[1]),
265                   static_cast<int>(storage_dims[2]));
266     case TensorStorageType::UNKNOWN:
267       return {-1, -1, -1};
268   }
269 }
GetMemorySizeInBytes() const270 uint64_t TensorDescriptor::GetMemorySizeInBytes() const {
271   std::vector<uint64_t> storage_dims = GetStorageDims();
272   uint64_t total_size = 1;
273   for (int i = 0; i < storage_dims.size(); ++i) {
274     total_size *= storage_dims[i];
275   }
276   const int element_size = GetElementSize() * SizeOf(data_type_);
277   return total_size * element_size;
278 }
279 
GetElementSize() const280 int TensorDescriptor::GetElementSize() const {
281   if (storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D) {
282     return shape_.c;
283   } else {
284     return 4;
285   }
286 }
287 
GetGPUResources(const GpuInfo & gpu_info) const288 GPUResources TensorDescriptor::GetGPUResources(const GpuInfo& gpu_info) const {
289   GPUResources resources;
290   resources.ints.push_back("slice_stride");
291   if (HasAxis(Axis::WIDTH)) {
292     resources.ints.push_back("width");
293   }
294   if (HasAxis(Axis::HEIGHT)) {
295     resources.ints.push_back("height");
296   }
297   if (HasAxis(Axis::CHANNELS)) {
298     resources.ints.push_back("slices");
299     resources.ints.push_back("channels");
300   }
301   if (HasAxis(Axis::BATCH)) {
302     resources.ints.push_back("batch");
303   }
304   if (HasAxis(Axis::DEPTH)) {
305     resources.ints.push_back("depth");
306   }
307   if (storage_type_ == TensorStorageType::BUFFER) {
308     GPUBufferDescriptor desc;
309     desc.data_type = data_type_;
310     desc.access_type = access_type_;
311     desc.element_size = 4;
312     resources.buffers.push_back({"buffer", desc});
313   } else if (storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D ||
314              storage_type_ == TensorStorageType::TEXTURE_2D) {
315     if (access_type_ == AccessType::WRITE &&
316         use_buffer_for_write_only_2d_texture_) {
317       resources.ints.push_back("aligned_texture_width");
318       GPUBufferDescriptor desc;
319       desc.data_type = data_type_;
320       desc.access_type = access_type_;
321       desc.element_size = 4;
322       resources.buffers.push_back({"buffer", desc});
323     } else {
324       GPUImage2DDescriptor desc;
325       desc.data_type = data_type_;
326       desc.normalized = false;
327       desc.access_type = access_type_;
328       resources.images2d.push_back({"image2d", desc});
329     }
330   } else if (storage_type_ == TensorStorageType::TEXTURE_ARRAY) {
331     GPUImage2DArrayDescriptor desc;
332     desc.data_type = data_type_;
333     desc.access_type = access_type_;
334     resources.image2d_arrays.push_back({"image2d_array", desc});
335   } else if (storage_type_ == TensorStorageType::TEXTURE_3D) {
336     GPUImage3DDescriptor desc;
337     desc.data_type = data_type_;
338     desc.access_type = access_type_;
339     resources.images3d.push_back({"image3d", desc});
340   } else if (storage_type_ == TensorStorageType::IMAGE_BUFFER) {
341     if (access_type_ == AccessType::WRITE &&
342         use_buffer_for_write_only_image_buffer_) {
343       GPUBufferDescriptor desc;
344       desc.data_type = data_type_;
345       desc.access_type = access_type_;
346       desc.element_size = 4;
347       resources.buffers.push_back({"buffer", desc});
348     } else {
349       GPUImageBufferDescriptor desc;
350       desc.data_type = data_type_;
351       desc.access_type = access_type_;
352       resources.image_buffers.push_back({"image_buffer", desc});
353     }
354   }
355   return resources;
356 }
357 
GetGpuResources(const BHWDC & tensor_shape,GenericGPUResourcesWithValue * resources) const358 void TensorDescriptor::GetGpuResources(
359     const BHWDC& tensor_shape, GenericGPUResourcesWithValue* resources) const {
360   if (HasAxis(Axis::BATCH)) {
361     resources->AddInt("slice_stride",
362                       tensor_shape.w * tensor_shape.h * tensor_shape.b);
363   } else {
364     resources->AddInt("slice_stride", tensor_shape.w * tensor_shape.h);
365   }
366   if (HasAxis(Axis::WIDTH)) {
367     resources->AddInt("width", tensor_shape.w);
368   }
369   if (HasAxis(Axis::HEIGHT)) {
370     resources->AddInt("height", tensor_shape.h);
371   }
372   if (HasAxis(Axis::CHANNELS)) {
373     resources->AddInt("slices", DivideRoundUp(tensor_shape.c, 4));
374     resources->AddInt("channels", tensor_shape.c);
375   }
376   if (HasAxis(Axis::BATCH)) {
377     resources->AddInt("batch", tensor_shape.b);
378   }
379   if (HasAxis(Axis::DEPTH)) {
380     resources->AddInt("depth", tensor_shape.d);
381   }
382 }
383 
PerformConstExpr(const GpuInfo & gpu_info,const std::string & const_expr,std::string * result) const384 absl::Status TensorDescriptor::PerformConstExpr(const GpuInfo& gpu_info,
385                                                 const std::string& const_expr,
386                                                 std::string* result) const {
387   if (const_expr == "type" || const_expr == "scalar_type") {
388     const int vec_size = const_expr == "scalar_type" ? 1 : 4;
389     *result = GetTypeDeclaration(gpu_info, data_type_, vec_size);
390     return absl::OkStatus();
391   } else if (const_expr == "zero_value" || const_expr == "scalar_zero_value") {
392     const int vec_size = const_expr == "scalar_zero_value" ? 1 : 4;
393     *result = GetZeroValue(gpu_info, data_type_, vec_size);
394     return absl::OkStatus();
395   } else {
396     return absl::UnimplementedError(
397         absl::StrCat("Can not resolve constant expression - ", const_expr));
398   }
399 }
400 
PerformSelector(const GpuInfo & gpu_info,const std::string & selector,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const401 absl::Status TensorDescriptor::PerformSelector(
402     const GpuInfo& gpu_info, const std::string& selector,
403     const std::vector<std::string>& args,
404     const std::vector<std::string>& template_args, std::string* result) const {
405   if (selector == "Width") {
406     *result = "width";
407     return absl::OkStatus();
408   } else if (selector == "Height") {
409     *result = "height";
410     return absl::OkStatus();
411   } else if (selector == "Slices") {
412     *result = "slices";
413     return absl::OkStatus();
414   } else if (selector == "SliceStride") {
415     *result = "slice_stride";
416     return absl::OkStatus();
417   } else if (selector == "Channels") {
418     *result = "channels";
419     return absl::OkStatus();
420   } else if (selector == "Batch") {
421     if (HasAxis(Axis::BATCH)) {
422       *result = "batch";
423     } else {
424       *result = "1";
425     }
426     return absl::OkStatus();
427   } else if (selector == "Depth") {
428     *result = "depth";
429     return absl::OkStatus();
430   } else if (selector == "SetBatchRef") {
431     if (args.size() != 1) {
432       return absl::InvalidArgumentError(
433           "Unsupported arguments in SetBatchRef selector");
434     }
435     state_vars_["batch_id"] = args[0];
436     *result = "";
437     return absl::OkStatus();
438   } else if (selector == "Read") {
439     return PerformReadSelector(gpu_info, args, template_args, result);
440   } else if (selector == "ReadNearest") {
441     return PerformReadNearestSelector(gpu_info, args, result);
442   } else if (selector == "ReadBilinear") {
443     return PerformReadBilinearSelector(gpu_info, args, result);
444   } else if (selector == "ReadPerChannel") {
445     return PerformReadPerChannelSelector(gpu_info, args, template_args, result);
446   } else if (selector == "Write") {
447     return PerformWriteSelector(gpu_info, args, template_args, result);
448   } else if (selector == "WriteLinear") {
449     return PerformWriteLinearSelector(gpu_info, args, template_args, result);
450   } else if (selector == "Write2D") {
451     return PerformWrite2DSelector(gpu_info, args, template_args, result);
452   } else if (selector == "GetAddress") {
453     return PerformGetAddressSelector(args, result);
454   } else if (selector == "GetHandle") {
455     return PerformGetHandleSelector(args, result);
456   } else {
457     return absl::NotFoundError(absl::StrCat(
458         "TensorDescriptor don't have selector with name - ", selector));
459   }
460 }
461 
PerformReadSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const462 absl::Status TensorDescriptor::PerformReadSelector(
463     const GpuInfo& gpu_info, const std::vector<std::string>& args,
464     const std::vector<std::string>& template_args, std::string* result) const {
465   DataType read_as_type = data_type_;
466   RETURN_IF_ERROR(
467       MaybeGetDataTypeFromTemplateArgs(template_args, &read_as_type));
468   if (layout_ == Layout::LINEAR) {
469     if (args.size() != 1) {
470       return absl::InvalidArgumentError(
471           "Read selector for LINEAR tensor require single argument");
472     }
473     *result = Read(gpu_info, read_as_type, GetPhysicalCoordsLinear(args[0]));
474     return absl::OkStatus();
475   }
476   if (layout_ == Layout::HW) {
477     if (args.size() != 2) {
478       return absl::InvalidArgumentError(
479           "Read selector for HW tensor require two arguments");
480     }
481     *result =
482         Read(gpu_info, read_as_type, GetPhysicalCoordsHW(args[0], args[1]));
483     return absl::OkStatus();
484   }
485   if (args.size() == 1) {  // function overload for 1D linear types.
486     if (storage_type_ == TensorStorageType::BUFFER ||
487         storage_type_ == TensorStorageType::IMAGE_BUFFER) {
488       *result = Read(gpu_info, read_as_type, {args[0]});
489       return absl::OkStatus();
490     } else {
491       return absl::InvalidArgumentError(
492           "Read selector with single argument can be used only with linear "
493           "storage types(BUFFER or IMAGE_BUFFER)");
494     }
495   }
496   std::string xc;
497   std::string yc;
498   std::string zc;
499   std::string sc;
500   std::string bc;
501   bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
502   if (args.size() < 2 || !parsed) {
503     return absl::NotFoundError("Unrecognized Read selector");
504   }
505 
506   *result = Read(gpu_info, read_as_type, GetPhysicalCoords(xc, yc, zc, sc, bc));
507   return absl::OkStatus();
508 }
509 
PerformReadNearestSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,std::string * result) const510 absl::Status TensorDescriptor::PerformReadNearestSelector(
511     const GpuInfo& gpu_info, const std::vector<std::string>& args,
512     std::string* result) const {
513   // ReadNearest(result, fc_x, fc_y, {fc_z}, slice);
514   if (!((args.size() == 5 && HasAxis(Axis::DEPTH)) || args.size() == 4)) {
515     return absl::NotFoundError("Unrecognized ReadNearest selector");
516   }
517   std::vector<std::string> coord_args =
518       std::vector<std::string>(args.begin() + 1, args.end());
519   std::string c;
520   c += "  {\n";
521   c += "  int coord_x_TMP = INIT_INT(" + coord_args[0] + ");\n";
522   c += "  coord_x_TMP = max(coord_x_TMP, 0);\n";
523   c += "  coord_x_TMP = min(coord_x_TMP, width - 1);\n";
524   coord_args[0] = "coord_x_TMP";
525   c += "  int coord_y_TMP = INIT_INT(" + coord_args[1] + ");\n";
526   c += "  coord_y_TMP = max(coord_y_TMP, 0);\n";
527   c += "  coord_y_TMP = min(coord_y_TMP, height - 1);\n";
528   coord_args[1] = "coord_y_TMP";
529   if (HasAxis(Axis::DEPTH)) {
530     c += "  int coord_z_TMP = INIT_INT(" + coord_args[2] + ");\n";
531     c += "  coord_z_TMP = max(coord_z_TMP, 0);\n";
532     c += "  coord_z_TMP = min(coord_z_TMP, depth - 1);\n";
533     coord_args[2] = "coord_z_TMP";
534   }
535   std::string src_value;
536   RETURN_IF_ERROR(PerformReadSelector(gpu_info, coord_args, {}, &src_value));
537   c += "  " + args[0] + " = " + src_value + ";\n";
538   c += "  }";
539   *result = c;
540   return absl::OkStatus();
541 }
542 
PerformReadBilinearSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,std::string * result) const543 absl::Status TensorDescriptor::PerformReadBilinearSelector(
544     const GpuInfo& gpu_info, const std::vector<std::string>& args,
545     std::string* result) const {
546   // ReadBilinear(result, fc_x, fc_y, {fc_z}, slice);
547   if (!((args.size() == 5 && HasAxis(Axis::DEPTH)) || args.size() == 4)) {
548     return absl::NotFoundError("Unrecognized ReadBilinear selector");
549   }
550   std::vector<std::string> coord_args =
551       std::vector<std::string>(args.begin() + 1, args.end());
552   std::string c;
553   c += "  {\n";
554   c += "  float f_x_TMP = floor(" + coord_args[0] + ");\n";
555   c += "  float x_scale_TMP = (" + coord_args[0] + ") - f_x_TMP;\n";
556   c += "  int i_x_TMP = INIT_INT(f_x_TMP);\n";
557   c += "  int start_x_TMP = max(i_x_TMP, 0);\n";
558   c += "  int end_x_TMP = min(i_x_TMP + 1, width - 1);\n";
559   c += "  float f_y_TMP = floor(" + coord_args[1] + ");\n";
560   c += "  float y_scale_TMP = (" + coord_args[1] + ") - f_y_TMP;\n";
561   c += "  int i_y_TMP = INIT_INT(f_y_TMP);\n";
562   c += "  int start_y_TMP = max(i_y_TMP, 0);\n";
563   c += "  int end_y_TMP = min(i_y_TMP + 1, height - 1);\n";
564   if (HasAxis(Axis::DEPTH)) {
565     // 3d bilinear read, x, y, z
566     c += "  float f_z_TMP = floor(" + coord_args[2] + ");\n";
567     c += "  float z_scale_TMP = (" + coord_args[2] + ") - f_z_TMP;\n";
568     c += "  int i_z_TMP = INIT_INT(f_z_TMP);\n";
569     c += "  int start_z_TMP = max(i_z_TMP, 0);\n";
570     c += "  int end_z_TMP = min(i_z_TMP + 1, depth - 1);\n";
571     int index = 0;
572     for (const auto& src_z : {"start_z_TMP", "end_z_TMP"}) {
573       for (const auto& src_y : {"start_y_TMP", "end_y_TMP"}) {
574         for (const auto& src_x : {"start_x_TMP", "end_x_TMP"}) {
575           coord_args[0] = src_x;
576           coord_args[1] = src_y;
577           coord_args[2] = src_z;
578           std::string src_value;
579           RETURN_IF_ERROR(
580               PerformReadSelector(gpu_info, coord_args, {"float"}, &src_value));
581           c += "  float4 src" + std::to_string(index) + "_TMP = " + src_value +
582                ";\n";
583           index++;
584         }
585       }
586     }
587     c += "  float4 t0_TMP = mix(mix(src0_TMP, src1_TMP, x_scale_TMP), "
588          "mix(src2_TMP, src3_TMP, x_scale_TMP), y_scale_TMP);\n";
589     c += "  float4 t1_TMP = mix(mix(src4_TMP, src5_TMP, x_scale_TMP), "
590          "mix(src6_TMP, src7_TMP, x_scale_TMP), y_scale_TMP);\n";
591     c += "  " + args[0] + " = TO_FLT4(mix(t0_TMP, t1_TMP, z_scale_TMP));\n";
592   } else {
593     // 2d bilinear read, x, y
594     int index = 0;
595     for (const auto& src_y : {"start_y_TMP", "end_y_TMP"}) {
596       for (const auto& src_x : {"start_x_TMP", "end_x_TMP"}) {
597         coord_args[0] = src_x;
598         coord_args[1] = src_y;
599         std::string src_value;
600         RETURN_IF_ERROR(
601             PerformReadSelector(gpu_info, coord_args, {"float"}, &src_value));
602         c += "  float4 src" + std::to_string(index) + "_TMP = " + src_value +
603              ";\n";
604         index++;
605       }
606     }
607     c += "  " + args[0] +
608          " = TO_FLT4(mix(mix(src0_TMP, src1_TMP, x_scale_TMP), mix(src2_TMP, "
609          "src3_TMP, x_scale_TMP), y_scale_TMP));\n";
610   }
611   c += "  }";
612   *result = c;
613   return absl::OkStatus();
614 }
615 
PerformReadPerChannelSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const616 absl::Status TensorDescriptor::PerformReadPerChannelSelector(
617     const GpuInfo& gpu_info, const std::vector<std::string>& args,
618     const std::vector<std::string>& template_args, std::string* result) const {
619   std::vector<std::string> coord_args =
620       std::vector<std::string>(args.begin() + 1, args.end());
621   int channels_index = 0;
622   if (HasAxis(Axis::WIDTH)) {
623     channels_index++;
624   }
625   if (HasAxis(Axis::HEIGHT)) {
626     channels_index++;
627   }
628   if (HasAxis(Axis::DEPTH)) {
629     channels_index++;
630   }
631   if (channels_index >= coord_args.size()) {
632     return absl::NotFoundError(
633         "Wrong number of coordinates in ReadPerChannel.");
634   }
635   std::string c = "  {\n";
636   c += "  int slice_coord_TMP = (" + coord_args[channels_index] + ") / 4;\n";
637   c += "  int sub_ch_coord_TMP = (" + coord_args[channels_index] + ") % 4;\n";
638   coord_args[channels_index] = "slice_coord_TMP";
639   std::string src_value;
640   RETURN_IF_ERROR(
641       PerformReadSelector(gpu_info, coord_args, template_args, &src_value));
642   if (gpu_info.IsApiOpenCl()) {
643     DataType dst_type = data_type_;
644     RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &dst_type));
645     c += "  " + GetTypeDeclaration(gpu_info, dst_type, 4) +
646          " src_TMP = " + src_value + ";\n";
647     c +=
648         "  " + args[0] + " = (" + ToCLDataType(dst_type, 1) +
649         "[4]){src_TMP.x, src_TMP.y, src_TMP.z, src_TMP.w}[sub_ch_coord_TMP];\n";
650   } else {
651     if (gpu_info.IsAdreno() && gpu_info.IsApiVulkan()) {
652       DataType dst_type = data_type_;
653       RETURN_IF_ERROR(
654           MaybeGetDataTypeFromTemplateArgs(template_args, &dst_type));
655       c += "  " + GetTypeDeclaration(gpu_info, dst_type, 4) +
656            " src_TMP = " + src_value + ";\n";
657       c += "  " + args[0] + " = " +
658            ToGlslShaderDataType(dst_type, 1, /*add_precision*/ false,
659                                 gpu_info.vulkan_info.SupportsExplicitFp16()) +
660            "[4](src_TMP.x, src_TMP.y, src_TMP.z, "
661            "src_TMP.w)[sub_ch_coord_TMP];\n";
662     } else {
663       c += "  " + args[0] + " = " + src_value + "[sub_ch_coord_TMP];\n";
664     }
665   }
666 
667   c += "  }";
668   *result = c;
669   return absl::OkStatus();
670 }
671 
GetLinkingContextFromWriteSelector(const std::vector<std::string> & args,std::string * value_name,std::string * x_coord,std::string * y_coord,std::string * z_coord,std::string * s_coord,std::string * b_coord) const672 absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector(
673     const std::vector<std::string>& args, std::string* value_name,
674     std::string* x_coord, std::string* y_coord, std::string* z_coord,
675     std::string* s_coord, std::string* b_coord) const {
676   std::string xc;
677   std::string yc;
678   std::string zc;
679   std::string sc;
680   std::string bc;
681   bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
682   if (args.size() < 2 || !parsed) {
683     return absl::NotFoundError("Unrecognized Write selector");
684   }
685   *value_name = args[0];
686   *b_coord = absl::StrCat("(", bc, ")");
687   *x_coord = absl::StrCat("(", xc, ")");
688   *y_coord = absl::StrCat("(", yc, ")");
689   *z_coord = absl::StrCat("(", zc, ")");
690   *s_coord = absl::StrCat("(", sc, ")");
691   return absl::OkStatus();
692 }
693 
PerformWriteSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const694 absl::Status TensorDescriptor::PerformWriteSelector(
695     const GpuInfo& gpu_info, const std::vector<std::string>& args,
696     const std::vector<std::string>& template_args, std::string* result) const {
697   std::string xc;
698   std::string yc;
699   std::string zc;
700   std::string sc;
701   std::string bc;
702   bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
703   if (args.size() < 2 || !parsed) {
704     return absl::NotFoundError("Unrecognized Write selector");
705   }
706   DataType write_type = data_type_;
707   RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
708   *result = Write(gpu_info, write_type, args[0],
709                   GetPhysicalCoords(xc, yc, zc, sc, bc));
710   return absl::OkStatus();
711 }
712 
PerformWriteLinearSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const713 absl::Status TensorDescriptor::PerformWriteLinearSelector(
714     const GpuInfo& gpu_info, const std::vector<std::string>& args,
715     const std::vector<std::string>& template_args, std::string* result) const {
716   if (storage_type_ != TensorStorageType::BUFFER &&
717       storage_type_ != TensorStorageType::IMAGE_BUFFER) {
718     return absl::InvalidArgumentError(
719         "WriteLinear selector can be used only with linear "
720         "storages(BUFFER/IMAGE_BUFFER)");
721   }
722   if (args.size() != 2) {
723     return absl::NotFoundError("Unrecognized WriteLinear selector");
724   }
725   DataType write_type = data_type_;
726   RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
727   *result = Write(gpu_info, write_type, args[0], {args[1]});
728   return absl::OkStatus();
729 }
730 
PerformWrite2DSelector(const GpuInfo & gpu_info,const std::vector<std::string> & args,const std::vector<std::string> & template_args,std::string * result) const731 absl::Status TensorDescriptor::PerformWrite2DSelector(
732     const GpuInfo& gpu_info, const std::vector<std::string>& args,
733     const std::vector<std::string>& template_args, std::string* result) const {
734   if (storage_type_ != TensorStorageType::TEXTURE_2D) {
735     return absl::InvalidArgumentError(
736         "Write2D selector can be used only with 2d "
737         "storages(TEXTURE_2D)");
738   }
739   if (args.size() != 3) {
740     return absl::NotFoundError("Unrecognized Write2D selector");
741   }
742   DataType write_type = data_type_;
743   RETURN_IF_ERROR(MaybeGetDataTypeFromTemplateArgs(template_args, &write_type));
744   *result = Write(gpu_info, write_type, args[0], {args[1], args[2]});
745   return absl::OkStatus();
746 }
747 
Read(const GpuInfo & gpu_info,DataType read_as_type,const std::vector<std::string> & coords) const748 std::string TensorDescriptor::Read(
749     const GpuInfo& gpu_info, DataType read_as_type,
750     const std::vector<std::string>& coords) const {
751   const std::string conversion =
752       GetConversion(gpu_info, storage_type_, data_type_, read_as_type);
753   if (gpu_info.IsApiOpenCl() &&
754       !(data_type_ == DataType::FLOAT16 && read_as_type == DataType::FLOAT32)) {
755     read_as_type = data_type_;
756   }
757   switch (storage_type_) {
758     case TensorStorageType::BUFFER: {
759       std::string result;
760       if (gpu_info.IsGlsl() && data_type_ == DataType::FLOAT16 &&
761           !gpu_info.IsGlslSupportsExplicitFp16()) {
762         result =
763             absl::StrCat("vec4(unpackHalf2x16(buffer[", coords[0],
764                          "].x), unpackHalf2x16(buffer[", coords[0], "].y))");
765       } else {
766         result = absl::StrCat("buffer[", coords[0], "]");
767       }
768       MayBeAddConversion(conversion, &result);
769       return result;
770     }
771     case TensorStorageType::TEXTURE_2D:
772     case TensorStorageType::SINGLE_TEXTURE_2D: {
773       std::string result;
774       if (gpu_info.IsApiOpenCl()) {
775         result = absl::Substitute("$0(image2d, smp_zero, (int2)($1, $2))",
776                                   GetReadImageFromDataType(read_as_type),
777                                   coords[0], coords[1]);
778       } else if (gpu_info.IsApiMetal()) {
779         result = absl::Substitute("image2d.read(ushort2($0, $1))", coords[0],
780                                   coords[1]);
781       } else if (gpu_info.IsGlsl()) {
782         result = "texelFetch(image2d, ivec2(" + coords[0] + ", " + coords[1] +
783                  "), 0)";
784         if (data_type_ == DataType::FLOAT16 &&
785             gpu_info.IsGlslSupportsExplicitFp16()) {
786           result = "f16vec4(" + result + ")";
787         }
788       }
789       MayBeAddConversion(conversion, &result);
790       return result;
791     }
792     case TensorStorageType::TEXTURE_3D: {
793       std::string result;
794       if (gpu_info.IsApiOpenCl()) {
795         result =
796             absl::Substitute("$0(image3d, smp_zero, (int4)($1, $2, $3, 0))",
797                              GetReadImageFromDataType(read_as_type), coords[0],
798                              coords[1], coords[2]);
799       } else if (gpu_info.IsApiMetal()) {
800         result = absl::Substitute("image3d.read(ushort3($0, $1, $2))",
801                                   coords[0], coords[1], coords[2]);
802       } else if (gpu_info.IsGlsl()) {
803         result = "texelFetch(image3d, ivec3(" + coords[0] + ", " + coords[1] +
804                  ", " + coords[2] + "), 0)";
805         if (data_type_ == DataType::FLOAT16 &&
806             gpu_info.IsGlslSupportsExplicitFp16()) {
807           result = "f16vec4(" + result + ")";
808         }
809       }
810       MayBeAddConversion(conversion, &result);
811       return result;
812     }
813     case TensorStorageType::TEXTURE_ARRAY: {
814       std::string result;
815       if (gpu_info.IsApiOpenCl()) {
816         result = absl::Substitute(
817             "$0(image2d_array, smp_zero, (int4)($1, $2, $3, 0))",
818             GetReadImageFromDataType(read_as_type), coords[0], coords[1],
819             coords[2]);
820       } else if (gpu_info.IsApiMetal()) {
821         result = absl::Substitute("image2d_array.read(ushort2($0, $1), $2)",
822                                   coords[0], coords[1], coords[2]);
823       } else if (gpu_info.IsGlsl()) {
824         result = "texelFetch(image2d_array, ivec3(" + coords[0] + ", " +
825                  coords[1] + ", " + coords[2] + "), 0)";
826         if (data_type_ == DataType::FLOAT16 &&
827             gpu_info.IsGlslSupportsExplicitFp16()) {
828           result = "f16vec4(" + result + ")";
829         }
830       }
831       MayBeAddConversion(conversion, &result);
832       return result;
833     }
834     case TensorStorageType::IMAGE_BUFFER: {
835       std::string result;
836       if (gpu_info.IsApiOpenCl()) {
837         result = absl::StrCat(GetReadImageFromDataType(read_as_type),
838                               "(image_buffer, ", coords[0], ")");
839       } else if (gpu_info.IsApiMetal()) {
840         result = absl::Substitute("image_buffer.read(uint($0))", coords[0]);
841       } else if (gpu_info.IsGlsl()) {
842         result = "texelFetch(image_buffer, " + coords[0] + ")";
843         if (data_type_ == DataType::FLOAT16 &&
844             gpu_info.IsGlslSupportsExplicitFp16()) {
845           result = "f16vec4(" + result + ")";
846         }
847       }
848       MayBeAddConversion(conversion, &result);
849       return result;
850     }
851     case TensorStorageType::UNKNOWN:
852       return "";
853   }
854 }
855 
Write(const GpuInfo & gpu_info,DataType write_type,const std::string & var_name,const std::vector<std::string> & coords) const856 std::string TensorDescriptor::Write(
857     const GpuInfo& gpu_info, DataType write_type, const std::string& var_name,
858     const std::vector<std::string>& coords) const {
859   bool is_texture_write = storage_type_ == TensorStorageType::IMAGE_BUFFER ||
860                           storage_type_ == TensorStorageType::TEXTURE_2D ||
861                           storage_type_ == TensorStorageType::TEXTURE_ARRAY ||
862                           storage_type_ == TensorStorageType::TEXTURE_3D;
863   if (storage_type_ == TensorStorageType::IMAGE_BUFFER &&
864       use_buffer_for_write_only_image_buffer_) {
865     is_texture_write = false;
866   }
867   if (storage_type_ == TensorStorageType::TEXTURE_2D &&
868       use_buffer_for_write_only_2d_texture_) {
869     is_texture_write = false;
870   }
871   std::string write_expr = var_name;
872   DataType write_required_type = data_type_;
873   if (data_type_ == DataType::BOOL) {
874     // DataType::BOOL stored as DataType::UINT8
875     const std::string conversion =
876         GetTypeConversion(gpu_info, DataType::BOOL, DataType::UINT8, 4);
877     write_expr = absl::Substitute(conversion, write_expr);
878     write_required_type = DataType::UINT8;
879   }
880   if (is_texture_write) {
881     if (gpu_info.IsApiOpenCl()) {
882       write_required_type = ToClTextureType(write_required_type);
883     } else if (gpu_info.IsApiMetal()) {
884       write_required_type = ToMetalTextureType(write_required_type);
885     }
886   }
887   if (write_type != write_required_type) {
888     const std::string conversion =
889         GetTypeConversion(gpu_info, write_type, write_required_type, 4);
890     write_expr = absl::Substitute(conversion, write_expr);
891   }
892   switch (storage_type_) {
893     case TensorStorageType::BUFFER:
894     case TensorStorageType::IMAGE_BUFFER:
895       if (gpu_info.IsApiOpenCl()) {
896         if (use_buffer_for_write_only_image_buffer_) {
897           return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
898         } else {
899           return absl::Substitute("$0(image_buffer, $1, $2)",
900                                   GetWriteImageFromDataType(data_type_),
901                                   coords[0], write_expr);
902         }
903       } else if (gpu_info.IsApiMetal()) {
904         if (use_buffer_for_write_only_image_buffer_) {
905           return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
906         } else {
907           return absl::Substitute("image_buffer.write($0, uint($1))",
908                                   write_expr, coords[0]);
909         }
910       } else if (gpu_info.IsGlsl()) {
911         if (data_type_ == DataType::FLOAT16 &&
912             !gpu_info.IsGlslSupportsExplicitFp16()) {
913           return absl::StrCat("buffer[", coords[0], "] = uvec2(packHalf2x16(",
914                               write_expr, ".xy), packHalf2x16(", write_expr,
915                               ".zw))");
916         } else {
917           return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
918         }
919       } else {
920         return absl::StrCat("buffer[", coords[0], "] = ", write_expr);
921       }
922     case TensorStorageType::SINGLE_TEXTURE_2D:
923     case TensorStorageType::TEXTURE_2D:
924       if (gpu_info.IsApiOpenCl()) {
925         if (use_buffer_for_write_only_2d_texture_) {
926           return absl::Substitute(
927               "buffer[($2) * aligned_texture_width + ($1)] = $0", write_expr,
928               coords[0], coords[1]);
929         } else {
930           return absl::Substitute("$0(image2d, (int2)($1, $2), $3)",
931                                   GetWriteImageFromDataType(data_type_),
932                                   coords[0], coords[1], write_expr);
933         }
934       } else if (gpu_info.IsApiMetal()) {
935         if (use_buffer_for_write_only_2d_texture_) {
936           return absl::Substitute(
937               "buffer[($2) * aligned_texture_width + ($1)] = $0", write_expr,
938               coords[0], coords[1]);
939         } else {
940           return absl::Substitute("image2d.write($0, ushort2($1, $2))",
941                                   write_expr, coords[0], coords[1]);
942         }
943       } else if (gpu_info.IsGlsl()) {
944         return absl::Substitute("imageStore(image2d, ivec2($0, $1), $2)",
945                                 coords[0], coords[1], write_expr);
946       } else {
947         return "";
948       }
949     case TensorStorageType::TEXTURE_3D:
950       if (gpu_info.IsApiOpenCl()) {
951         return absl::Substitute("$0(image3d, (int4)($1, $2, $3, 0), $4)",
952                                 GetWriteImageFromDataType(data_type_),
953                                 coords[0], coords[1], coords[2], write_expr);
954       } else if (gpu_info.IsApiMetal()) {
955         return absl::Substitute("image3d.write($0, ushort3($1, $2, $3))",
956                                 write_expr, coords[0], coords[1], coords[2]);
957       } else if (gpu_info.IsGlsl()) {
958         return absl::Substitute("imageStore(image3d, ivec3($0, $1, $2), $3)",
959                                 coords[0], coords[1], coords[2], write_expr);
960       } else {
961         return "";
962       }
963     case TensorStorageType::TEXTURE_ARRAY:
964       if (gpu_info.IsApiOpenCl()) {
965         return absl::Substitute("$0(image2d_array, (int4)($1, $2, $3, 0), $4)",
966                                 GetWriteImageFromDataType(data_type_),
967                                 coords[0], coords[1], coords[2], write_expr);
968       } else if (gpu_info.IsApiMetal()) {
969         return absl::Substitute("image2d_array.write($0, ushort2($1, $2), $3)",
970                                 write_expr, coords[0], coords[1], coords[2]);
971       } else if (gpu_info.IsGlsl()) {
972         return absl::Substitute(
973             "imageStore(image2d_array, ivec3($0, $1, $2), $3)", coords[0],
974             coords[1], coords[2], write_expr);
975       } else {
976         return "";
977       }
978     case TensorStorageType::UNKNOWN:
979       return "";
980   }
981 }
982 
PerformGetAddressSelector(const std::vector<std::string> & args,std::string * result) const983 absl::Status TensorDescriptor::PerformGetAddressSelector(
984     const std::vector<std::string>& args, std::string* result) const {
985   std::string xc, yc, zc, sc, bc;
986   bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
987   if (!parsed) {
988     return absl::NotFoundError("Unrecognized GetAddress selector");
989   }
990 
991   *result = GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc);
992   return absl::OkStatus();
993 }
994 
PerformGetHandleSelector(const std::vector<std::string> & args,std::string * result) const995 absl::Status TensorDescriptor::PerformGetHandleSelector(
996     const std::vector<std::string>& args, std::string* result) const {
997   if (!args.empty()) {
998     return absl::NotFoundError(
999         absl::StrCat("GetHandle does not require arguments, but ", args.size(),
1000                      " was passed"));
1001   }
1002   switch (storage_type_) {
1003     case TensorStorageType::BUFFER:
1004       *result = "buffer";
1005       return absl::OkStatus();
1006     case TensorStorageType::IMAGE_BUFFER:
1007       if (access_type_ == AccessType::READ) {
1008         *result = "image_buffer";
1009       } else {
1010         *result = "buffer";
1011       }
1012       return absl::OkStatus();
1013     case TensorStorageType::TEXTURE_2D:
1014     case TensorStorageType::SINGLE_TEXTURE_2D:
1015       *result = "image2d";
1016       return absl::OkStatus();
1017     case TensorStorageType::TEXTURE_ARRAY:
1018       *result = "image2d_array";
1019       return absl::OkStatus();
1020     case TensorStorageType::TEXTURE_3D:
1021       *result = "image3d";
1022       return absl::OkStatus();
1023     case TensorStorageType::UNKNOWN:
1024       return absl::UnavailableError("Unknown type");
1025   }
1026 }
1027 
StorageTypeToAddressType() const1028 std::string TensorDescriptor::StorageTypeToAddressType() const {
1029   switch (storage_type_) {
1030     case TensorStorageType::BUFFER:
1031     case TensorStorageType::IMAGE_BUFFER:
1032       return "int";
1033     case TensorStorageType::TEXTURE_2D:
1034     case TensorStorageType::SINGLE_TEXTURE_2D:
1035       return "int2";
1036     case TensorStorageType::TEXTURE_ARRAY:
1037     case TensorStorageType::TEXTURE_3D:
1038       return "int4";
1039     case TensorStorageType::UNKNOWN:
1040       return "";
1041   }
1042 }
1043 
GetPhysicalCoordsLinear(const std::string & x) const1044 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsLinear(
1045     const std::string& x) const {
1046   switch (storage_type_) {
1047     case TensorStorageType::BUFFER:
1048     case TensorStorageType::IMAGE_BUFFER:
1049       return {absl::Substitute("($0)", x)};
1050     case TensorStorageType::TEXTURE_2D:
1051     case TensorStorageType::SINGLE_TEXTURE_2D:
1052       return {absl::Substitute("($0)", x), "0"};
1053     case TensorStorageType::TEXTURE_ARRAY:
1054     case TensorStorageType::TEXTURE_3D:
1055       return {absl::Substitute("($0)", x), "0", "0"};
1056     case TensorStorageType::UNKNOWN:
1057       return {""};
1058     default:
1059       return {""};
1060   }
1061 }
1062 
GetPhysicalCoordsHW(const std::string & x,const std::string & y) const1063 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsHW(
1064     const std::string& x, const std::string& y) const {
1065   switch (storage_type_) {
1066     case TensorStorageType::BUFFER:
1067     case TensorStorageType::IMAGE_BUFFER:
1068       return {absl::Substitute("(($1) * width + ($0))", x, y)};
1069     case TensorStorageType::TEXTURE_2D:
1070     case TensorStorageType::SINGLE_TEXTURE_2D:
1071       return {absl::Substitute("($0)", x), absl::Substitute("($0)", y)};
1072     case TensorStorageType::TEXTURE_ARRAY:
1073     case TensorStorageType::TEXTURE_3D:
1074       return {absl::Substitute("($0)", x), absl::Substitute("($0)", y), "0"};
1075     case TensorStorageType::UNKNOWN:
1076       return {""};
1077     default:
1078       return {""};
1079   }
1080 }
1081 
GetPhysicalCoordsWHS(const std::string & x,const std::string & y,const std::string & s) const1082 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHS(
1083     const std::string& x, const std::string& y, const std::string& s) const {
1084   switch (storage_type_) {
1085     case TensorStorageType::BUFFER:
1086     case TensorStorageType::IMAGE_BUFFER:
1087       return {
1088           absl::Substitute("((($2) * height + ($1)) * width + ($0))", x, y, s)};
1089     case TensorStorageType::TEXTURE_2D:
1090       return {absl::Substitute("($0)", x),
1091               absl::Substitute("(($0) * slices + ($1))", y, s)};
1092     case TensorStorageType::SINGLE_TEXTURE_2D:
1093       return {absl::Substitute("($0)", x), absl::Substitute("($0)", y)};
1094     case TensorStorageType::TEXTURE_ARRAY:
1095     case TensorStorageType::TEXTURE_3D:
1096       return {absl::Substitute("($0)", x), absl::Substitute("($0)", y),
1097               absl::Substitute("($0)", s)};
1098     case TensorStorageType::UNKNOWN:
1099       return {""};
1100     default:
1101       return {""};
1102   }
1103 }
1104 
GetPhysicalCoordsWHSB(const std::string & x,const std::string & y,const std::string & s,const std::string & b) const1105 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHSB(
1106     const std::string& x, const std::string& y, const std::string& s,
1107     const std::string& b) const {
1108   switch (storage_type_) {
1109     case TensorStorageType::BUFFER:
1110     case TensorStorageType::IMAGE_BUFFER:
1111       return {absl::Substitute(
1112           "(((($3) * height + $2) * width + ($1)) * batch + ($0))", b, x, y,
1113           s)};
1114     case TensorStorageType::TEXTURE_2D:
1115       return {absl::Substitute("(($0) * batch + ($1))", x, b),
1116               absl::Substitute("(($0) * slices + ($1))", y, s)};
1117     case TensorStorageType::SINGLE_TEXTURE_2D:
1118       return {absl::Substitute("(($0) * batch + ($1))", x, b),
1119               absl::Substitute("($0)", y)};
1120     case TensorStorageType::TEXTURE_ARRAY:
1121     case TensorStorageType::TEXTURE_3D:
1122       return {absl::Substitute("(($0) * batch + ($1))", x, b),
1123               absl::Substitute("($0)", y), absl::Substitute("($0)", s)};
1124     case TensorStorageType::UNKNOWN:
1125       return {""};
1126     default:
1127       return {""};
1128   }
1129 }
1130 
GetPhysicalCoordsWHDS(const std::string & x,const std::string & y,const std::string & z,const std::string & s) const1131 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHDS(
1132     const std::string& x, const std::string& y, const std::string& z,
1133     const std::string& s) const {
1134   switch (storage_type_) {
1135     case TensorStorageType::BUFFER:
1136     case TensorStorageType::IMAGE_BUFFER:
1137       return {absl::Substitute(
1138           "(((($3) * slices + ($2)) * height + ($1)) * width + ($0))", x, y, s,
1139           z)};
1140     case TensorStorageType::TEXTURE_2D:
1141       return {absl::Substitute("(($0) * depth + ($1))", x, z),
1142               absl::Substitute("(($0) * slices + ($1))", y, s)};
1143     case TensorStorageType::SINGLE_TEXTURE_2D:
1144       return {absl::Substitute("(($0) * depth + ($1))", x, z),
1145               absl::Substitute("($0)", y)};
1146     case TensorStorageType::TEXTURE_ARRAY:
1147     case TensorStorageType::TEXTURE_3D:
1148       return {absl::Substitute("($0)", x), absl::Substitute("($0)", y),
1149               absl::Substitute("(($0) * slices + ($1))", z, s)};
1150     case TensorStorageType::UNKNOWN:
1151       return {""};
1152     default:
1153       return {""};
1154   }
1155 }
1156 
GetPhysicalCoordsWHDSB(const std::string & x,const std::string & y,const std::string & z,const std::string & s,const std::string & b) const1157 std::vector<std::string> TensorDescriptor::GetPhysicalCoordsWHDSB(
1158     const std::string& x, const std::string& y, const std::string& z,
1159     const std::string& s, const std::string& b) const {
1160   switch (storage_type_) {
1161     case TensorStorageType::BUFFER:
1162     case TensorStorageType::IMAGE_BUFFER:
1163       return {absl::Substitute(
1164           "((((($4) * slices + ($3)) * height + $2) * width + ($1)) * batch + "
1165           "($0))",
1166           b, x, y, s, z)};
1167     case TensorStorageType::TEXTURE_2D:
1168       return {absl::Substitute("((($0)*batch + ($1))*depth + ($2))", x, b, z),
1169               absl::Substitute("(($0) * slices + ($1))", y, s)};
1170     case TensorStorageType::SINGLE_TEXTURE_2D:
1171       return {absl::Substitute("((($0)*batch + ($1))*depth + ($2))", x, b, z),
1172               absl::Substitute("($0)", y)};
1173     case TensorStorageType::TEXTURE_ARRAY:
1174     case TensorStorageType::TEXTURE_3D:
1175       return {absl::Substitute("(($0) * batch + ($1))", x, b),
1176               absl::Substitute("($0)", y),
1177               absl::Substitute("(($0) * slices + ($1))", z, s)};
1178     case TensorStorageType::UNKNOWN:
1179       return {""};
1180     default:
1181       return {""};
1182   }
1183 }
1184 
GetGlobalAddressNoDeclaration(const std::string & xc,const std::string & yc,const std::string & zc,const std::string & sc,const std::string & bc) const1185 std::string TensorDescriptor::GetGlobalAddressNoDeclaration(
1186     const std::string& xc, const std::string& yc, const std::string& zc,
1187     const std::string& sc, const std::string& bc) const {
1188   auto coords = GetPhysicalCoords(xc, yc, zc, sc, bc);
1189   switch (storage_type_) {
1190     case TensorStorageType::BUFFER:
1191     case TensorStorageType::IMAGE_BUFFER: {
1192       return coords[0];
1193     }
1194     case TensorStorageType::TEXTURE_2D:
1195     case TensorStorageType::SINGLE_TEXTURE_2D:
1196       return absl::Substitute("(int2)($0, $1)", coords[0], coords[1]);
1197     case TensorStorageType::TEXTURE_ARRAY:
1198     case TensorStorageType::TEXTURE_3D:
1199       return absl::Substitute("(int4)($0, $1, $2, 0)", coords[0], coords[1],
1200                               coords[2]);
1201     case TensorStorageType::UNKNOWN:
1202       return "error";
1203   }
1204 }
1205 
GetPhysicalCoords(const std::string & xc,const std::string & yc,const std::string & zc,const std::string & sc,const std::string & bc) const1206 std::vector<std::string> TensorDescriptor::GetPhysicalCoords(
1207     const std::string& xc, const std::string& yc, const std::string& zc,
1208     const std::string& sc, const std::string& bc) const {
1209   if (layout_ == Layout::HWC) {
1210     return GetPhysicalCoordsWHS(xc, yc, sc);
1211   } else if (layout_ == Layout::BHWC) {
1212     return GetPhysicalCoordsWHSB(xc, yc, sc, bc);
1213   } else if (layout_ == Layout::HWDC) {
1214     return GetPhysicalCoordsWHDS(xc, yc, zc, sc);
1215   } else if (layout_ == Layout::BHWDC) {
1216     return GetPhysicalCoordsWHDSB(xc, yc, zc, sc, bc);
1217   } else {
1218     return {""};
1219   }
1220 }
1221 
MaybeGetDataTypeFromTemplateArgs(const std::vector<std::string> & template_args,DataType * result) const1222 absl::Status TensorDescriptor::MaybeGetDataTypeFromTemplateArgs(
1223     const std::vector<std::string>& template_args, DataType* result) const {
1224   for (const auto& template_arg : template_args) {
1225     std::string read_type = template_arg;
1226     if (read_type == "half") {
1227       *result = DataType::FLOAT16;
1228       return absl::OkStatus();
1229     } else if (read_type == "float") {
1230       *result = DataType::FLOAT32;
1231       return absl::OkStatus();
1232     } else if (read_type == "int") {
1233       *result = DataType::INT32;
1234       return absl::OkStatus();
1235     } else if (read_type == "short") {
1236       *result = DataType::INT16;
1237       return absl::OkStatus();
1238     } else if (read_type == "char") {
1239       *result = DataType::INT8;
1240       return absl::OkStatus();
1241     } else if (read_type == "uint") {
1242       *result = DataType::UINT32;
1243       return absl::OkStatus();
1244     } else if (read_type == "ushort") {
1245       *result = DataType::UINT16;
1246       return absl::OkStatus();
1247     } else if (read_type == "uchar") {
1248       *result = DataType::UINT8;
1249       return absl::OkStatus();
1250     } else if (read_type == "bool") {
1251       *result = DataType::BOOL;
1252       return absl::OkStatus();
1253     }
1254   }
1255   return absl::OkStatus();
1256 }
1257 
HasAxis(Axis axis) const1258 bool TensorDescriptor::HasAxis(Axis axis) const {
1259   if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS) {
1260     return true;
1261   }
1262   if (axis == Axis::BATCH &&
1263       (layout_ == Layout::BHWC || layout_ == Layout::BHWDC)) {
1264     return true;
1265   }
1266   if (axis == Axis::DEPTH &&
1267       (layout_ == Layout::HWDC || layout_ == Layout::BHWDC)) {
1268     return true;
1269   }
1270   return false;
1271 }
1272 
ParseCoordsFromArgs(const std::vector<std::string> & args,int offset,std::string * xc,std::string * yc,std::string * zc,std::string * sc,std::string * bc) const1273 bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string>& args,
1274                                            int offset, std::string* xc,
1275                                            std::string* yc, std::string* zc,
1276                                            std::string* sc,
1277                                            std::string* bc) const {
1278   if (HasAxis(Axis::WIDTH)) {
1279     if (offset >= args.size()) return false;
1280     *xc = args[offset++];
1281   }
1282   if (HasAxis(Axis::HEIGHT)) {
1283     if (offset >= args.size()) return false;
1284     *yc = args[offset++];
1285   }
1286   if (HasAxis(Axis::DEPTH)) {
1287     if (offset >= args.size()) return false;
1288     *zc = args[offset++];
1289   }
1290   if (HasAxis(Axis::CHANNELS)) {
1291     if (offset >= args.size()) return false;
1292     *sc = args[offset++];
1293   }
1294   if (HasAxis(Axis::BATCH)) {
1295     if (offset >= args.size()) {
1296       auto it = state_vars_.find("batch_id");
1297       if (it == state_vars_.end()) {
1298         return false;
1299       } else {
1300         *bc = it->second;
1301       }
1302     } else {
1303       *bc = args[offset++];
1304     }
1305   }
1306   return true;
1307 }
1308 
GetSizeInBytesForShape(const BHWDC & shape5d) const1309 size_t TensorDescriptor::GetSizeInBytesForShape(const BHWDC& shape5d) const {
1310   int aligned_channels = storage_type_ == TensorStorageType::SINGLE_TEXTURE_2D
1311                              ? shape5d.c
1312                              : AlignByN(shape5d.c, 4);
1313   int elements_count =
1314       shape5d.b * shape5d.w * shape5d.h * shape5d.d * aligned_channels;
1315   return elements_count * SizeOf(data_type_);
1316 }
1317 
GetLinearIndex(const BHWDC & shape5d,int b,int x,int y,int d,int s,int sub_c) const1318 int TensorDescriptor::GetLinearIndex(const BHWDC& shape5d, int b, int x, int y,
1319                                      int d, int s, int sub_c) const {
1320   const int slices = DivideRoundUp(shape5d.c, 4);
1321   switch (storage_type_) {
1322     case TensorStorageType::BUFFER:
1323     case TensorStorageType::IMAGE_BUFFER:
1324     case TensorStorageType::TEXTURE_ARRAY:
1325     case TensorStorageType::TEXTURE_3D:
1326       return ((((d * slices + s) * shape5d.h + y) * shape5d.w + x) * shape5d.b +
1327               b) *
1328                  4 +
1329              sub_c;  // DSHWBC4
1330     case TensorStorageType::TEXTURE_2D:
1331       return ((((y * slices + s) * shape5d.w + x) * shape5d.b + b) * shape5d.d +
1332               d) *
1333                  4 +
1334              sub_c;  // HSWBDC4
1335     case TensorStorageType::SINGLE_TEXTURE_2D:
1336       return (((y * shape5d.w + x) * shape5d.b + b) * shape5d.d + d) *
1337                  shape5d.c +
1338              sub_c;  // HWBDC
1339     case TensorStorageType::UNKNOWN:
1340       return -1;
1341   }
1342 }
1343 
UploadData(const tflite::gpu::Tensor<HWC,DataType::FLOAT32> & src)1344 void TensorDescriptor::UploadData(
1345     const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& src) {
1346   shape_ = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c);
1347   UploadData(src.data.data());
1348 }
1349 
SupportsZeroClamp(const Axis & axis,const GpuInfo & gpu_info) const1350 bool TensorDescriptor::SupportsZeroClamp(const Axis& axis,
1351                                          const GpuInfo& gpu_info) const {
1352   switch (storage_type_) {
1353     case TensorStorageType::UNKNOWN:
1354       return false;
1355     case TensorStorageType::BUFFER:
1356     case TensorStorageType::IMAGE_BUFFER:
1357       return false;
1358     case TensorStorageType::TEXTURE_ARRAY:
1359       return (axis == Axis::WIDTH || axis == Axis::HEIGHT) &&
1360              gpu_info.SupportsZeroClampForImages();
1361     case TensorStorageType::TEXTURE_2D:
1362     case TensorStorageType::SINGLE_TEXTURE_2D:
1363       return (axis == Axis::WIDTH || axis == Axis::HEIGHT) &&
1364              gpu_info.SupportsZeroClampForImages();
1365     case TensorStorageType::TEXTURE_3D:
1366       return (axis == Axis::WIDTH || axis == Axis::HEIGHT ||
1367               axis == Axis::DEPTH) &&
1368              gpu_info.SupportsZeroClampForImages();
1369   }
1370 }
1371 
CanReadOutOfBorder(const Axis & axis) const1372 bool TensorDescriptor::CanReadOutOfBorder(const Axis& axis) const {
1373   switch (storage_type_) {
1374     case TensorStorageType::UNKNOWN:
1375       return false;
1376     case TensorStorageType::BUFFER:
1377       return false;
1378     case TensorStorageType::IMAGE_BUFFER:
1379     case TensorStorageType::TEXTURE_2D:
1380     case TensorStorageType::TEXTURE_3D:
1381     case TensorStorageType::SINGLE_TEXTURE_2D:
1382     case TensorStorageType::TEXTURE_ARRAY:
1383       return true;
1384   }
1385 }
1386 
IsLinear() const1387 bool TensorDescriptor::IsLinear() const {
1388   return storage_type_ == TensorStorageType::BUFFER ||
1389          storage_type_ == TensorStorageType::IMAGE_BUFFER;
1390 }
1391 
ReturnsZeroForNegOneRead(const GpuInfo & gpu_info) const1392 bool TensorDescriptor::ReturnsZeroForNegOneRead(const GpuInfo& gpu_info) const {
1393   return storage_type_ == TensorStorageType::IMAGE_BUFFER &&
1394          gpu_info.SupportsZeroClampForImageBuffer();
1395 }
1396 
CanCreateTensorWithShape(const GpuInfo & gpu_info,const BHWDC & shape) const1397 absl::Status TensorDescriptor::CanCreateTensorWithShape(
1398     const GpuInfo& gpu_info, const BHWDC& shape) const {
1399   const int slices = DivideRoundUp(shape.c, 4);
1400   const uint64_t allocation_size = GetSizeInBytesForShape(shape);
1401   const std::string common_desc = "Shape - " + ToString(shape) +
1402                                   ", data type - " + ToString(data_type_) + ".";
1403   if (allocation_size > gpu_info.GetMaxMemoryAllocationSize()) {
1404     return absl::ResourceExhaustedError(absl::StrCat(
1405         "Requested allocation size - ", allocation_size,
1406         " bytes. Max allocation size for this GPU - ",
1407         gpu_info.GetMaxMemoryAllocationSize(), " bytes. ", common_desc));
1408   }
1409   switch (storage_type_) {
1410     case TensorStorageType::BUFFER: {
1411       if (allocation_size > gpu_info.GetMaxBufferSize()) {
1412         return absl::ResourceExhaustedError(absl::StrCat(
1413             "Buffer with size - ", allocation_size,
1414             " bytes can not be created. Max buffer size for this GPU - ",
1415             gpu_info.GetMaxBufferSize(), " bytes. ", common_desc));
1416       } else {
1417         return absl::OkStatus();
1418       }
1419     }
1420     case TensorStorageType::IMAGE_BUFFER: {
1421       const uint64_t element_size = 4 * SizeOf(data_type_);
1422       const uint64_t image_width = allocation_size / element_size;
1423       if (image_width > gpu_info.GetMaxImageBufferWidth()) {
1424         return absl::ResourceExhaustedError(absl::StrCat(
1425             "Image buffer with width - ", image_width,
1426             " can not be created. Max image buffer width for this GPU - ",
1427             gpu_info.GetMaxImageBufferWidth(), ". ", common_desc));
1428       } else if (allocation_size > gpu_info.GetMaxBufferSize()) {
1429         return absl::ResourceExhaustedError(absl::StrCat(
1430             "Buffer with size - ", allocation_size,
1431             " bytes can not be created. Max buffer size for this GPU - ",
1432             gpu_info.GetMaxBufferSize(), " bytes. ", common_desc));
1433       } else {
1434         return absl::OkStatus();
1435       }
1436     }
1437     case TensorStorageType::TEXTURE_3D: {
1438       if (gpu_info.IsApiOpenCl() &&
1439           gpu_info.opencl_info.cl_version < OpenClVersion::kCl1_2 &&
1440           slices == 1) {
1441         return absl::InternalError(
1442             "clCreateImage3D (that used in CL 1.0/1.1) can not create image "
1443             "with depth = 1 by specification.");
1444       }
1445       const int image_width = shape.w * shape.b;
1446       const int image_height = shape.h;
1447       const int image_depth = slices * shape.d;
1448       if (image_width > gpu_info.GetMaxImage3DWidth()) {
1449         return absl::ResourceExhaustedError(absl::StrCat(
1450             "Image3D with width - ", image_width,
1451             " can not be created. Max Image3D width for this GPU - ",
1452             gpu_info.GetMaxImage3DWidth(), ". ", common_desc));
1453       } else if (image_height > gpu_info.GetMaxImage3DHeight()) {
1454         return absl::ResourceExhaustedError(absl::StrCat(
1455             "Image3D with height - ", image_height,
1456             " can not be created. Max Image3D height for this GPU - ",
1457             gpu_info.GetMaxImage3DHeight(), ". ", common_desc));
1458       } else if (image_depth > gpu_info.GetMaxImage3DDepth()) {
1459         return absl::ResourceExhaustedError(absl::StrCat(
1460             "Image3D with depth - ", image_depth,
1461             " can not be created. Max Image3D depth for this GPU - ",
1462             gpu_info.GetMaxImage3DDepth(), ". ", common_desc));
1463       } else {
1464         return absl::OkStatus();
1465       }
1466     }
1467     case TensorStorageType::TEXTURE_ARRAY: {
1468       // Bug on some Adreno. b/131099086
1469       if (gpu_info.IsApiOpenCl() && slices == 1 && gpu_info.IsAdreno() &&
1470           !gpu_info.adreno_info.support_one_layer_texture_array) {
1471         return absl::InternalError(
1472             "Image2DArray with layer = 1 works incorrect on some Adreno in "
1473             "OpenCL. Can not be created.");
1474       }
1475       const int image_width = shape.w * shape.b;
1476       const int image_height = shape.h;
1477       const int image_layers = slices * shape.d;
1478       if (image_width > gpu_info.GetMaxImage2DWidth()) {
1479         return absl::ResourceExhaustedError(absl::StrCat(
1480             "Image2DArray with width - ", image_width,
1481             " can not be created. Max Image2DArray width for this GPU - ",
1482             gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
1483       } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
1484         return absl::ResourceExhaustedError(absl::StrCat(
1485             "Image2DArray with height - ", image_height,
1486             " can not be created. Max Image2DArray height for this GPU - ",
1487             gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
1488       } else if (image_layers > gpu_info.GetMaxImage2DArrayLayers()) {
1489         return absl::ResourceExhaustedError(absl::StrCat(
1490             "Image2DArray with layers - ", image_layers,
1491             " can not be created. Max Image2DArray layers for this GPU - ",
1492             gpu_info.GetMaxImage2DArrayLayers(), ". ", common_desc));
1493       } else {
1494         return absl::OkStatus();
1495       }
1496     }
1497     case TensorStorageType::TEXTURE_2D: {
1498       const int image_width = shape.w * shape.b * shape.d;
1499       const int image_height = shape.h * slices;
1500       if (image_width > gpu_info.GetMaxImage2DWidth()) {
1501         return absl::ResourceExhaustedError(absl::StrCat(
1502             "Image2D with width - ", image_width,
1503             " can not be created. Max Image2D width for this GPU - ",
1504             gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
1505       } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
1506         return absl::ResourceExhaustedError(absl::StrCat(
1507             "Image2D with height - ", image_height,
1508             " can not be created. Max Image2D height for this GPU - ",
1509             gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
1510       } else {
1511         return absl::OkStatus();
1512       }
1513     }
1514     case TensorStorageType::SINGLE_TEXTURE_2D: {
1515       const int image_width = shape.w * shape.b * shape.d;
1516       const int image_height = shape.h;
1517       if (shape.c > 4) {
1518         return absl::ResourceExhaustedError(absl::StrCat(
1519             "Image2D with channels - ", shape.c, " can not be created."));
1520       } else if (!gpu_info.SupportsFloatImage2D(data_type_, shape.c)) {
1521         return absl::ResourceExhaustedError(
1522             "Image2D doesn't support this pixel layout.");
1523       } else if (image_width > gpu_info.GetMaxImage2DWidth()) {
1524         return absl::ResourceExhaustedError(absl::StrCat(
1525             "Image2D with width - ", image_width,
1526             " can not be created. Max Image2D width for this GPU - ",
1527             gpu_info.GetMaxImage2DWidth(), ". ", common_desc));
1528       } else if (image_height > gpu_info.GetMaxImage2DHeight()) {
1529         return absl::ResourceExhaustedError(absl::StrCat(
1530             "Image2D with height - ", image_height,
1531             " can not be created. Max Image2D height for this GPU - ",
1532             gpu_info.GetMaxImage2DHeight(), ". ", common_desc));
1533       } else {
1534         return absl::OkStatus();
1535       }
1536     }
1537     default:
1538       return absl::UnimplementedError(
1539           "Can not create resources for unknown storage type.");
1540   }
1541 }
1542 
CanCreateTensorWithShape(const GpuInfo & gpu_info,const BHWC & shape) const1543 absl::Status TensorDescriptor::CanCreateTensorWithShape(
1544     const GpuInfo& gpu_info, const BHWC& shape) const {
1545   const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
1546   return CanCreateTensorWithShape(gpu_info, shape5D);
1547 }
1548 
UpdateToSupportedStorageType(const GpuInfo & gpu_info,const BHWC & shape)1549 absl::Status TensorDescriptor::UpdateToSupportedStorageType(
1550     const GpuInfo& gpu_info, const BHWC& shape) {
1551   if (CanCreateTensorWithShape(gpu_info, shape).ok()) {
1552     return absl::OkStatus();
1553   }
1554   if (gpu_info.IsApiMetal()) {
1555     storage_type_ = TensorStorageType::BUFFER;
1556     return CanCreateTensorWithShape(gpu_info, shape);
1557   }
1558 
1559   storage_type_ = TensorStorageType::IMAGE_BUFFER;
1560   if (gpu_info.SupportsImageBuffer() &&
1561       CanCreateTensorWithShape(gpu_info, shape).ok()) {
1562     return absl::OkStatus();
1563   }
1564   storage_type_ = TensorStorageType::BUFFER;
1565   return CanCreateTensorWithShape(gpu_info, shape);
1566 }
1567 
CreateBhwcTensorDescriptor(DataType data_type,TensorStorageType storage_type,const BHWC & shape)1568 TensorDescriptor CreateBhwcTensorDescriptor(DataType data_type,
1569                                             TensorStorageType storage_type,
1570                                             const BHWC& shape) {
1571   TensorDescriptor tensor_desc =
1572       TensorDescriptor(data_type, storage_type, Layout::BHWC);
1573   tensor_desc.SetBHWCShape(shape);
1574   return tensor_desc;
1575 }
1576 
CreateHwcTensorDescriptor(DataType data_type,TensorStorageType storage_type,const HWC & shape)1577 TensorDescriptor CreateHwcTensorDescriptor(DataType data_type,
1578                                            TensorStorageType storage_type,
1579                                            const HWC& shape) {
1580   TensorDescriptor tensor_desc =
1581       TensorDescriptor(data_type, storage_type, Layout::HWC);
1582   tensor_desc.SetBHWCShape(BHWC(1, shape.h, shape.w, shape.c));
1583   return tensor_desc;
1584 }
1585 
GetStorageTypeForLinearTensor(const GpuInfo & gpu_info,DataType data_type,const Linear & shape)1586 TensorStorageType GetStorageTypeForLinearTensor(const GpuInfo& gpu_info,
1587                                                 DataType data_type,
1588                                                 const Linear& shape) {
1589   if (gpu_info.IsApple()) {
1590     if (gpu_info.apple_info.IsA7GenerationGpu() ||
1591         gpu_info.apple_info.IsA8GenerationGpu()) {
1592       return TensorStorageType::TEXTURE_2D;
1593     }
1594   }
1595   if (!gpu_info.SupportsImages() || gpu_info.IsMali() || gpu_info.IsApple() ||
1596       gpu_info.IsAMD()) {
1597     return TensorStorageType::BUFFER;
1598   } else {
1599     return TensorStorageType::TEXTURE_2D;
1600   }
1601 }
1602 
CreateConstantLinearTensorDescriptor(DataType data_type,TensorStorageType storage_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & src)1603 TensorDescriptor CreateConstantLinearTensorDescriptor(
1604     DataType data_type, TensorStorageType storage_type,
1605     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
1606   TensorDescriptor tensor_desc =
1607       TensorDescriptor(data_type, storage_type, Layout::LINEAR);
1608   tensor_desc.SetBHWDCShape(BHWDC(1, 1, 1, 1, src.shape.v));
1609   tensor_desc.UploadData(src.data.data());
1610   return tensor_desc;
1611 }
1612 
CreateConstantLinearTensorDescriptor(const GpuInfo & gpu_info,DataType data_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & src)1613 TensorDescriptor CreateConstantLinearTensorDescriptor(
1614     const GpuInfo& gpu_info, DataType data_type,
1615     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& src) {
1616   return CreateConstantLinearTensorDescriptor(
1617       data_type, GetStorageTypeForLinearTensor(gpu_info, data_type, src.shape),
1618       src);
1619 }
1620 
CreateConstantHWVec4TensorDescriptor(DataType data_type,TensorStorageType storage_type,int width,int height,const uint8_t * data)1621 TensorDescriptor CreateConstantHWVec4TensorDescriptor(
1622     DataType data_type, TensorStorageType storage_type, int width, int height,
1623     const uint8_t* data) {
1624   TensorDescriptor tensor_desc =
1625       TensorDescriptor(data_type, storage_type, Layout::HW);
1626   tensor_desc.SetBHWDCShape(BHWDC(1, height, width, 1, 4));
1627   int data_size = height * width * 4 * SizeOf(data_type);
1628   tensor_desc.data_.resize(data_size);
1629   memcpy(tensor_desc.data_.data(), data, data_size);
1630   return tensor_desc;
1631 }
1632 
1633 }  // namespace gpu
1634 }  // namespace tflite
1635