• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
17 
18 #include <memory>
19 #include <string>
20 #include <utility>
21 
22 #include "absl/strings/ascii.h"
23 #include "absl/strings/match.h"
24 #include "absl/strings/str_cat.h"
25 #include "absl/strings/substitute.h"
26 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
27 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
28 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/util.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31 
32 namespace tflite {
33 namespace gpu {
34 namespace cl {
35 namespace {
IsWordSymbol(char symbol)36 bool IsWordSymbol(char symbol) {
37   return absl::ascii_isalnum(symbol) || symbol == '_';
38 }
39 
ReplaceAllWords(const std::string & old_word,const std::string & new_word,std::string * str)40 void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
41                      std::string* str) {
42   size_t position = str->find(old_word);
43   while (position != std::string::npos) {
44     char prev = position == 0 ? '.' : (*str)[position - 1];
45     char next = position + old_word.size() < str->size()
46                     ? (*str)[position + old_word.size()]
47                     : '.';
48     if (IsWordSymbol(prev) || IsWordSymbol(next)) {
49       position = str->find(old_word, position + 1);
50       continue;
51     }
52     str->replace(position, old_word.size(), new_word);
53     position = str->find(old_word, position + new_word.size());
54   }
55 }
56 
AppendArgument(const std::string & arg,std::string * args)57 void AppendArgument(const std::string& arg, std::string* args) {
58   if (!args->empty()) {
59     absl::StrAppend(args, ",\n  ");
60   }
61   absl::StrAppend(args, arg);
62 }
63 
GetImageModifier(AccessType access)64 std::string GetImageModifier(AccessType access) {
65   switch (access) {
66     case AccessType::READ:
67       return "__read_only";
68     case AccessType::WRITE:
69       return "__write_only";
70     case AccessType::READ_WRITE:
71       return "__read_write";
72   }
73 }
74 
GetDefaultSamplers(const GpuInfo & gpu_info)75 std::string GetDefaultSamplers(const GpuInfo& gpu_info) {
76   std::string result;
77   result +=
78       "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
79       "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
80   if (gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) {
81     // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
82     // we can observe huge register overhead when compared to other modes.
83 
84     // While using CLK_ADDRESS_NONE with out-of-range image coordinates is
85     // undefined in the OpenCL specification, we have observed that
86     // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
87     // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
88     // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
89     // 3xx.
90     result +=
91         "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
92         "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
93   } else {
94     result +=
95         "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
96         "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
97   }
98 
99   return result;
100 }
101 
CreateCLObject(GPUObjectDescriptor * desc,CLContext * context,GPUObjectPtr * result)102 absl::Status CreateCLObject(GPUObjectDescriptor* desc, CLContext* context,
103                             GPUObjectPtr* result) {
104   const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(desc);
105   if (buffer_desc) {
106     Buffer gpu_buffer;
107     RETURN_IF_ERROR(
108         gpu_buffer.CreateFromBufferDescriptor(*buffer_desc, context));
109     *result = std::make_unique<Buffer>(std::move(gpu_buffer));
110     return absl::OkStatus();
111   }
112 
113   const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc);
114   if (tensor_desc) {
115     Tensor gpu_tensor;
116     RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*tensor_desc, context));
117     *result = std::make_unique<Tensor>(std::move(gpu_tensor));
118     return absl::OkStatus();
119   }
120 
121   return absl::InvalidArgumentError("Unknown GPU descriptor.");
122 }
123 
124 }  // namespace
125 
// Out-of-class definition of the static constexpr member kArgsPrefix. It has
// no initializer here, so the value must come from the in-class declaration
// (presumably in cl_arguments.h - confirm). This file uses the member as a
// string prefix in RenameArgumentsInCode.
constexpr char CLArguments::kArgsPrefix[];
128 
Init(const GpuInfo & gpu_info,CLContext * context,Arguments * args,std::string * code)129 absl::Status CLArguments::Init(const GpuInfo& gpu_info, CLContext* context,
130                                Arguments* args, std::string* code) {
131   RETURN_IF_ERROR(AllocateObjects(*args, context));
132   RETURN_IF_ERROR(AddObjectArgs(gpu_info, *args));
133   object_refs_ = std::move(args->object_refs_);
134   const bool use_f32_for_halfs = gpu_info.IsPowerVR();
135   CopyArguments(*args, use_f32_for_halfs);
136   RETURN_IF_ERROR(SetObjectsResources(*args));
137   RenameArgumentsInCode(code);
138   args->ResolveArgsPass(code);
139   *code = absl::Substitute(*code, GetListOfArgs());
140   if (gpu_info.SupportsImages()) {
141     *code = GetDefaultSamplers(gpu_info) + *code;
142   }
143   return absl::OkStatus();
144 }
145 
Init(const GpuInfo & gpu_info,Arguments * args,CLContext * context)146 absl::Status CLArguments::Init(const GpuInfo& gpu_info, Arguments* args,
147                                CLContext* context) {
148   RETURN_IF_ERROR(AllocateObjects(*args, context));
149   RETURN_IF_ERROR(AddObjectArgs(gpu_info, *args));
150   object_refs_ = std::move(args->object_refs_);
151   const bool use_f32_for_halfs = gpu_info.IsPowerVR();
152   CopyArguments(*args, use_f32_for_halfs);
153   RETURN_IF_ERROR(SetObjectsResources(*args));
154   return absl::OkStatus();
155 }
156 
AllocateObjects(const Arguments & args,CLContext * context)157 absl::Status CLArguments::AllocateObjects(const Arguments& args,
158                                           CLContext* context) {
159   objects_.resize(args.objects_.size());
160   int i = 0;
161   for (auto& t : args.objects_) {
162     RETURN_IF_ERROR(CreateCLObject(t.second.get(), context, &objects_[i]));
163     i++;
164   }
165   return absl::OkStatus();
166 }
167 
AddObjectArgs(const GpuInfo & gpu_info,const Arguments & args)168 absl::Status CLArguments::AddObjectArgs(const GpuInfo& gpu_info,
169                                         const Arguments& args) {
170   for (const auto& t : args.objects_) {
171     AddGPUResources(t.first, t.second->GetGPUResources(gpu_info));
172   }
173   for (const auto& t : args.object_refs_) {
174     AddGPUResources(t.first, t.second->GetGPUResources(gpu_info));
175   }
176   return absl::OkStatus();
177 }
178 
SetObjectsResources(const Arguments & args)179 absl::Status CLArguments::SetObjectsResources(const Arguments& args) {
180   int i = 0;
181   for (const auto& t : args.objects_) {
182     GPUResourcesWithValue resources;
183     RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources));
184     RETURN_IF_ERROR(SetGPUResources(t.first, resources));
185     i++;
186   }
187   return absl::OkStatus();
188 }
189 
CopyArguments(const Arguments & args,bool use_f32_for_halfs)190 void CLArguments::CopyArguments(const Arguments& args, bool use_f32_for_halfs) {
191   for (const auto& fvalue : args.float_values_) {
192     auto& new_val = float_values_[fvalue.first];
193     new_val.value = fvalue.second.value;
194     new_val.active = fvalue.second.active;
195     if (fvalue.second.active) {
196       new_val.offset = shared_float4s_data_.size();
197       shared_float4s_data_.push_back(new_val.value);
198     }
199   }
200   for (const auto& ivalue : args.int_values_) {
201     auto& new_val = int_values_[ivalue.first];
202     new_val.value = ivalue.second.value;
203     new_val.active = ivalue.second.active;
204     if (ivalue.second.active) {
205       new_val.offset = shared_int4s_data_.size();
206       shared_int4s_data_.push_back(new_val.value);
207     }
208   }
209   for (const auto& hfvalue : args.half_values_) {
210     auto& new_val = half_values_[hfvalue.first];
211     new_val.value = hfvalue.second.value;
212     new_val.active = hfvalue.second.active;
213     if (hfvalue.second.active) {
214       if (use_f32_for_halfs) {
215         new_val.store_as_f32 = true;
216         new_val.offset = shared_float4s_data_.size();
217         shared_float4s_data_.push_back(new_val.value);
218       } else {
219         new_val.store_as_f32 = false;
220         new_val.offset = shared_half4s_data_.size();
221         shared_half4s_data_.push_back(new_val.value);
222       }
223     }
224   }
225   int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
226   shared_int4s_data_.resize(shared_int4s_aligned_size);
227   int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
228   shared_float4s_data_.resize(shared_float4s_aligned_size);
229   int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
230   shared_half4s_data_.resize(shared_half4s_aligned_size);
231 }
232 
RenameArgumentsInCode(std::string * code)233 void CLArguments::RenameArgumentsInCode(std::string* code) {
234   const std::string postfixes[4] = {"x", "y", "z", "w"};
235   for (const auto& fvalue : float_values_) {
236     if (fvalue.second.active) {
237       std::string index = std::to_string(fvalue.second.offset / 4);
238       std::string new_name =
239           "shared_float4_" + index + "." + postfixes[fvalue.second.offset % 4];
240       ReplaceAllWords(kArgsPrefix + fvalue.first, new_name, code);
241     }
242   }
243   for (const auto& ivalue : int_values_) {
244     if (ivalue.second.active) {
245       std::string index = std::to_string(ivalue.second.offset / 4);
246       std::string new_name =
247           "shared_int4_" + index + "." + postfixes[ivalue.second.offset % 4];
248       ReplaceAllWords(kArgsPrefix + ivalue.first, new_name, code);
249     }
250   }
251   for (const auto& hfvalue : half_values_) {
252     if (hfvalue.second.active) {
253       std::string index = std::to_string(hfvalue.second.offset / 4);
254       std::string new_name;
255       if (hfvalue.second.store_as_f32) {
256         new_name = "(half)(shared_float4_" + index + "." +
257                    postfixes[hfvalue.second.offset % 4] + ")";
258       } else {
259         new_name = "shared_half4_" + index + "." +
260                    postfixes[hfvalue.second.offset % 4];
261       }
262       ReplaceAllWords(kArgsPrefix + hfvalue.first, new_name, code);
263     }
264   }
265 }
266 
AddBuffer(const std::string & name,const GPUBufferDescriptor & desc)267 void CLArguments::AddBuffer(const std::string& name,
268                             const GPUBufferDescriptor& desc) {
269   buffers_[name].desc = desc;
270 }
AddImage2D(const std::string & name,const GPUImage2DDescriptor & desc)271 void CLArguments::AddImage2D(const std::string& name,
272                              const GPUImage2DDescriptor& desc) {
273   images2d_[name].desc = desc;
274 }
275 
AddImage2DArray(const std::string & name,const GPUImage2DArrayDescriptor & desc)276 void CLArguments::AddImage2DArray(const std::string& name,
277                                   const GPUImage2DArrayDescriptor& desc) {
278   image2d_arrays_[name].desc = desc;
279 }
280 
AddImage3D(const std::string & name,const GPUImage3DDescriptor & desc)281 void CLArguments::AddImage3D(const std::string& name,
282                              const GPUImage3DDescriptor& desc) {
283   images3d_[name].desc = desc;
284 }
285 
AddImageBuffer(const std::string & name,const GPUImageBufferDescriptor & desc)286 void CLArguments::AddImageBuffer(const std::string& name,
287                                  const GPUImageBufferDescriptor& desc) {
288   image_buffers_[name].desc = desc;
289 }
290 
AddCustomMemory(const std::string & name,const GPUCustomMemoryDescriptor & desc)291 void CLArguments::AddCustomMemory(const std::string& name,
292                                   const GPUCustomMemoryDescriptor& desc) {
293   custom_memories_[name].desc = desc;
294 }
295 
AddGPUResources(const std::string & name,const GPUResources & resources)296 void CLArguments::AddGPUResources(const std::string& name,
297                                   const GPUResources& resources) {
298   for (const auto& r : resources.buffers) {
299     AddBuffer(absl::StrCat(name, "_", r.first), r.second);
300   }
301   for (const auto& r : resources.images2d) {
302     AddImage2D(absl::StrCat(name, "_", r.first), r.second);
303   }
304   for (const auto& r : resources.image2d_arrays) {
305     AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
306   }
307   for (const auto& r : resources.images3d) {
308     AddImage3D(absl::StrCat(name, "_", r.first), r.second);
309   }
310   for (const auto& r : resources.image_buffers) {
311     AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
312   }
313   for (const auto& r : resources.custom_memories) {
314     AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
315   }
316 }
317 
SetInt(const std::string & name,int value)318 absl::Status CLArguments::SetInt(const std::string& name, int value) {
319   auto it = int_values_.find(name);
320   if (it == int_values_.end()) {
321     return absl::NotFoundError(
322         absl::StrCat("No int argument with name - ", name));
323   }
324   it->second.value = value;
325   if (it->second.active) {
326     shared_int4s_data_[it->second.offset] = value;
327   }
328   return absl::OkStatus();
329 }
SetFloat(const std::string & name,float value)330 absl::Status CLArguments::SetFloat(const std::string& name, float value) {
331   auto it = float_values_.find(name);
332   if (it == float_values_.end()) {
333     return absl::NotFoundError(
334         absl::StrCat("No float argument with name - ", name));
335   }
336   it->second.value = value;
337   if (it->second.active) {
338     shared_float4s_data_[it->second.offset] = value;
339   }
340   return absl::OkStatus();
341 }
342 
SetHalf(const std::string & name,half value)343 absl::Status CLArguments::SetHalf(const std::string& name, half value) {
344   auto it = half_values_.find(name);
345   if (it == half_values_.end()) {
346     return absl::NotFoundError(
347         absl::StrCat("No half argument with name - ", name));
348   }
349   it->second.value = value;
350   if (it->second.active) {
351     if (it->second.store_as_f32) {
352       shared_float4s_data_[it->second.offset] = value;
353     } else {
354       shared_half4s_data_[it->second.offset] = value;
355     }
356   }
357   return absl::OkStatus();
358 }
359 
SetImage2D(const std::string & name,cl_mem memory)360 absl::Status CLArguments::SetImage2D(const std::string& name, cl_mem memory) {
361   auto it = images2d_.find(name);
362   if (it == images2d_.end()) {
363     return absl::NotFoundError(
364         absl::StrCat("No image2D argument with name - ", name));
365   }
366   it->second.memory = memory;
367   return absl::OkStatus();
368 }
369 
SetBuffer(const std::string & name,cl_mem memory)370 absl::Status CLArguments::SetBuffer(const std::string& name, cl_mem memory) {
371   auto it = buffers_.find(name);
372   if (it == buffers_.end()) {
373     return absl::NotFoundError(
374         absl::StrCat("No buffer argument with name - ", name));
375   }
376   it->second.memory = memory;
377   return absl::OkStatus();
378 }
379 
SetImage2DArray(const std::string & name,cl_mem memory)380 absl::Status CLArguments::SetImage2DArray(const std::string& name,
381                                           cl_mem memory) {
382   auto it = image2d_arrays_.find(name);
383   if (it == image2d_arrays_.end()) {
384     return absl::NotFoundError(
385         absl::StrCat("No image2D array argument with name - ", name));
386   }
387   it->second.memory = memory;
388   return absl::OkStatus();
389 }
390 
SetImage3D(const std::string & name,cl_mem memory)391 absl::Status CLArguments::SetImage3D(const std::string& name, cl_mem memory) {
392   auto it = images3d_.find(name);
393   if (it == images3d_.end()) {
394     return absl::NotFoundError(
395         absl::StrCat("No image3D argument with name - ", name));
396   }
397   it->second.memory = memory;
398   return absl::OkStatus();
399 }
400 
SetImageBuffer(const std::string & name,cl_mem memory)401 absl::Status CLArguments::SetImageBuffer(const std::string& name,
402                                          cl_mem memory) {
403   auto it = image_buffers_.find(name);
404   if (it == image_buffers_.end()) {
405     return absl::NotFoundError(
406         absl::StrCat("No image buffer argument with name - ", name));
407   }
408   it->second.memory = memory;
409   return absl::OkStatus();
410 }
411 
SetCustomMemory(const std::string & name,cl_mem memory)412 absl::Status CLArguments::SetCustomMemory(const std::string& name,
413                                           cl_mem memory) {
414   auto it = custom_memories_.find(name);
415   if (it == custom_memories_.end()) {
416     return absl::NotFoundError(
417         absl::StrCat("No custom memory argument with name - ", name));
418   }
419   it->second.memory = memory;
420   return absl::OkStatus();
421 }
422 
SetObjectRef(const std::string & name,const GPUObject * object)423 absl::Status CLArguments::SetObjectRef(const std::string& name,
424                                        const GPUObject* object) {
425   auto it = object_refs_.find(name);
426   if (it == object_refs_.end()) {
427     return absl::NotFoundError(
428         absl::StrCat("No object ref with name - ", name));
429   }
430   GPUResourcesWithValue resources;
431   RETURN_IF_ERROR(object->GetGPUResources(it->second.get(), &resources));
432   return SetGPUResources(name, resources);
433 }
434 
SetGPUResources(const std::string & name,const GPUResourcesWithValue & resources)435 absl::Status CLArguments::SetGPUResources(
436     const std::string& name, const GPUResourcesWithValue& resources) {
437   for (const auto& r : resources.generic.ints) {
438     RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
439   }
440   for (const auto& r : resources.generic.floats) {
441     RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
442   }
443   for (const auto& r : resources.buffers) {
444     RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
445   }
446   for (const auto& r : resources.images2d) {
447     RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
448   }
449   for (const auto& r : resources.image2d_arrays) {
450     RETURN_IF_ERROR(
451         SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
452   }
453   for (const auto& r : resources.images3d) {
454     RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
455   }
456   for (const auto& r : resources.image_buffers) {
457     RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
458   }
459   for (const auto& r : resources.custom_memories) {
460     RETURN_IF_ERROR(
461         SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
462   }
463   return absl::OkStatus();
464 }
465 
GetListOfArgs()466 std::string CLArguments::GetListOfArgs() {
467   std::string result;
468   for (auto& t : buffers_) {
469     const std::string type_name =
470         t.second.desc.data_type == DataType::FLOAT32 ? "float" : "half";
471     std::string attributes;
472     for (const auto& attr : t.second.desc.attributes) {
473       attributes += absl::StrCat("  __attribute__((", attr, "))");
474     }
475     std::string cl_type;
476     if (t.second.desc.data_type == DataType::BOOL) {
477       cl_type = ToCLDataType(DataType::UINT8, t.second.desc.element_size);
478     } else {
479       cl_type =
480           ToCLDataType(t.second.desc.data_type, t.second.desc.element_size);
481     }
482     AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.desc.memory_type),
483                                 " ", cl_type, "* ", t.first, attributes),
484                    &result);
485   }
486   for (auto& t : image_buffers_) {
487     AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
488                                 " image1d_buffer_t ", t.first),
489                    &result);
490   }
491   for (auto& t : images2d_) {
492     AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
493                                 " image2d_t ", t.first),
494                    &result);
495   }
496   for (auto& t : image2d_arrays_) {
497     AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
498                                 " image2d_array_t ", t.first),
499                    &result);
500   }
501   for (auto& t : images3d_) {
502     AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
503                                 " image3d_t ", t.first),
504                    &result);
505   }
506   for (auto& t : custom_memories_) {
507     AppendArgument(absl::StrCat(t.second.desc.type_name, " ", t.first),
508                    &result);
509   }
510   for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
511     AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
512   }
513   for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
514     AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
515   }
516   for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
517     AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
518   }
519   return result;
520 }
521 
Bind(cl_kernel kernel,int offset)522 absl::Status CLArguments::Bind(cl_kernel kernel, int offset) {
523   for (auto& t : buffers_) {
524     const int error_code =
525         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
526     if (error_code != CL_SUCCESS) {
527       return absl::UnknownError(absl::StrCat(
528           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
529           "(at index - ", offset, ")"));
530     }
531     offset++;
532   }
533   for (auto& t : image_buffers_) {
534     const int error_code =
535         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
536     if (error_code != CL_SUCCESS) {
537       return absl::UnknownError(absl::StrCat(
538           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
539           "(at index - ", offset, ")"));
540     }
541     offset++;
542   }
543   for (auto& t : images2d_) {
544     const int error_code =
545         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
546     if (error_code != CL_SUCCESS) {
547       return absl::UnknownError(absl::StrCat(
548           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
549           "(at index - ", offset, ")"));
550     }
551     offset++;
552   }
553   for (auto& t : image2d_arrays_) {
554     const int error_code =
555         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
556     if (error_code != CL_SUCCESS) {
557       return absl::UnknownError(absl::StrCat(
558           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
559           "(at index - ", offset, ")"));
560     }
561     offset++;
562   }
563   for (auto& t : images3d_) {
564     const int error_code =
565         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
566     if (error_code != CL_SUCCESS) {
567       return absl::UnknownError(absl::StrCat(
568           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
569           "(at index - ", offset, ")"));
570     }
571     offset++;
572   }
573   for (auto& t : custom_memories_) {
574     const int error_code =
575         clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
576     if (error_code != CL_SUCCESS) {
577       return absl::UnknownError(absl::StrCat(
578           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
579           "(at index - ", offset, ")"));
580     }
581     offset++;
582   }
583   for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
584     const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
585                                           &shared_int4s_data_[i * 4]);
586     if (error_code != CL_SUCCESS) {
587       return absl::UnknownError(absl::StrCat(
588           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
589           "(at index - ", offset, ")"));
590     }
591     offset++;
592   }
593   for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
594     const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
595                                           &shared_float4s_data_[i * 4]);
596     if (error_code != CL_SUCCESS) {
597       return absl::UnknownError(absl::StrCat(
598           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
599           "(at index - ", offset, ")"));
600     }
601     offset++;
602   }
603   for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
604     const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
605                                           &shared_half4s_data_[i * 4]);
606     if (error_code != CL_SUCCESS) {
607       return absl::UnknownError(absl::StrCat(
608           "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
609           "(at index - ", offset, ")"));
610     }
611     offset++;
612   }
613   return absl::OkStatus();
614 }
615 
616 }  // namespace cl
617 }  // namespace gpu
618 }  // namespace tflite
619