1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/cl/cl_arguments.h"
17
18 #include <memory>
19 #include <string>
20 #include <utility>
21
22 #include "absl/strings/ascii.h"
23 #include "absl/strings/match.h"
24 #include "absl/strings/str_cat.h"
25 #include "absl/strings/substitute.h"
26 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
27 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
28 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/util.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31
32 namespace tflite {
33 namespace gpu {
34 namespace cl {
35 namespace {
IsWordSymbol(char symbol)36 bool IsWordSymbol(char symbol) {
37 return absl::ascii_isalnum(symbol) || symbol == '_';
38 }
39
ReplaceAllWords(const std::string & old_word,const std::string & new_word,std::string * str)40 void ReplaceAllWords(const std::string& old_word, const std::string& new_word,
41 std::string* str) {
42 size_t position = str->find(old_word);
43 while (position != std::string::npos) {
44 char prev = position == 0 ? '.' : (*str)[position - 1];
45 char next = position + old_word.size() < str->size()
46 ? (*str)[position + old_word.size()]
47 : '.';
48 if (IsWordSymbol(prev) || IsWordSymbol(next)) {
49 position = str->find(old_word, position + 1);
50 continue;
51 }
52 str->replace(position, old_word.size(), new_word);
53 position = str->find(old_word, position + new_word.size());
54 }
55 }
56
AppendArgument(const std::string & arg,std::string * args)57 void AppendArgument(const std::string& arg, std::string* args) {
58 if (!args->empty()) {
59 absl::StrAppend(args, ",\n ");
60 }
61 absl::StrAppend(args, arg);
62 }
63
GetImageModifier(AccessType access)64 std::string GetImageModifier(AccessType access) {
65 switch (access) {
66 case AccessType::READ:
67 return "__read_only";
68 case AccessType::WRITE:
69 return "__write_only";
70 case AccessType::READ_WRITE:
71 return "__read_write";
72 }
73 }
74
GetDefaultSamplers(const GpuInfo & gpu_info)75 std::string GetDefaultSamplers(const GpuInfo& gpu_info) {
76 std::string result;
77 result +=
78 "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | "
79 "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
80 if (gpu_info.IsAdreno() && gpu_info.adreno_info.IsAdreno3xx()) {
81 // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and
82 // we can observe huge register overhead when compared to other modes.
83
84 // While using CLK_ADDRESS_NONE with out-of-range image coordinates is
85 // undefined in the OpenCL specification, we have observed that
86 // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image
87 // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using
88 // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno
89 // 3xx.
90 result +=
91 "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
92 "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n";
93 } else {
94 result +=
95 "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | "
96 "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n";
97 }
98
99 return result;
100 }
101
CreateCLObject(GPUObjectDescriptor * desc,CLContext * context,GPUObjectPtr * result)102 absl::Status CreateCLObject(GPUObjectDescriptor* desc, CLContext* context,
103 GPUObjectPtr* result) {
104 const auto* buffer_desc = dynamic_cast<const BufferDescriptor*>(desc);
105 if (buffer_desc) {
106 Buffer gpu_buffer;
107 RETURN_IF_ERROR(
108 gpu_buffer.CreateFromBufferDescriptor(*buffer_desc, context));
109 *result = std::make_unique<Buffer>(std::move(gpu_buffer));
110 return absl::OkStatus();
111 }
112
113 const auto* tensor_desc = dynamic_cast<const TensorDescriptor*>(desc);
114 if (tensor_desc) {
115 Tensor gpu_tensor;
116 RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*tensor_desc, context));
117 *result = std::make_unique<Tensor>(std::move(gpu_tensor));
118 return absl::OkStatus();
119 }
120
121 return absl::InvalidArgumentError("Unknown GPU descriptor.");
122 }
123
124 } // namespace
125
// Namespace-scope definition of the static constexpr data member
// CLArguments::kArgsPrefix. It provides the storage needed when the member is
// odr-used (e.g. the string concatenations in RenameArgumentsInCode). Such a
// definition is required before C++17; from C++17 on it is redundant but
// still permitted.
constexpr char CLArguments::kArgsPrefix[];
128
Init(const GpuInfo & gpu_info,CLContext * context,Arguments * args,std::string * code)129 absl::Status CLArguments::Init(const GpuInfo& gpu_info, CLContext* context,
130 Arguments* args, std::string* code) {
131 RETURN_IF_ERROR(AllocateObjects(*args, context));
132 RETURN_IF_ERROR(AddObjectArgs(gpu_info, *args));
133 object_refs_ = std::move(args->object_refs_);
134 const bool use_f32_for_halfs = gpu_info.IsPowerVR();
135 CopyArguments(*args, use_f32_for_halfs);
136 RETURN_IF_ERROR(SetObjectsResources(*args));
137 RenameArgumentsInCode(code);
138 args->ResolveArgsPass(code);
139 *code = absl::Substitute(*code, GetListOfArgs());
140 if (gpu_info.SupportsImages()) {
141 *code = GetDefaultSamplers(gpu_info) + *code;
142 }
143 return absl::OkStatus();
144 }
145
Init(const GpuInfo & gpu_info,Arguments * args,CLContext * context)146 absl::Status CLArguments::Init(const GpuInfo& gpu_info, Arguments* args,
147 CLContext* context) {
148 RETURN_IF_ERROR(AllocateObjects(*args, context));
149 RETURN_IF_ERROR(AddObjectArgs(gpu_info, *args));
150 object_refs_ = std::move(args->object_refs_);
151 const bool use_f32_for_halfs = gpu_info.IsPowerVR();
152 CopyArguments(*args, use_f32_for_halfs);
153 RETURN_IF_ERROR(SetObjectsResources(*args));
154 return absl::OkStatus();
155 }
156
AllocateObjects(const Arguments & args,CLContext * context)157 absl::Status CLArguments::AllocateObjects(const Arguments& args,
158 CLContext* context) {
159 objects_.resize(args.objects_.size());
160 int i = 0;
161 for (auto& t : args.objects_) {
162 RETURN_IF_ERROR(CreateCLObject(t.second.get(), context, &objects_[i]));
163 i++;
164 }
165 return absl::OkStatus();
166 }
167
AddObjectArgs(const GpuInfo & gpu_info,const Arguments & args)168 absl::Status CLArguments::AddObjectArgs(const GpuInfo& gpu_info,
169 const Arguments& args) {
170 for (const auto& t : args.objects_) {
171 AddGPUResources(t.first, t.second->GetGPUResources(gpu_info));
172 }
173 for (const auto& t : args.object_refs_) {
174 AddGPUResources(t.first, t.second->GetGPUResources(gpu_info));
175 }
176 return absl::OkStatus();
177 }
178
SetObjectsResources(const Arguments & args)179 absl::Status CLArguments::SetObjectsResources(const Arguments& args) {
180 int i = 0;
181 for (const auto& t : args.objects_) {
182 GPUResourcesWithValue resources;
183 RETURN_IF_ERROR(objects_[i]->GetGPUResources(t.second.get(), &resources));
184 RETURN_IF_ERROR(SetGPUResources(t.first, resources));
185 i++;
186 }
187 return absl::OkStatus();
188 }
189
CopyArguments(const Arguments & args,bool use_f32_for_halfs)190 void CLArguments::CopyArguments(const Arguments& args, bool use_f32_for_halfs) {
191 for (const auto& fvalue : args.float_values_) {
192 auto& new_val = float_values_[fvalue.first];
193 new_val.value = fvalue.second.value;
194 new_val.active = fvalue.second.active;
195 if (fvalue.second.active) {
196 new_val.offset = shared_float4s_data_.size();
197 shared_float4s_data_.push_back(new_val.value);
198 }
199 }
200 for (const auto& ivalue : args.int_values_) {
201 auto& new_val = int_values_[ivalue.first];
202 new_val.value = ivalue.second.value;
203 new_val.active = ivalue.second.active;
204 if (ivalue.second.active) {
205 new_val.offset = shared_int4s_data_.size();
206 shared_int4s_data_.push_back(new_val.value);
207 }
208 }
209 for (const auto& hfvalue : args.half_values_) {
210 auto& new_val = half_values_[hfvalue.first];
211 new_val.value = hfvalue.second.value;
212 new_val.active = hfvalue.second.active;
213 if (hfvalue.second.active) {
214 if (use_f32_for_halfs) {
215 new_val.store_as_f32 = true;
216 new_val.offset = shared_float4s_data_.size();
217 shared_float4s_data_.push_back(new_val.value);
218 } else {
219 new_val.store_as_f32 = false;
220 new_val.offset = shared_half4s_data_.size();
221 shared_half4s_data_.push_back(new_val.value);
222 }
223 }
224 }
225 int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4);
226 shared_int4s_data_.resize(shared_int4s_aligned_size);
227 int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4);
228 shared_float4s_data_.resize(shared_float4s_aligned_size);
229 int shared_half4s_aligned_size = AlignByN(shared_half4s_data_.size(), 4);
230 shared_half4s_data_.resize(shared_half4s_aligned_size);
231 }
232
RenameArgumentsInCode(std::string * code)233 void CLArguments::RenameArgumentsInCode(std::string* code) {
234 const std::string postfixes[4] = {"x", "y", "z", "w"};
235 for (const auto& fvalue : float_values_) {
236 if (fvalue.second.active) {
237 std::string index = std::to_string(fvalue.second.offset / 4);
238 std::string new_name =
239 "shared_float4_" + index + "." + postfixes[fvalue.second.offset % 4];
240 ReplaceAllWords(kArgsPrefix + fvalue.first, new_name, code);
241 }
242 }
243 for (const auto& ivalue : int_values_) {
244 if (ivalue.second.active) {
245 std::string index = std::to_string(ivalue.second.offset / 4);
246 std::string new_name =
247 "shared_int4_" + index + "." + postfixes[ivalue.second.offset % 4];
248 ReplaceAllWords(kArgsPrefix + ivalue.first, new_name, code);
249 }
250 }
251 for (const auto& hfvalue : half_values_) {
252 if (hfvalue.second.active) {
253 std::string index = std::to_string(hfvalue.second.offset / 4);
254 std::string new_name;
255 if (hfvalue.second.store_as_f32) {
256 new_name = "(half)(shared_float4_" + index + "." +
257 postfixes[hfvalue.second.offset % 4] + ")";
258 } else {
259 new_name = "shared_half4_" + index + "." +
260 postfixes[hfvalue.second.offset % 4];
261 }
262 ReplaceAllWords(kArgsPrefix + hfvalue.first, new_name, code);
263 }
264 }
265 }
266
AddBuffer(const std::string & name,const GPUBufferDescriptor & desc)267 void CLArguments::AddBuffer(const std::string& name,
268 const GPUBufferDescriptor& desc) {
269 buffers_[name].desc = desc;
270 }
AddImage2D(const std::string & name,const GPUImage2DDescriptor & desc)271 void CLArguments::AddImage2D(const std::string& name,
272 const GPUImage2DDescriptor& desc) {
273 images2d_[name].desc = desc;
274 }
275
AddImage2DArray(const std::string & name,const GPUImage2DArrayDescriptor & desc)276 void CLArguments::AddImage2DArray(const std::string& name,
277 const GPUImage2DArrayDescriptor& desc) {
278 image2d_arrays_[name].desc = desc;
279 }
280
AddImage3D(const std::string & name,const GPUImage3DDescriptor & desc)281 void CLArguments::AddImage3D(const std::string& name,
282 const GPUImage3DDescriptor& desc) {
283 images3d_[name].desc = desc;
284 }
285
AddImageBuffer(const std::string & name,const GPUImageBufferDescriptor & desc)286 void CLArguments::AddImageBuffer(const std::string& name,
287 const GPUImageBufferDescriptor& desc) {
288 image_buffers_[name].desc = desc;
289 }
290
AddCustomMemory(const std::string & name,const GPUCustomMemoryDescriptor & desc)291 void CLArguments::AddCustomMemory(const std::string& name,
292 const GPUCustomMemoryDescriptor& desc) {
293 custom_memories_[name].desc = desc;
294 }
295
AddGPUResources(const std::string & name,const GPUResources & resources)296 void CLArguments::AddGPUResources(const std::string& name,
297 const GPUResources& resources) {
298 for (const auto& r : resources.buffers) {
299 AddBuffer(absl::StrCat(name, "_", r.first), r.second);
300 }
301 for (const auto& r : resources.images2d) {
302 AddImage2D(absl::StrCat(name, "_", r.first), r.second);
303 }
304 for (const auto& r : resources.image2d_arrays) {
305 AddImage2DArray(absl::StrCat(name, "_", r.first), r.second);
306 }
307 for (const auto& r : resources.images3d) {
308 AddImage3D(absl::StrCat(name, "_", r.first), r.second);
309 }
310 for (const auto& r : resources.image_buffers) {
311 AddImageBuffer(absl::StrCat(name, "_", r.first), r.second);
312 }
313 for (const auto& r : resources.custom_memories) {
314 AddCustomMemory(absl::StrCat(name, "_", r.first), r.second);
315 }
316 }
317
SetInt(const std::string & name,int value)318 absl::Status CLArguments::SetInt(const std::string& name, int value) {
319 auto it = int_values_.find(name);
320 if (it == int_values_.end()) {
321 return absl::NotFoundError(
322 absl::StrCat("No int argument with name - ", name));
323 }
324 it->second.value = value;
325 if (it->second.active) {
326 shared_int4s_data_[it->second.offset] = value;
327 }
328 return absl::OkStatus();
329 }
SetFloat(const std::string & name,float value)330 absl::Status CLArguments::SetFloat(const std::string& name, float value) {
331 auto it = float_values_.find(name);
332 if (it == float_values_.end()) {
333 return absl::NotFoundError(
334 absl::StrCat("No float argument with name - ", name));
335 }
336 it->second.value = value;
337 if (it->second.active) {
338 shared_float4s_data_[it->second.offset] = value;
339 }
340 return absl::OkStatus();
341 }
342
SetHalf(const std::string & name,half value)343 absl::Status CLArguments::SetHalf(const std::string& name, half value) {
344 auto it = half_values_.find(name);
345 if (it == half_values_.end()) {
346 return absl::NotFoundError(
347 absl::StrCat("No half argument with name - ", name));
348 }
349 it->second.value = value;
350 if (it->second.active) {
351 if (it->second.store_as_f32) {
352 shared_float4s_data_[it->second.offset] = value;
353 } else {
354 shared_half4s_data_[it->second.offset] = value;
355 }
356 }
357 return absl::OkStatus();
358 }
359
SetImage2D(const std::string & name,cl_mem memory)360 absl::Status CLArguments::SetImage2D(const std::string& name, cl_mem memory) {
361 auto it = images2d_.find(name);
362 if (it == images2d_.end()) {
363 return absl::NotFoundError(
364 absl::StrCat("No image2D argument with name - ", name));
365 }
366 it->second.memory = memory;
367 return absl::OkStatus();
368 }
369
SetBuffer(const std::string & name,cl_mem memory)370 absl::Status CLArguments::SetBuffer(const std::string& name, cl_mem memory) {
371 auto it = buffers_.find(name);
372 if (it == buffers_.end()) {
373 return absl::NotFoundError(
374 absl::StrCat("No buffer argument with name - ", name));
375 }
376 it->second.memory = memory;
377 return absl::OkStatus();
378 }
379
SetImage2DArray(const std::string & name,cl_mem memory)380 absl::Status CLArguments::SetImage2DArray(const std::string& name,
381 cl_mem memory) {
382 auto it = image2d_arrays_.find(name);
383 if (it == image2d_arrays_.end()) {
384 return absl::NotFoundError(
385 absl::StrCat("No image2D array argument with name - ", name));
386 }
387 it->second.memory = memory;
388 return absl::OkStatus();
389 }
390
SetImage3D(const std::string & name,cl_mem memory)391 absl::Status CLArguments::SetImage3D(const std::string& name, cl_mem memory) {
392 auto it = images3d_.find(name);
393 if (it == images3d_.end()) {
394 return absl::NotFoundError(
395 absl::StrCat("No image3D argument with name - ", name));
396 }
397 it->second.memory = memory;
398 return absl::OkStatus();
399 }
400
SetImageBuffer(const std::string & name,cl_mem memory)401 absl::Status CLArguments::SetImageBuffer(const std::string& name,
402 cl_mem memory) {
403 auto it = image_buffers_.find(name);
404 if (it == image_buffers_.end()) {
405 return absl::NotFoundError(
406 absl::StrCat("No image buffer argument with name - ", name));
407 }
408 it->second.memory = memory;
409 return absl::OkStatus();
410 }
411
SetCustomMemory(const std::string & name,cl_mem memory)412 absl::Status CLArguments::SetCustomMemory(const std::string& name,
413 cl_mem memory) {
414 auto it = custom_memories_.find(name);
415 if (it == custom_memories_.end()) {
416 return absl::NotFoundError(
417 absl::StrCat("No custom memory argument with name - ", name));
418 }
419 it->second.memory = memory;
420 return absl::OkStatus();
421 }
422
SetObjectRef(const std::string & name,const GPUObject * object)423 absl::Status CLArguments::SetObjectRef(const std::string& name,
424 const GPUObject* object) {
425 auto it = object_refs_.find(name);
426 if (it == object_refs_.end()) {
427 return absl::NotFoundError(
428 absl::StrCat("No object ref with name - ", name));
429 }
430 GPUResourcesWithValue resources;
431 RETURN_IF_ERROR(object->GetGPUResources(it->second.get(), &resources));
432 return SetGPUResources(name, resources);
433 }
434
SetGPUResources(const std::string & name,const GPUResourcesWithValue & resources)435 absl::Status CLArguments::SetGPUResources(
436 const std::string& name, const GPUResourcesWithValue& resources) {
437 for (const auto& r : resources.generic.ints) {
438 RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second));
439 }
440 for (const auto& r : resources.generic.floats) {
441 RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second));
442 }
443 for (const auto& r : resources.buffers) {
444 RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second));
445 }
446 for (const auto& r : resources.images2d) {
447 RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second));
448 }
449 for (const auto& r : resources.image2d_arrays) {
450 RETURN_IF_ERROR(
451 SetImage2DArray(absl::StrCat(name, "_", r.first), r.second));
452 }
453 for (const auto& r : resources.images3d) {
454 RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second));
455 }
456 for (const auto& r : resources.image_buffers) {
457 RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second));
458 }
459 for (const auto& r : resources.custom_memories) {
460 RETURN_IF_ERROR(
461 SetCustomMemory(absl::StrCat(name, "_", r.first), r.second));
462 }
463 return absl::OkStatus();
464 }
465
GetListOfArgs()466 std::string CLArguments::GetListOfArgs() {
467 std::string result;
468 for (auto& t : buffers_) {
469 const std::string type_name =
470 t.second.desc.data_type == DataType::FLOAT32 ? "float" : "half";
471 std::string attributes;
472 for (const auto& attr : t.second.desc.attributes) {
473 attributes += absl::StrCat(" __attribute__((", attr, "))");
474 }
475 std::string cl_type;
476 if (t.second.desc.data_type == DataType::BOOL) {
477 cl_type = ToCLDataType(DataType::UINT8, t.second.desc.element_size);
478 } else {
479 cl_type =
480 ToCLDataType(t.second.desc.data_type, t.second.desc.element_size);
481 }
482 AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.desc.memory_type),
483 " ", cl_type, "* ", t.first, attributes),
484 &result);
485 }
486 for (auto& t : image_buffers_) {
487 AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
488 " image1d_buffer_t ", t.first),
489 &result);
490 }
491 for (auto& t : images2d_) {
492 AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
493 " image2d_t ", t.first),
494 &result);
495 }
496 for (auto& t : image2d_arrays_) {
497 AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
498 " image2d_array_t ", t.first),
499 &result);
500 }
501 for (auto& t : images3d_) {
502 AppendArgument(absl::StrCat(GetImageModifier(t.second.desc.access_type),
503 " image3d_t ", t.first),
504 &result);
505 }
506 for (auto& t : custom_memories_) {
507 AppendArgument(absl::StrCat(t.second.desc.type_name, " ", t.first),
508 &result);
509 }
510 for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
511 AppendArgument(absl::StrCat("int4 shared_int4_", i), &result);
512 }
513 for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
514 AppendArgument(absl::StrCat("float4 shared_float4_", i), &result);
515 }
516 for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
517 AppendArgument(absl::StrCat("half4 shared_half4_", i), &result);
518 }
519 return result;
520 }
521
Bind(cl_kernel kernel,int offset)522 absl::Status CLArguments::Bind(cl_kernel kernel, int offset) {
523 for (auto& t : buffers_) {
524 const int error_code =
525 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
526 if (error_code != CL_SUCCESS) {
527 return absl::UnknownError(absl::StrCat(
528 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
529 "(at index - ", offset, ")"));
530 }
531 offset++;
532 }
533 for (auto& t : image_buffers_) {
534 const int error_code =
535 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
536 if (error_code != CL_SUCCESS) {
537 return absl::UnknownError(absl::StrCat(
538 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
539 "(at index - ", offset, ")"));
540 }
541 offset++;
542 }
543 for (auto& t : images2d_) {
544 const int error_code =
545 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
546 if (error_code != CL_SUCCESS) {
547 return absl::UnknownError(absl::StrCat(
548 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
549 "(at index - ", offset, ")"));
550 }
551 offset++;
552 }
553 for (auto& t : image2d_arrays_) {
554 const int error_code =
555 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
556 if (error_code != CL_SUCCESS) {
557 return absl::UnknownError(absl::StrCat(
558 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
559 "(at index - ", offset, ")"));
560 }
561 offset++;
562 }
563 for (auto& t : images3d_) {
564 const int error_code =
565 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
566 if (error_code != CL_SUCCESS) {
567 return absl::UnknownError(absl::StrCat(
568 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
569 "(at index - ", offset, ")"));
570 }
571 offset++;
572 }
573 for (auto& t : custom_memories_) {
574 const int error_code =
575 clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory);
576 if (error_code != CL_SUCCESS) {
577 return absl::UnknownError(absl::StrCat(
578 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
579 "(at index - ", offset, ")"));
580 }
581 offset++;
582 }
583 for (int i = 0; i < shared_int4s_data_.size() / 4; ++i) {
584 const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
585 &shared_int4s_data_[i * 4]);
586 if (error_code != CL_SUCCESS) {
587 return absl::UnknownError(absl::StrCat(
588 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
589 "(at index - ", offset, ")"));
590 }
591 offset++;
592 }
593 for (int i = 0; i < shared_float4s_data_.size() / 4; ++i) {
594 const int error_code = clSetKernelArg(kernel, offset, sizeof(int32_t) * 4,
595 &shared_float4s_data_[i * 4]);
596 if (error_code != CL_SUCCESS) {
597 return absl::UnknownError(absl::StrCat(
598 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
599 "(at index - ", offset, ")"));
600 }
601 offset++;
602 }
603 for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) {
604 const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4,
605 &shared_half4s_data_[i * 4]);
606 if (error_code != CL_SUCCESS) {
607 return absl::UnknownError(absl::StrCat(
608 "Failed to set kernel arguments - ", CLErrorCodeToString(error_code),
609 "(at index - ", offset, ")"));
610 }
611 offset++;
612 }
613 return absl::OkStatus();
614 }
615
616 } // namespace cl
617 } // namespace gpu
618 } // namespace tflite
619