1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ 18 19 #include <map> 20 #include <string> 21 #include <vector> 22 23 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" 24 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" 25 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" 26 #include "tensorflow/lite/delegates/gpu/common/status.h" 27 #include "tensorflow/lite/delegates/gpu/common/task/arguments.h" 28 29 namespace tflite { 30 namespace gpu { 31 namespace cl { 32 33 class CLArguments : public ArgumentsBinder { 34 public: 35 CLArguments() = default; 36 37 absl::Status Init(const GpuInfo& gpu_info, 38 const std::map<std::string, std::string>& linkables, 39 CLContext* context, Arguments* args, std::string* code); 40 absl::Status Init(const GpuInfo& gpu_info, Arguments* args, 41 CLContext* context); 42 43 // Temporary, will be resolved later MoveObjectRefsIn(Arguments * args)44 void MoveObjectRefsIn(Arguments* args) { 45 object_refs_ = std::move(args->object_refs_); 46 } MoveObjectRefsOut(Arguments * args)47 void MoveObjectRefsOut(Arguments* args) { 48 args->object_refs_ = std::move(object_refs_); 49 } 50 void CopyScalarValues(Arguments* args) const; 51 52 // Move only 53 CLArguments(CLArguments&& args) = default; 54 CLArguments& operator=(CLArguments&& args) = default; 55 CLArguments(const CLArguments&) = delete; 56 CLArguments& operator=(const CLArguments&) = delete; 57 58 absl::Status SetInt(const std::string& name, int value) override; 59 absl::Status SetFloat(const std::string& name, float value) override; 60 absl::Status SetHalf(const std::string& name, half value) override; 61 absl::Status SetObjectRef(const std::string& name, const GPUObject* object); 62 63 absl::Status Bind(cl_kernel kernel, int offset = 0); 64 65 private: 66 absl::Status AllocateObjects(const Arguments& args, CLContext* context); 67 absl::Status AddObjectArgs(Arguments* args); 68 69 absl::Status ResolveSelectorsPass( 70 const GpuInfo& gpu_info, const Arguments& args, 71 const std::map<std::string, std::string>& linkables, std::string* code); 72 absl::Status ResolveSelector( 73 const GpuInfo& gpu_info, const Arguments& args, 74 const std::map<std::string, std::string>& linkables, 75 const std::string& object_name, const std::string& selector, 76 const std::vector<std::string>& function_args, 77 const std::vector<std::string>& template_args, std::string* result); 78 void ResolveObjectNames(const std::string& object_name, 79 const std::vector<std::string>& member_names, 80 std::string* code); 81 void ResolveArgsPass(std::string* code); 82 83 void CopyArguments(const Arguments& args, bool use_f32_for_halfs); 84 void RenameArgumentsInCode(std::string* code); 85 std::string GetListOfArgs(); 86 87 void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc); 88 void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc); 89 void AddImage2DArray(const std::string& name, 90 const GPUImage2DArrayDescriptor& desc); 91 void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc); 92 void AddImageBuffer(const std::string& name, 93 const GPUImageBufferDescriptor& desc); 94 void AddCustomMemory(const std::string& name, 95 const GPUCustomMemoryDescriptor& desc); 96 void AddGPUResources(const std::string& name, const GPUResources& resources, 97 Arguments* args); 98 absl::Status SetObjectsResources(const Arguments& args); 99 absl::Status SetGPUResources(const std::string& name, 100 const GPUResourcesWithValue& resources); 101 102 absl::Status SetImage2D(const std::string& name, cl_mem memory); 103 absl::Status SetBuffer(const std::string& name, cl_mem memory); 104 absl::Status SetImage2DArray(const std::string& name, cl_mem memory); 105 absl::Status SetImage3D(const std::string& name, cl_mem memory); 106 absl::Status SetImageBuffer(const std::string& name, cl_mem memory); 107 absl::Status SetCustomMemory(const std::string& name, cl_mem memory); 108 109 static constexpr char kArgsPrefix[] = "args."; 110 struct IntValue { 111 int value; 112 113 // many arguments generated automatically and not used 114 // to reduce amount of data transferred we adding this optimization 115 bool active = false; 116 117 // offset to shared storage. 118 uint32_t offset = -1; 119 }; 120 std::map<std::string, IntValue> int_values_; 121 std::vector<int32_t> shared_int4s_data_; 122 123 struct FloatValue { 124 float value; 125 126 // many arguments generated automatically and not used 127 // to reduce amount of data transferred we adding this optimization 128 bool active = false; 129 130 // offset to shared storage. 131 uint32_t offset = -1; 132 }; 133 std::map<std::string, FloatValue> float_values_; 134 std::vector<float> shared_float4s_data_; 135 136 struct HalfValue { 137 half value; 138 139 // many arguments generated automatically and not used 140 // to reduce amount of data transferred we adding this optimization 141 bool active = false; 142 143 // some devices have issues with half parameters. 144 bool store_as_f32 = false; 145 146 // offset to shared uniform storage. 147 uint32_t offset = -1; 148 }; 149 std::map<std::string, HalfValue> half_values_; 150 std::vector<half> shared_half4s_data_; 151 152 struct CLBufferDescriptor { 153 GPUBufferDescriptor desc; 154 cl_mem memory; 155 }; 156 struct CLImage2DDescriptor { 157 GPUImage2DDescriptor desc; 158 cl_mem memory; 159 }; 160 struct CLImage2DArrayDescriptor { 161 GPUImage2DArrayDescriptor desc; 162 cl_mem memory; 163 }; 164 struct CLImage3DDescriptor { 165 GPUImage3DDescriptor desc; 166 cl_mem memory; 167 }; 168 struct CLImageBufferDescriptor { 169 GPUImageBufferDescriptor desc; 170 cl_mem memory; 171 }; 172 struct CLCustomMemoryDescriptor { 173 GPUCustomMemoryDescriptor desc; 174 cl_mem memory; 175 }; 176 177 std::map<std::string, CLBufferDescriptor> buffers_; 178 std::map<std::string, CLImage2DDescriptor> images2d_; 179 std::map<std::string, CLImage2DArrayDescriptor> image2d_arrays_; 180 std::map<std::string, CLImage3DDescriptor> images3d_; 181 std::map<std::string, CLImageBufferDescriptor> image_buffers_; 182 std::map<std::string, CLCustomMemoryDescriptor> custom_memories_; 183 184 std::map<std::string, GPUObjectDescriptorPtr> object_refs_; 185 std::vector<GPUObjectPtr> objects_; 186 }; 187 188 } // namespace cl 189 } // namespace gpu 190 } // namespace tflite 191 192 #endif // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_ 193