1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/runtime/kernel/opencl/utils.h"
18 #include <fstream>
19 #include <algorithm>
20 #include <vector>
21 #include <map>
22 #include "src/kernel_registry.h"
23 #include "src/common/file_utils.h"
24
25 using mindspore::schema::ActivationType_LEAKY_RELU;
26 using mindspore::schema::ActivationType_RELU;
27 using mindspore::schema::ActivationType_RELU6;
28 using mindspore::schema::ActivationType_SIGMOID;
29 using mindspore::schema::ActivationType_TANH;
30
31 namespace mindspore::kernel {
// Binary (two-input) arithmetic/comparison ops that the OpenCL Arithmetic
// kernel implements; used to route these primitive types to that kernel.
const std::set<schema::PrimitiveType> ArithmeticPrimitives = {schema::PrimitiveType_MulFusion,
                                                              schema::PrimitiveType_AddFusion,
                                                              schema::PrimitiveType_SubFusion,
                                                              schema::PrimitiveType_DivFusion,
                                                              schema::PrimitiveType_LogicalAnd,
                                                              schema::PrimitiveType_LogicalOr,
                                                              schema::PrimitiveType_Maximum,
                                                              schema::PrimitiveType_Minimum,
                                                              schema::PrimitiveType_FloorDiv,
                                                              schema::PrimitiveType_FloorMod,
                                                              schema::PrimitiveType_SquaredDifference,
                                                              schema::PrimitiveType_Equal,
                                                              schema::PrimitiveType_NotEqual,
                                                              schema::PrimitiveType_Less,
                                                              schema::PrimitiveType_LessEqual,
                                                              schema::PrimitiveType_Greater,
                                                              schema::PrimitiveType_GreaterEqual,
                                                              schema::PrimitiveType_Eltwise,
                                                              schema::PrimitiveType_BiasAdd};
51
// Unary (single-input) element-wise ops handled by the OpenCL ArithmeticSelf
// kernel; counterpart of the binary-op set above.
const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives = {
  schema::PrimitiveType_Abs,        schema::PrimitiveType_Ceil,  schema::PrimitiveType_Cos,
  schema::PrimitiveType_ExpFusion,  schema::PrimitiveType_Floor, schema::PrimitiveType_Log,
  schema::PrimitiveType_LogicalNot, schema::PrimitiveType_Round, schema::PrimitiveType_Rsqrt,
  schema::PrimitiveType_Sin,        schema::PrimitiveType_Neg,   schema::PrimitiveType_Sqrt,
  schema::PrimitiveType_Square};
58
GetActDefines()59 std::string GetActDefines() {
60 static std::string act_defines = "#define ActivationType_RELU " + std::to_string(ActivationType_RELU) +
61 "\n#define ActivationType_RELU6 " + std::to_string(ActivationType_RELU6) +
62 "\n#define ActivationType_LEAKY_RELU " + std::to_string(ActivationType_LEAKY_RELU) +
63 "\n#define ActivationType_TANH " + std::to_string(ActivationType_TANH) +
64 "\n#define ActivationType_SIGMOID " + std::to_string(ActivationType_SIGMOID) + "\n";
65 return act_defines;
66 }
67
// Rounds n up to the nearest power of two (a power of two maps to itself).
// Counts the bit width and the number of set bits in one pass: if exactly one
// bit is set, n is already a power of two and 1 << (width - 1) == n; otherwise
// the result is 1 << width. For n <= 0 the loop never runs and 1 is returned.
int GetUpPow2(int n) {
  int bit_width = 0;
  int set_bits = 0;
  for (int v = n; v > 0; v >>= 1) {
    set_bits += v & 1;
    ++bit_width;
  }
  return 1 << (bit_width - (set_bits == 1 ? 1 : 0));
}
78
// Returns the largest value <= divisor that evenly divides x; falls back to 1
// when no candidate divides x (or when divisor <= 0).
int GetMaxDivisor(int x, int divisor) {
  for (int candidate = divisor; candidate > 0; --candidate) {
    if (x % candidate == 0) {
      return candidate;
    }
  }
  return 1;
}
89
GetMaxDivisorStrategy0(int x,int divisor)90 int GetMaxDivisorStrategy0(int x, int divisor) {
91 if (divisor >= 8 && x % 8 == 0) {
92 return 8;
93 } else if (divisor >= 4 && x % 4 == 0) {
94 return 4;
95 } else if (divisor >= 2 && x % 2 == 0) {
96 return 2;
97 } else {
98 return GetMaxDivisor(x, divisor);
99 }
100 }
101
GetMaxDivisorStrategy1(int x,int divisor)102 int GetMaxDivisorStrategy1(int x, int divisor) {
103 if (divisor >= 8 && x % 8 == 0) {
104 return x / 8;
105 } else if (divisor >= 4 && x % 4 == 0) {
106 return x / 4;
107 } else if (divisor >= 2 && x % 2 == 0) {
108 return x / 2;
109 } else {
110 return GetMaxDivisor(x, divisor);
111 }
112 }
113
// Maps OpenCL runtime error codes (cl_int) to human-readable descriptions;
// consulted by CLErrorCode() below when formatting driver errors for logs.
std::map<cl_int, std::string> error_infos = {
  {CL_SUCCESS, "Success"},
  {CL_DEVICE_NOT_FOUND, "Device not found"},
  {CL_DEVICE_NOT_AVAILABLE, "Device not available"},
  {CL_COMPILER_NOT_AVAILABLE, "Compiler not available"},
  {CL_MEM_OBJECT_ALLOCATION_FAILURE, "Memory object allocation failure"},
  {CL_OUT_OF_RESOURCES, "Out of resources"},
  {CL_OUT_OF_HOST_MEMORY, "Out of host memory"},
  {CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling information not available"},
  {CL_MEM_COPY_OVERLAP, "Memory copy overlap"},
  {CL_IMAGE_FORMAT_MISMATCH, "Image format mismatch"},
  {CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image format not supported"},
  {CL_BUILD_PROGRAM_FAILURE, "Build program failure"},
  {CL_MAP_FAILURE, "Mapping failure"},
  {CL_MISALIGNED_SUB_BUFFER_OFFSET, "Misaligned sub-buffer offset"},
  {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "Execution status error for events in wait list"},
  {CL_COMPILE_PROGRAM_FAILURE, "Compile program failure"},
  {CL_LINKER_NOT_AVAILABLE, "Linker not available"},
  {CL_LINK_PROGRAM_FAILURE, "Link program failure"},
  {CL_DEVICE_PARTITION_FAILED, "Device partition failed"},
  {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "Kernel argument information not available"},
  {CL_INVALID_VALUE, "Invalid value"},
  {CL_INVALID_DEVICE_TYPE, "Invalid device type"},
  {CL_INVALID_PLATFORM, "Invalid platform"},
  {CL_INVALID_DEVICE, "Invalid device"},
  {CL_INVALID_CONTEXT, "Invalid context"},
  {CL_INVALID_QUEUE_PROPERTIES, "Invalid queue properties"},
  {CL_INVALID_COMMAND_QUEUE, "Invalid command queue"},
  {CL_INVALID_HOST_PTR, "Invalid host pointer"},
  {CL_INVALID_MEM_OBJECT, "Invalid memory object"},
  {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid image format descriptor"},
  {CL_INVALID_IMAGE_SIZE, "Invalid image size"},
  {CL_INVALID_SAMPLER, "Invalid sampler"},
  {CL_INVALID_BINARY, "Invalid binary"},
  {CL_INVALID_BUILD_OPTIONS, "Invalid build options"},
  {CL_INVALID_PROGRAM, "Invalid program"},
  {CL_INVALID_PROGRAM_EXECUTABLE, "Invalid program executable"},
  {CL_INVALID_KERNEL_NAME, "Invalid kernel name"},
  {CL_INVALID_KERNEL_DEFINITION, "Invalid kernel definition"},
  {CL_INVALID_KERNEL, "Invalid kernel"},
  {CL_INVALID_ARG_INDEX, "Invalid argument index"},
  {CL_INVALID_ARG_VALUE, "Invalid argument value"},
  {CL_INVALID_ARG_SIZE, "Invalid argument size"},
  {CL_INVALID_KERNEL_ARGS, "Invalid kernel arguments"},
  {CL_INVALID_WORK_DIMENSION, "Invalid work dimension"},
  {CL_INVALID_WORK_GROUP_SIZE, "Invalid work group size"},
  {CL_INVALID_WORK_ITEM_SIZE, "Invalid work item size"},
  {CL_INVALID_GLOBAL_OFFSET, "Invalid global offset"},
  {CL_INVALID_EVENT_WAIT_LIST, "Invalid event wait list"},
  {CL_INVALID_EVENT, "Invalid event"},
  {CL_INVALID_OPERATION, "Invalid operation"},
  {CL_INVALID_GL_OBJECT, "Invalid GL object"},
  {CL_INVALID_BUFFER_SIZE, "Invalid buffer size"},
  {CL_INVALID_MIP_LEVEL, "Invalid mip-level"},
  {CL_INVALID_GLOBAL_WORK_SIZE, "Invalid global work size"},
  {CL_INVALID_PROPERTY, "Invalid property"},
  {CL_INVALID_IMAGE_DESCRIPTOR, "Invalid image descriptor"},
  {CL_INVALID_COMPILER_OPTIONS, "Invalid compiler options"},
  {CL_INVALID_LINKER_OPTIONS, "Invalid linker options"},
  {CL_INVALID_DEVICE_PARTITION_COUNT, "Invalid device partition count"},
  {CL_INVALID_PIPE_SIZE, "Invalid pipe size"},
  {CL_INVALID_DEVICE_QUEUE, "Invalid device queue"},
  {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, "Invalid GL share group reference KHR"}};
177
CLErrorCode(cl_int error_code)178 std::string CLErrorCode(cl_int error_code) {
179 auto it = error_infos.find(error_code);
180 if (it == error_infos.end()) {
181 return "Unknown OpenCL error code";
182 } else {
183 return it->second;
184 }
185 }
186
GetBroadcastGpuAxis(int ndim,int ori_axis)187 int GetBroadcastGpuAxis(int ndim, int ori_axis) {
188 if (ori_axis >= ndim) {
189 return ndim - 1;
190 }
191 int axis = 0;
192 if (ndim == DIMENSION_1D) {
193 axis = 3;
194 } else if (ndim == DIMENSION_2D) {
195 axis = ori_axis == 0 ? 0 : 3;
196 } else if (ndim == DIMENSION_3D) {
197 axis = ori_axis == 0 ? 0 : ori_axis == 1 ? 2 : 3;
198 } else if (ndim == DIMENSION_4D) {
199 axis = ori_axis;
200 } else if (ndim > DIMENSION_4D) {
201 MS_LOG(ERROR) << "GPU doesn't support ndim>=" << ndim;
202 }
203 return axis;
204 }
205
// Repacks tensor data from dense NHWC layout into NHWC4 layout (channel
// dimension padded to a multiple of C4NUM via tensor.Slice), optionally
// converting between fp16 and fp32. int32 data is copied through unchanged,
// ignoring the fp16 flags.
// NOTE(review): dst must be large enough for the padded layout, and padding
// lanes are not cleared here -- presumably dst is pre-zeroed; confirm callers.
void PackNHWCToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
                     int data_type) {
  MS_ASSERT(src);
  MS_ASSERT(dst);
  // Typed views over the same buffers; only the view matching
  // (data_type, src_is_fp16/dst_is_fp16) is ever dereferenced.
  auto src_fp16 = reinterpret_cast<float16_t *>(src);
  auto src_fp32 = reinterpret_cast<float32_t *>(src);
  auto src_int32 = reinterpret_cast<int32_t *>(src);
  auto dst_fp16 = reinterpret_cast<float16_t *>(dst);
  auto dst_fp32 = reinterpret_cast<float32_t *>(dst);
  auto dst_int32 = reinterpret_cast<int32_t *>(dst);
  // src_idx walks the dense source linearly; dst_idx re-derives the position
  // with the padded channel stride Slice * C4NUM.
  for (int n = 0, src_idx = 0; n < tensor.N; n++) {
    for (int h = 0; h < tensor.H; ++h) {
      for (int w = 0; w < tensor.W; ++w) {
        for (int c = 0; c < tensor.C; ++c, ++src_idx) {
          int dst_idx = ((n * tensor.H + h) * tensor.W + w) * tensor.Slice * C4NUM + c;
          if (data_type == kNumberTypeInt32) {
            dst_int32[dst_idx] = src_int32[src_idx];
          } else if (dst_is_fp16) {
            dst_fp16[dst_idx] = src_is_fp16 ? src_fp16[src_idx] : static_cast<float16_t>(src_fp32[src_idx]);
          } else {
            dst_fp32[dst_idx] = src_is_fp16 ? static_cast<float32_t>(src_fp16[src_idx]) : src_fp32[src_idx];
          }
        }
      }
    }
  }
  // scalar: broadcast the single value across all four lanes of the first
  // vector slot so vectorized kernels can read any component.
  if (tensor.ElementsNum == 1) {
    if (dst_is_fp16) {
      dst_fp16[3] = dst_fp16[2] = dst_fp16[1] = dst_fp16[0];
    } else {
      dst_fp32[3] = dst_fp32[2] = dst_fp32[1] = dst_fp32[0];
    }
  }
}
241
CheckParamLikeTensor(const std::string & kernel_name,const std::string & tensor_name,lite::Tensor * tensor,TypeId expect_data_type,const std::vector<int> & expect_shape)242 int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tensor_name, lite::Tensor *tensor,
243 TypeId expect_data_type, const std::vector<int> &expect_shape) {
244 if (!tensor->IsConst()) {
245 MS_LOG(ERROR) << "in " << kernel_name << ": tensor " << tensor_name << " must be Const.";
246 return RET_ERROR;
247 }
248 if (tensor->data_type() != expect_data_type) {
249 MS_LOG(ERROR) << "in " << kernel_name << ": tensor's data_type must be " << expect_data_type;
250 return RET_ERROR;
251 }
252 if (tensor->shape() != expect_shape) {
253 std::string expect_shape_str = "(";
254 for (auto i : expect_shape) {
255 expect_shape_str += std::to_string(i) + ",";
256 }
257 expect_shape_str += ")";
258
259 std::string tensor_shape_str = "(";
260 for (auto i : tensor->shape()) {
261 tensor_shape_str += std::to_string(i) + ",";
262 }
263 tensor_shape_str += ")";
264
265 MS_LOG(ERROR) << "in " << kernel_name
266 << ": tensor's shape is error. expect_shape: " + expect_shape_str +
267 " tensor->shape(): " + tensor_shape_str;
268 return RET_ERROR;
269 }
270 return RET_OK;
271 }
272
StoreTensorData(lite::Tensor * tensor)273 void *StoreTensorData(lite::Tensor *tensor) {
274 if ((tensor != nullptr) && (tensor->data() != nullptr) && (tensor->Size() > 0)) {
275 void *stored_data = malloc(tensor->Size());
276 if (stored_data == nullptr) {
277 MS_LOG(ERROR) << "StoreTensorData Malloc Failed.";
278 return nullptr;
279 }
280 memcpy(stored_data, tensor->data(), tensor->Size());
281 return stored_data;
282 }
283 return nullptr;
284 }
285
// Releases a buffer previously returned by StoreTensorData(); safe on nullptr.
void FreeStoredData(void *data) {
  if (data == nullptr) {
    return;
  }
  free(data);
}
291
CreateBuildOptionsExtByDType(TypeId type_id)292 std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id) {
293 std::vector<std::string> build_options_ext;
294 if (type_id == kNumberTypeInt32) {
295 build_options_ext = {" -DDTYPE=int -DDTYPE4=int4 -DWRITE_IMAGE=write_imagei -DREAD_IMAGE=read_imagei "};
296 } else if (type_id == kNumberTypeFloat32) {
297 build_options_ext = {" -DDTYPE=float -DDTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "};
298 } else if (type_id == kNumberTypeFloat16) {
299 build_options_ext = {" -DDTYPE=half -DDTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "};
300 }
301 return build_options_ext;
302 }
303 } // namespace mindspore::kernel
304