• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/opencl/utils.h"
18 #include <fstream>
19 #include <algorithm>
20 #include <vector>
21 #include <map>
22 #include "src/kernel_registry.h"
23 #include "src/common/file_utils.h"
24 
25 using mindspore::schema::ActivationType_LEAKY_RELU;
26 using mindspore::schema::ActivationType_RELU;
27 using mindspore::schema::ActivationType_RELU6;
28 using mindspore::schema::ActivationType_SIGMOID;
29 using mindspore::schema::ActivationType_TANH;
30 
31 namespace mindspore::kernel {
// Primitive types routed to the binary (two-input) arithmetic OpenCL kernel.
const std::set<schema::PrimitiveType> ArithmeticPrimitives = {schema::PrimitiveType_MulFusion,
                                                              schema::PrimitiveType_AddFusion,
                                                              schema::PrimitiveType_SubFusion,
                                                              schema::PrimitiveType_DivFusion,
                                                              schema::PrimitiveType_LogicalAnd,
                                                              schema::PrimitiveType_LogicalOr,
                                                              schema::PrimitiveType_Maximum,
                                                              schema::PrimitiveType_Minimum,
                                                              schema::PrimitiveType_FloorDiv,
                                                              schema::PrimitiveType_FloorMod,
                                                              schema::PrimitiveType_SquaredDifference,
                                                              schema::PrimitiveType_Equal,
                                                              schema::PrimitiveType_NotEqual,
                                                              schema::PrimitiveType_Less,
                                                              schema::PrimitiveType_LessEqual,
                                                              schema::PrimitiveType_Greater,
                                                              schema::PrimitiveType_GreaterEqual,
                                                              schema::PrimitiveType_Eltwise,
                                                              schema::PrimitiveType_BiasAdd};

// Primitive types routed to the unary (single-input) arithmetic-self OpenCL kernel.
const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives = {
  schema::PrimitiveType_Abs,        schema::PrimitiveType_Ceil,  schema::PrimitiveType_Cos,
  schema::PrimitiveType_ExpFusion,  schema::PrimitiveType_Floor, schema::PrimitiveType_Log,
  schema::PrimitiveType_LogicalNot, schema::PrimitiveType_Round, schema::PrimitiveType_Rsqrt,
  schema::PrimitiveType_Sin,        schema::PrimitiveType_Neg,   schema::PrimitiveType_Sqrt,
  schema::PrimitiveType_Square};
58 
GetActDefines()59 std::string GetActDefines() {
60   static std::string act_defines = "#define ActivationType_RELU " + std::to_string(ActivationType_RELU) +
61                                    "\n#define ActivationType_RELU6 " + std::to_string(ActivationType_RELU6) +
62                                    "\n#define ActivationType_LEAKY_RELU " + std::to_string(ActivationType_LEAKY_RELU) +
63                                    "\n#define ActivationType_TANH " + std::to_string(ActivationType_TANH) +
64                                    "\n#define ActivationType_SIGMOID " + std::to_string(ActivationType_SIGMOID) + "\n";
65   return act_defines;
66 }
67 
// Rounds n up to the nearest power of two (n itself if already a power of
// two). Non-positive n yields 1. Caller must keep n small enough that the
// result fits in int.
int GetUpPow2(int n) {
  int pow2 = 1;
  while (pow2 < n) {
    pow2 <<= 1;
  }
  return pow2;
}
78 
// Returns the largest value in [1, divisor] that evenly divides x,
// or 1 when no candidate divides x (including divisor <= 0).
int GetMaxDivisor(int x, int divisor) {
  for (int candidate = divisor; candidate > 0; --candidate) {
    if (x % candidate == 0) {
      return candidate;
    }
  }
  return 1;
}
89 
GetMaxDivisorStrategy0(int x,int divisor)90 int GetMaxDivisorStrategy0(int x, int divisor) {
91   if (divisor >= 8 && x % 8 == 0) {
92     return 8;
93   } else if (divisor >= 4 && x % 4 == 0) {
94     return 4;
95   } else if (divisor >= 2 && x % 2 == 0) {
96     return 2;
97   } else {
98     return GetMaxDivisor(x, divisor);
99   }
100 }
101 
GetMaxDivisorStrategy1(int x,int divisor)102 int GetMaxDivisorStrategy1(int x, int divisor) {
103   if (divisor >= 8 && x % 8 == 0) {
104     return x / 8;
105   } else if (divisor >= 4 && x % 4 == 0) {
106     return x / 4;
107   } else if (divisor >= 2 && x % 2 == 0) {
108     return x / 2;
109   } else {
110     return GetMaxDivisor(x, divisor);
111   }
112 }
113 
// Human-readable descriptions for OpenCL runtime/compile error codes,
// keyed by the cl_int codes from the OpenCL headers; consumed by CLErrorCode().
std::map<cl_int, std::string> error_infos = {
  {CL_SUCCESS, "Success"},
  {CL_DEVICE_NOT_FOUND, "Device not found"},
  {CL_DEVICE_NOT_AVAILABLE, "Device not available"},
  {CL_COMPILER_NOT_AVAILABLE, "Compiler not available"},
  {CL_MEM_OBJECT_ALLOCATION_FAILURE, "Memory object allocation failure"},
  {CL_OUT_OF_RESOURCES, "Out of resources"},
  {CL_OUT_OF_HOST_MEMORY, "Out of host memory"},
  {CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling information not available"},
  {CL_MEM_COPY_OVERLAP, "Memory copy overlap"},
  {CL_IMAGE_FORMAT_MISMATCH, "Image format mismatch"},
  {CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image format not supported"},
  {CL_BUILD_PROGRAM_FAILURE, "Build program failure"},
  {CL_MAP_FAILURE, "Mapping failure"},
  {CL_MISALIGNED_SUB_BUFFER_OFFSET, "Misaligned sub-buffer offset"},
  {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "Execution status error for events in wait list"},
  {CL_COMPILE_PROGRAM_FAILURE, "Compile program failure"},
  {CL_LINKER_NOT_AVAILABLE, "Linker not available"},
  {CL_LINK_PROGRAM_FAILURE, "Link program failure"},
  {CL_DEVICE_PARTITION_FAILED, "Device partition failed"},
  {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "Kernel argument information not available"},
  {CL_INVALID_VALUE, "Invalid value"},
  {CL_INVALID_DEVICE_TYPE, "Invalid device type"},
  {CL_INVALID_PLATFORM, "Invalid platform"},
  {CL_INVALID_DEVICE, "Invalid device"},
  {CL_INVALID_CONTEXT, "Invalid context"},
  {CL_INVALID_QUEUE_PROPERTIES, "Invalid queue properties"},
  {CL_INVALID_COMMAND_QUEUE, "Invalid command queue"},
  {CL_INVALID_HOST_PTR, "Invalid host pointer"},
  {CL_INVALID_MEM_OBJECT, "Invalid memory object"},
  {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid image format descriptor"},
  {CL_INVALID_IMAGE_SIZE, "Invalid image size"},
  {CL_INVALID_SAMPLER, "Invalid sampler"},
  {CL_INVALID_BINARY, "Invalid binary"},
  {CL_INVALID_BUILD_OPTIONS, "Invalid build options"},
  {CL_INVALID_PROGRAM, "Invalid program"},
  {CL_INVALID_PROGRAM_EXECUTABLE, "Invalid program executable"},
  {CL_INVALID_KERNEL_NAME, "Invalid kernel name"},
  {CL_INVALID_KERNEL_DEFINITION, "Invalid kernel definition"},
  {CL_INVALID_KERNEL, "Invalid kernel"},
  {CL_INVALID_ARG_INDEX, "Invalid argument index"},
  {CL_INVALID_ARG_VALUE, "Invalid argument value"},
  {CL_INVALID_ARG_SIZE, "Invalid argument size"},
  {CL_INVALID_KERNEL_ARGS, "Invalid kernel arguments"},
  {CL_INVALID_WORK_DIMENSION, "Invalid work dimension"},
  {CL_INVALID_WORK_GROUP_SIZE, "Invalid work group size"},
  {CL_INVALID_WORK_ITEM_SIZE, "Invalid work item size"},
  {CL_INVALID_GLOBAL_OFFSET, "Invalid global offset"},
  {CL_INVALID_EVENT_WAIT_LIST, "Invalid event wait list"},
  {CL_INVALID_EVENT, "Invalid event"},
  {CL_INVALID_OPERATION, "Invalid operation"},
  {CL_INVALID_GL_OBJECT, "Invalid GL object"},
  {CL_INVALID_BUFFER_SIZE, "Invalid buffer size"},
  {CL_INVALID_MIP_LEVEL, "Invalid mip-level"},
  {CL_INVALID_GLOBAL_WORK_SIZE, "Invalid global work size"},
  {CL_INVALID_PROPERTY, "Invalid property"},
  {CL_INVALID_IMAGE_DESCRIPTOR, "Invalid image descriptor"},
  {CL_INVALID_COMPILER_OPTIONS, "Invalid compiler options"},
  {CL_INVALID_LINKER_OPTIONS, "Invalid linker options"},
  {CL_INVALID_DEVICE_PARTITION_COUNT, "Invalid device partition count"},
  {CL_INVALID_PIPE_SIZE, "Invalid pipe size"},
  {CL_INVALID_DEVICE_QUEUE, "Invalid device queue"},
  {CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR, "Invalid GL share group reference KHR"}};
177 
CLErrorCode(cl_int error_code)178 std::string CLErrorCode(cl_int error_code) {
179   auto it = error_infos.find(error_code);
180   if (it == error_infos.end()) {
181     return "Unknown OpenCL error code";
182   } else {
183     return it->second;
184   }
185 }
186 
GetBroadcastGpuAxis(int ndim,int ori_axis)187 int GetBroadcastGpuAxis(int ndim, int ori_axis) {
188   if (ori_axis >= ndim) {
189     return ndim - 1;
190   }
191   int axis = 0;
192   if (ndim == DIMENSION_1D) {
193     axis = 3;
194   } else if (ndim == DIMENSION_2D) {
195     axis = ori_axis == 0 ? 0 : 3;
196   } else if (ndim == DIMENSION_3D) {
197     axis = ori_axis == 0 ? 0 : ori_axis == 1 ? 2 : 3;
198   } else if (ndim == DIMENSION_4D) {
199     axis = ori_axis;
200   } else if (ndim > DIMENSION_4D) {
201     MS_LOG(ERROR) << "GPU doesn't support ndim>=" << ndim;
202   }
203   return axis;
204 }
205 
// Repacks a dense NHWC buffer into NHWC4 layout, where the channel dimension
// is padded up to a multiple of 4 (each pixel owns tensor.Slice * C4NUM
// destination slots). Optionally converts between fp32 and fp16; int32 data
// (data_type == kNumberTypeInt32) is copied verbatim with no conversion.
// src must hold tensor.N*H*W*C elements; dst must hold the padded NHWC4 size.
void PackNHWCToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
                     int data_type) {
  MS_ASSERT(src);
  MS_ASSERT(dst);
  // View both buffers under every supported element type up front; only the
  // pair selected by (data_type, src_is_fp16, dst_is_fp16) is dereferenced.
  auto src_fp16 = reinterpret_cast<float16_t *>(src);
  auto src_fp32 = reinterpret_cast<float32_t *>(src);
  auto src_int32 = reinterpret_cast<int32_t *>(src);
  auto dst_fp16 = reinterpret_cast<float16_t *>(dst);
  auto dst_fp32 = reinterpret_cast<float32_t *>(dst);
  auto dst_int32 = reinterpret_cast<int32_t *>(dst);
  // src is dense, so src_idx simply increments per element; dst_idx skips the
  // pad channels at the end of each pixel's Slice*C4NUM-wide channel block.
  for (int n = 0, src_idx = 0; n < tensor.N; n++) {
    for (int h = 0; h < tensor.H; ++h) {
      for (int w = 0; w < tensor.W; ++w) {
        for (int c = 0; c < tensor.C; ++c, ++src_idx) {
          int dst_idx = ((n * tensor.H + h) * tensor.W + w) * tensor.Slice * C4NUM + c;
          if (data_type == kNumberTypeInt32) {
            dst_int32[dst_idx] = src_int32[src_idx];
          } else if (dst_is_fp16) {
            dst_fp16[dst_idx] = src_is_fp16 ? src_fp16[src_idx] : static_cast<float16_t>(src_fp32[src_idx]);
          } else {
            dst_fp32[dst_idx] = src_is_fp16 ? static_cast<float32_t>(src_fp16[src_idx]) : src_fp32[src_idx];
          }
        }
      }
    }
  }
  // scalar: broadcast the single value across all four lanes of the first vec4.
  // NOTE(review): only the fp16/fp32 views are replicated here, not int32 —
  // confirm scalar int32 tensors never take this path.
  if (tensor.ElementsNum == 1) {
    if (dst_is_fp16) {
      dst_fp16[3] = dst_fp16[2] = dst_fp16[1] = dst_fp16[0];
    } else {
      dst_fp32[3] = dst_fp32[2] = dst_fp32[1] = dst_fp32[0];
    }
  }
}
241 
CheckParamLikeTensor(const std::string & kernel_name,const std::string & tensor_name,lite::Tensor * tensor,TypeId expect_data_type,const std::vector<int> & expect_shape)242 int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tensor_name, lite::Tensor *tensor,
243                          TypeId expect_data_type, const std::vector<int> &expect_shape) {
244   if (!tensor->IsConst()) {
245     MS_LOG(ERROR) << "in " << kernel_name << ": tensor " << tensor_name << " must be Const.";
246     return RET_ERROR;
247   }
248   if (tensor->data_type() != expect_data_type) {
249     MS_LOG(ERROR) << "in " << kernel_name << ": tensor's data_type must be " << expect_data_type;
250     return RET_ERROR;
251   }
252   if (tensor->shape() != expect_shape) {
253     std::string expect_shape_str = "(";
254     for (auto i : expect_shape) {
255       expect_shape_str += std::to_string(i) + ",";
256     }
257     expect_shape_str += ")";
258 
259     std::string tensor_shape_str = "(";
260     for (auto i : tensor->shape()) {
261       tensor_shape_str += std::to_string(i) + ",";
262     }
263     tensor_shape_str += ")";
264 
265     MS_LOG(ERROR) << "in " << kernel_name
266                   << ": tensor's shape is error. expect_shape: " + expect_shape_str +
267                        " tensor->shape(): " + tensor_shape_str;
268     return RET_ERROR;
269   }
270   return RET_OK;
271 }
272 
StoreTensorData(lite::Tensor * tensor)273 void *StoreTensorData(lite::Tensor *tensor) {
274   if ((tensor != nullptr) && (tensor->data() != nullptr) && (tensor->Size() > 0)) {
275     void *stored_data = malloc(tensor->Size());
276     if (stored_data == nullptr) {
277       MS_LOG(ERROR) << "StoreTensorData Malloc Failed.";
278       return nullptr;
279     }
280     memcpy(stored_data, tensor->data(), tensor->Size());
281     return stored_data;
282   }
283   return nullptr;
284 }
285 
// Releases a buffer previously returned by StoreTensorData(); safe on nullptr.
void FreeStoredData(void *data) {
  if (data == nullptr) {
    return;
  }
  free(data);
}
291 
CreateBuildOptionsExtByDType(TypeId type_id)292 std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id) {
293   std::vector<std::string> build_options_ext;
294   if (type_id == kNumberTypeInt32) {
295     build_options_ext = {" -DDTYPE=int -DDTYPE4=int4 -DWRITE_IMAGE=write_imagei  -DREAD_IMAGE=read_imagei "};
296   } else if (type_id == kNumberTypeFloat32) {
297     build_options_ext = {" -DDTYPE=float -DDTYPE4=float4 -DWRITE_IMAGE=write_imagef -DREAD_IMAGE=read_imagef "};
298   } else if (type_id == kNumberTypeFloat16) {
299     build_options_ext = {" -DDTYPE=half -DDTYPE4=half4 -DWRITE_IMAGE=write_imageh -DREAD_IMAGE=read_imageh "};
300   }
301   return build_options_ext;
302 }
303 }  // namespace mindspore::kernel
304