• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_UTILS_H_
18 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_UTILS_H_
19 
20 #include <string>
21 #include <vector>
22 #include <set>
23 #include "CL/cl2.hpp"
24 #include "src/common/log_adapter.h"
25 #include "nnacl/op_base.h"
26 #include "src/inner_kernel.h"
27 #include "src/common/utils.h"
28 #include "src/runtime/kernel/opencl/opencl_kernel.h"
29 
30 namespace mindspore::kernel {
31 struct GpuTensorInfo;
32 
33 // for fusion
34 extern const std::set<schema::PrimitiveType> ArithmeticPrimitives;
35 extern const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives;
IsArithmetic(schema::PrimitiveType type)36 inline bool IsArithmetic(schema::PrimitiveType type) { return ArithmeticPrimitives.count(type); }
IsArithmeticSelf(schema::PrimitiveType type)37 inline bool IsArithmeticSelf(schema::PrimitiveType type) { return ArithmeticSelfPrimitives.count(type); }
38 
39 std::string GetActDefines();
40 
41 int GetUpPow2(int n);
42 
43 int GetMaxDivisor(int x, int divisor);
44 
45 int GetMaxDivisorStrategy0(int x, int divisor);
46 
47 int GetMaxDivisorStrategy1(int x, int divisor);
48 
49 std::string CLErrorCode(cl_int error_code);
50 
51 int GetBroadcastGpuAxis(int ndim, int ori_axis);
52 
53 void PackNHWCToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
54                      int data_type = kNumberTypeFloat32);
55 
56 int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tensor_name, lite::Tensor *tensor,
57                          TypeId expect_data_type, const std::vector<int> &expect_shape);
58 
59 void *StoreTensorData(lite::Tensor *tensor);
60 
61 void FreeStoredData(void *data);
62 
63 std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id);
64 
65 template <class T1, class T2>
PackNCHWToNC4HW4(void * src,void * dst,int batch,int plane_in,int plane_out,int channel,const std::function<T2 (T1)> & to_dtype)66 void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane_in, int plane_out, int channel,
67                       const std::function<T2(T1)> &to_dtype) {
68   MS_ASSERT(src);
69   MS_ASSERT(dst);
70   int c4 = UP_DIV(channel, C4NUM);
71   for (int b = 0; b < batch; b++) {
72     int src_offset = b * plane_in * channel;
73     int dst_offset = b * plane_out * c4;
74     for (int c = 0; c < channel; c++) {
75       int c4_block_num = c / C4NUM;
76       int c4_block_rem = c % C4NUM;
77       int src_c_offset = src_offset + c * plane_in;
78       int dst_c_offset = dst_offset + c4_block_num * plane_out;
79       for (int k = 0; k < plane_in; k++) {
80         int src_kernel_offset = src_c_offset + k;
81         int dst_kernel_offset = dst_c_offset + C4NUM * k + c4_block_rem;
82         (static_cast<T2 *>(dst) + dst_kernel_offset)[0] = to_dtype((static_cast<T1 *>(src) + src_kernel_offset)[0]);
83       }
84     }
85   }
86 }
87 
/**
 * Dense row-major matrix product C = A * B.
 *
 * @param A  M x N matrix, element (i, k) stored at A[i * N + k]
 * @param B  N x K matrix, element (k, j) stored at B[k * K + j]
 * @param M  number of rows of A and of the result
 * @param N  number of columns of A / rows of B (the reduction dimension)
 * @param K  number of columns of B and of the result
 * @return   M x K matrix, element (i, j) stored at index i * K + j
 *
 * The running sum is kept in the type produced by multiplying two elements rather than in a
 * hard-coded float. For T = float this is still float; for double and integer element types it
 * avoids silently rounding the sum through single precision before it is stored back as T.
 */
template <class T>
std::vector<T> MatrixMultiply(const T A[], const T B[], int M, int N, int K) {
  // Accumulator type follows the language's arithmetic promotion of T * T (unevaluated operand).
  using Acc = decltype(A[0] * B[0]);
  std::vector<T> C(M * K);
  for (int i = 0; i < M; ++i) {
    const T *a_row = A + i * N;
    for (int j = 0; j < K; ++j) {
      Acc sum{};
      for (int k = 0; k < N; ++k) {
        sum += a_row[k] * B[k * K + j];
      }
      C[i * K + j] = sum;
    }
  }
  return C;
}
102 
103 }  // namespace mindspore::kernel
104 
105 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_UTILS_H_
106