• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_UTILS_H_
18 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_UTILS_H_
19 
20 #include <string>
21 #include <vector>
22 #include <set>
23 #include "CL/cl2.hpp"
24 #include "src/common/log_adapter.h"
25 #include "nnacl/op_base.h"
26 #include "src/litert/lite_kernel.h"
27 #include "src/common/utils.h"
28 #include "src/litert/kernel/opencl/opencl_kernel.h"
29 
30 namespace mindspore::kernel {
31 struct GpuTensorInfo;
32 
33 // for fusion
34 extern const std::set<schema::PrimitiveType> ArithmeticPrimitives;
35 extern const std::set<schema::PrimitiveType> ArithmeticSelfPrimitives;
IsArithmetic(schema::PrimitiveType type)36 inline bool IsArithmetic(schema::PrimitiveType type) { return ArithmeticPrimitives.count(type); }
IsArithmeticSelf(schema::PrimitiveType type)37 inline bool IsArithmeticSelf(schema::PrimitiveType type) { return ArithmeticSelfPrimitives.count(type); }
38 
39 std::string GetActDefines();
40 
41 int GetUpPow2(int n);
42 
43 int GetMaxDivisor(int x, int divisor);
44 
45 int GetMaxDivisorStrategy0(int x, int divisor);
46 
47 int GetMaxDivisorStrategy1(int x, int divisor);
48 
49 std::string CLErrorCode(cl_int error_code);
50 
51 int GetBroadcastGpuAxis(int ndim, int ori_axis);
52 
53 void PackNHWCToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
54                      int data_type = kNumberTypeFloat32);
55 
56 void PackNCHWToNHWC4(void *src, void *dst, bool src_is_fp16, bool dst_is_fp16, const GpuTensorInfo &tensor,
57                      int data_type = kNumberTypeFloat32);
58 
59 int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tensor_name, lite::Tensor *tensor,
60                          TypeId expect_data_type, const std::vector<int> &expect_shape);
61 
62 void *StoreTensorData(lite::Tensor *tensor);
63 
64 void FreeStoredData(void *data);
65 
66 std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id);
67 
68 template <class T1, class T2>
PackNCHWToNC4HW4(void * src,void * dst,int batch,int plane_in,int plane_out,int channel,const std::function<T2 (T1)> & to_dtype)69 void PackNCHWToNC4HW4(void *src, void *dst, int batch, int plane_in, int plane_out, int channel,
70                       const std::function<T2(T1)> &to_dtype) {
71   MS_ASSERT(src);
72   MS_ASSERT(dst);
73   int c4 = UP_DIV(channel, C4NUM);
74   for (int b = 0; b < batch; b++) {
75     int src_offset = b * plane_in * channel;
76     int dst_offset = b * plane_out * c4;
77     for (int c = 0; c < channel; c++) {
78       int c4_block_num = c / C4NUM;
79       int c4_block_rem = c % C4NUM;
80       int src_c_offset = src_offset + c * plane_in;
81       int dst_c_offset = dst_offset + c4_block_num * plane_out;
82       for (int k = 0; k < plane_in; k++) {
83         int src_kernel_offset = src_c_offset + k;
84         int dst_kernel_offset = dst_c_offset + C4NUM * k + c4_block_rem;
85         (static_cast<T2 *>(dst) + dst_kernel_offset)[0] = to_dtype((static_cast<T1 *>(src) + src_kernel_offset)[0]);
86       }
87     }
88   }
89 }
90 
// Accumulator type used by MatrixMultiply. It is float by default, which keeps
// the historical behaviour for float, half-precision and integer element
// types, and is widened to T itself for double / long double so that those
// element types are not silently rounded to float precision.
template <class T>
struct MatrixMultiplyAccumulator {
  using type = float;
};
template <>
struct MatrixMultiplyAccumulator<double> {
  using type = double;
};
template <>
struct MatrixMultiplyAccumulator<long double> {
  using type = long double;
};

// Computes the row-major matrix product C = A * B.
//
// A : M x N matrix, element (i, k) at A[i * N + k].
// B : N x K matrix, element (k, j) at B[k * K + j].
// Returns the M x K result, element (i, j) at index i * K + j.
//
// An empty vector is returned when M or K is not positive. When N is not
// positive there is nothing to sum, so the result is an M x K vector of
// zero-initialised elements.
template <class T>
std::vector<T> MatrixMultiply(const T A[], const T B[], int M, int N, int K) {
  if (M <= 0 || K <= 0) {
    // A negative product would otherwise be converted into a huge allocation size.
    return {};
  }
  using Accumulator = typename MatrixMultiplyAccumulator<T>::type;
  // Index with size_t so large matrices cannot overflow int offset arithmetic.
  const size_t rows = static_cast<size_t>(M);
  const size_t cols = static_cast<size_t>(K);
  const size_t inner = N > 0 ? static_cast<size_t>(N) : 0;
  std::vector<T> C(rows * cols);
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < cols; ++j) {
      Accumulator s = 0;
      for (size_t k = 0; k < inner; ++k) {
        s += A[i * inner + k] * B[k * cols + j];
      }
      C[i * cols + j] = s;
    }
  }
  return C;
}
105 
106 }  // namespace mindspore::kernel
107 
108 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_UTILS_H_
109