1 /*
2 * Copyright (c) 2020 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.h"
25
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/GPUTarget.h"
29 #include "src/core/CL/gemm/CLGEMMHelpers.h"
30
31 #include <map>
32 #include <utility>
33
34 namespace arm_compute
35 {
36 namespace cl_gemm
37 {
CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu)38 CLGEMMReshapedKernelConfigurationValhall::CLGEMMReshapedKernelConfigurationValhall(GPUTarget gpu)
39 : ICLGEMMKernelConfiguration(gpu)
40 {
41 }
42
configure(unsigned int m,unsigned int n,unsigned int k,unsigned int b,DataType data_type)43 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
44 {
45 using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMReshapedKernelConfigurationValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
46
47 // Configurations for Mali-G77
48 static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G77 =
49 {
50 { DataType::F32, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_f32 },
51 { DataType::F16, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_f16 },
52 { DataType::QASYMM8, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_u8 },
53 { DataType::QSYMM8, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_u8 },
54 { DataType::QASYMM8_SIGNED, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_u8 },
55 { DataType::QSYMM8_PER_CHANNEL, &CLGEMMReshapedKernelConfigurationValhall::configure_G77_u8 }
56 };
57
58 switch(_target)
59 {
60 case GPUTarget::G77:
61 default:
62 if(gemm_configs_G77.find(data_type) != gemm_configs_G77.end())
63 {
64 return (this->*gemm_configs_G77[data_type])(m, n, k, b);
65 }
66 else
67 {
68 ARM_COMPUTE_ERROR("Not supported data type");
69 }
70 }
71 }
72
configure_G77_f32(unsigned int m,unsigned int n,unsigned int k,unsigned int b)73 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
74 {
75 ARM_COMPUTE_UNUSED(k);
76 ARM_COMPUTE_UNUSED(b);
77
78 if(n <= 4)
79 {
80 return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
81 }
82 else
83 {
84 return configure_lhs_rhs_info(m, n, 5, 4, 4, 2, 16, false, true, false, true);
85 }
86 }
87
configure_G77_f16(unsigned int m,unsigned int n,unsigned int k,unsigned int b)88 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
89 {
90 ARM_COMPUTE_UNUSED(k);
91 ARM_COMPUTE_UNUSED(b);
92
93 const float r_mn = static_cast<float>(m) / static_cast<float>(n);
94 const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
95 const float r_mk = static_cast<float>(m) / static_cast<float>(k);
96 const float r_nk = static_cast<float>(n) / static_cast<float>(k);
97
98 GEMMLHSMatrixInfo lhs_info_buf;
99 GEMMRHSMatrixInfo rhs_info_buf;
100 GEMMLHSMatrixInfo lhs_info_img;
101 GEMMRHSMatrixInfo rhs_info_img;
102
103 std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, false, true, false, false);
104
105 if(r_mk <= 0.11824845522642136)
106 {
107 if(workload <= 880.0)
108 {
109 return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, false, false, true, false, false);
110 }
111 else
112 {
113 if(r_nk <= 0.42521367967128754)
114 {
115 if(workload <= 1726.4000244140625)
116 {
117 return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, false, false, true, false, false);
118 }
119 else
120 {
121 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, false, true, true, false, true);
122
123 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
124 std::make_pair(lhs_info_buf, rhs_info_buf),
125 n, k, b, DataType::F16);
126 }
127 }
128 else
129 {
130 if(workload <= 1241.6000366210938)
131 {
132 return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, false, false, true, false, false);
133 }
134 else
135 {
136 return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, false, false, true, false, false);
137 }
138 }
139 }
140 }
141 else
142 {
143 if(workload <= 11404.7998046875)
144 {
145 if(r_mk <= 1.0126488208770752)
146 {
147 if(r_mn <= 2.545312523841858)
148 {
149 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, false, true, true, false, true);
150
151 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
152 std::make_pair(lhs_info_buf, rhs_info_buf),
153 n, k, b, DataType::F16);
154 }
155 else
156 {
157 return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, false, false, true, false, false);
158 }
159 }
160 else
161 {
162 if(workload <= 2881.199951171875)
163 {
164 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, false, false, true, false, true);
165
166 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
167 std::make_pair(lhs_info_buf, rhs_info_buf),
168 n, k, b, DataType::F16);
169 }
170 else
171 {
172 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, false, true, true, false, true);
173
174 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
175 std::make_pair(lhs_info_buf, rhs_info_buf),
176 n, k, b, DataType::F16);
177 }
178 }
179 }
180 else
181 {
182 if(r_nk <= 0.5765306055545807)
183 {
184 if(r_mn <= 6.010416746139526)
185 {
186 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, false, true, true, false, true);
187
188 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
189 std::make_pair(lhs_info_buf, rhs_info_buf),
190 n, k, b, DataType::F16);
191 }
192 else
193 {
194 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, false, true, false, true);
195
196 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
197 std::make_pair(lhs_info_buf, rhs_info_buf),
198 n, k, b, DataType::F16);
199 }
200 }
201 else
202 {
203 std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, false, true, false, true);
204
205 return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
206 std::make_pair(lhs_info_buf, rhs_info_buf),
207 n, k, b, DataType::F16);
208 }
209 }
210 }
211 }
212
configure_G77_u8(unsigned int m,unsigned int n,unsigned int k,unsigned int b)213 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedKernelConfigurationValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
214 {
215 ARM_COMPUTE_UNUSED(k);
216 ARM_COMPUTE_UNUSED(b);
217
218 if(n <= 4)
219 {
220 return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, false, false, false, true);
221 }
222 else
223 {
224 return configure_lhs_rhs_info(m, n, 4, 4, 16, 2, 2, false, true, false, true);
225 }
226 }
227 } // namespace cl_gemm
228 } // namespace arm_compute
229