• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "src/core/CL/gemm/CLGEMMHelpers.h"
29 
30 #include <map>
31 #include <utility>
32 
33 namespace arm_compute
34 {
35 namespace cl_gemm
36 {
CLGEMMKernelSelectionValhall(GPUTarget gpu)37 CLGEMMKernelSelectionValhall::CLGEMMKernelSelectionValhall(GPUTarget gpu)
38     : ICLGEMMKernelSelection(gpu)
39 {
40 }
41 
select_kernel(const CLGEMMKernelSelectionParams & params)42 CLGEMMKernelType CLGEMMKernelSelectionValhall::select_kernel(const CLGEMMKernelSelectionParams &params)
43 {
44     // _target could be used in the future to have a dedicated heuristic for each GPU IP
45     ARM_COMPUTE_UNUSED(_target);
46 
47     using FunctionExecutorPtr = CLGEMMKernelType (CLGEMMKernelSelectionValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
48 
49     // Default configurations for Valhall architectures
50     static std::map<DataType, FunctionExecutorPtr> gemm_default_configs =
51     {
52         { DataType::F32, &CLGEMMKernelSelectionValhall::default_f32 },
53         { DataType::F16, &CLGEMMKernelSelectionValhall::default_f16 },
54         { DataType::QASYMM8, &CLGEMMKernelSelectionValhall::default_q8 },
55         { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionValhall::default_q8 },
56         { DataType::QSYMM8, &CLGEMMKernelSelectionValhall::default_q8 },
57         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionValhall::default_q8 }
58     };
59 
60     // Mali-G77 configurations
61     static std::map<DataType, FunctionExecutorPtr> gemm_g77_configs =
62     {
63         { DataType::F32, &CLGEMMKernelSelectionValhall::default_f32 },
64         { DataType::F16, &CLGEMMKernelSelectionValhall::g77_f16 },
65         { DataType::QASYMM8, &CLGEMMKernelSelectionValhall::default_q8 },
66         { DataType::QASYMM8_SIGNED, &CLGEMMKernelSelectionValhall::default_q8 },
67         { DataType::QSYMM8, &CLGEMMKernelSelectionValhall::default_q8 },
68         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMKernelSelectionValhall::default_q8 }
69     };
70 
71     const DataType data_type = params.data_type;
72 
73     switch(_target)
74     {
75         case GPUTarget::G77:
76             if(gemm_g77_configs.find(data_type) != gemm_g77_configs.end())
77             {
78                 return (this->*gemm_g77_configs[data_type])(params.m, params.n, params.k, params.b, params.is_rhs_constant);
79             }
80             ARM_COMPUTE_ERROR("Not supported data type");
81         default:
82             if(gemm_default_configs.find(data_type) != gemm_default_configs.end())
83             {
84                 return (this->*gemm_default_configs[data_type])(params.m, params.n, params.k, params.b, params.is_rhs_constant);
85             }
86             ARM_COMPUTE_ERROR("Not supported data type");
87     }
88 }
89 
default_f32(unsigned int m,unsigned int n,unsigned int k,unsigned int b,bool is_rhs_constant)90 CLGEMMKernelType CLGEMMKernelSelectionValhall::default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
91 {
92     ARM_COMPUTE_UNUSED(m, n, k, b);
93 
94     return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE_V1;
95 }
96 
default_f16(unsigned int m,unsigned int n,unsigned int k,unsigned int b,bool is_rhs_constant)97 CLGEMMKernelType CLGEMMKernelSelectionValhall::default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
98 {
99     ARM_COMPUTE_UNUSED(m, n, k, b);
100 
101     return is_rhs_constant ? CLGEMMKernelType::RESHAPED_ONLY_RHS : CLGEMMKernelType::NATIVE_V1;
102 }
103 
g77_f16(unsigned int m,unsigned int n,unsigned int k,unsigned int b,bool is_rhs_constant)104 CLGEMMKernelType CLGEMMKernelSelectionValhall::g77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
105 {
106     if (!is_rhs_constant)
107     {
108         return CLGEMMKernelType::NATIVE_V1;
109     }
110 
111     if (m == 1)
112     {
113         return CLGEMMKernelType::RESHAPED_ONLY_RHS;
114     }
115 
116     const float r_mn = static_cast<float>(m) / static_cast<float>(n);
117     const float r_mk = static_cast<float>(m) / static_cast<float>(k);
118     const float r_nk = static_cast<float>(n) / static_cast<float>(k);
119     const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
120 
121     if(r_mk <= 0.6817956566810608)
122     {
123         if(workload <= 801.6000061035156)
124         {
125             return CLGEMMKernelType::RESHAPED_ONLY_RHS;
126         }
127         else
128         {
129             if(r_mn <= 0.0839829258620739)
130             {
131                 return CLGEMMKernelType::RESHAPED_ONLY_RHS;
132             }
133             else
134             {
135                 if(r_mk <= 0.24917218834161758)
136                 {
137                     return CLGEMMKernelType::RESHAPED;
138                 }
139                 else
140                 {
141                     if(workload <= 2551.75)
142                     {
143                         return CLGEMMKernelType::RESHAPED_ONLY_RHS;
144                     }
145                     else
146                     {
147                         if(workload <= 5061.574951171875)
148                         {
149                             return CLGEMMKernelType::RESHAPED_ONLY_RHS;
150                         }
151                         else
152                         {
153                             return CLGEMMKernelType::RESHAPED;
154                         }
155                     }
156                 }
157             }
158         }
159     }
160     else
161     {
162         if(r_mk <= 4.849947690963745)
163         {
164             if(workload <= 17618.4501953125)
165             {
166                 if(workload <= 5224.699951171875)
167                 {
168                     return CLGEMMKernelType::RESHAPED_ONLY_RHS;
169                 }
170                 else
171                 {
172                     if(r_nk <= 0.7933054566383362)
173                     {
174                         return CLGEMMKernelType::RESHAPED;
175                     }
176                     else
177                     {
178                         return CLGEMMKernelType::RESHAPED_ONLY_RHS;
179                     }
180                 }
181             }
182             else
183             {
184                 if(workload <= 20275.2001953125)
185                 {
186                     return CLGEMMKernelType::RESHAPED;
187                 }
188                 else
189                 {
190                     if(r_mk <= 3.07421875)
191                     {
192                         return CLGEMMKernelType::RESHAPED_ONLY_RHS;
193                     }
194                     else
195                     {
196                         return CLGEMMKernelType::RESHAPED;
197                     }
198                 }
199             }
200         }
201         else
202         {
203             return CLGEMMKernelType::RESHAPED_ONLY_RHS;
204         }
205     }
206 }
207 
default_q8(unsigned int m,unsigned int n,unsigned int k,unsigned int b,bool is_rhs_constant)208 CLGEMMKernelType CLGEMMKernelSelectionValhall::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
209 {
210     ARM_COMPUTE_UNUSED(m, n, k, b);
211 
212     if(is_rhs_constant)
213     {
214         return CLGEMMKernelType::RESHAPED_ONLY_RHS;
215     }
216     else
217     {
218         return CLGEMMKernelType::NATIVE;
219     }
220 }
221 } // namespace cl_gemm
222 } // namespace arm_compute
223