• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #pragma once
26 
27 #include "arm_gemm.hpp"
28 
29 #include <cstddef>
30 
// Macro for unreachable code (e.g. impossible default cases on switch).
// Expands to the compiler builtin __builtin_unreachable(); the `why` argument is documentation only and is
// discarded by the expansion.
#define UNREACHABLE(why)  __builtin_unreachable()
33 
34 // Paranoid option for the above with assert
35 // #define UNREACHABLE(why)   assert(0 && why)
36 
37 namespace arm_gemm {
38 
// Integer "ceiling" division: evaluates (a + b - 1) / b, which for positive operands is a / b rounded up to the
// next whole number.
template<typename T>
inline T iceildiv(const T a, const T b) {
    // Bias the numerator by (b - 1) so that the truncating division below rounds any remainder upwards.
    const auto biased = a + b - 1;

    return biased / b;
}
43 
// Returns `a` unchanged when it is already a multiple of `b`; otherwise returns `a` advanced by the amount needed
// to cancel the remainder of a % b (for positive operands: the next multiple of `b` above `a`).
template <typename T>
inline T roundup(const T a, const T b) {
    const T excess = a % b;

    return (excess == 0) ? a : a + b - excess;
}
54 
// Vector-length category selector.
// NOTE(review): presumably distinguishes fixed-length vector code (None) from SVE scalable-vector code (SVE);
// the users of this enum are not visible in this header - confirm against them.
enum class VLType {
    None,
    SVE,
};
59 
// Describes an output buffer in one of two forms, selected by `is_indirect`:
//  - direct:   a base pointer plus a stride;
//  - indirect: a pointer to an array of pointers plus an offset.
// Whichever form is not selected by the constructor is left zero-initialised.
template<typename T>
struct IndirectOutputArg {
    struct {
        T       *base;
        size_t   stride;
    } direct = {};
    struct {
        T * const *ptr;
        size_t     offset;
    } indirect = {};
    bool is_indirect;

    // Default: direct form with a null base pointer and zero stride.
    IndirectOutputArg() : direct{nullptr, 0}, is_indirect(false) { }

    // Direct form.
    IndirectOutputArg(T *base, size_t stride) : direct{base, stride}, is_indirect(false) { }

    // Indirect form.
    IndirectOutputArg(T * const * ptr, size_t offset) : indirect{ptr, offset}, is_indirect(true) { }
};
89 
90 // Check that the provided Requantize32 doesn't have a left shift.
quant_no_left_shift(const Requantize32 & qp)91 inline bool quant_no_left_shift(const Requantize32 &qp) {
92     if (qp.per_channel_requant) {
93         return (qp.per_channel_left_shifts == nullptr);
94     } else {
95         return (qp.per_layer_left_shift == 0);
96     }
97 }
98 
99 // Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels.  These don't include row
100 // sums, so the 'b_offset' has to be zero.
quant_hybrid_symmetric(const Requantize32 & qp)101 inline bool quant_hybrid_symmetric(const Requantize32 &qp) {
102     return quant_no_left_shift(qp) && qp.b_offset == 0;
103 }
104 
105 // Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels.  These don't support per
106 // channel quantization.  Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0...
quant_hybrid_asymmetric(const Requantize32 & qp)107 inline bool quant_hybrid_asymmetric(const Requantize32 &qp) {
108     return quant_no_left_shift(qp) /*  && qp.b_offset != 0 */ && qp.per_channel_requant==false;
109 }
110 
// Describes an input buffer in one of two forms, selected by `is_indirect`:
//  - direct:   a base pointer plus a stride;
//  - indirect: a pointer to a table of pointer arrays plus a starting row and column.
// Whichever form is not selected by the constructor is left zero-initialised.
template<typename T>
struct IndirectInputArg {
    struct {
        const T *base;
        size_t   stride;
    } direct = {};
    struct {
        const T * const * const * ptr;
        unsigned int start_row;
        unsigned int start_col;
    } indirect = {};
    bool is_indirect;

    // Default: direct form with a null base pointer and zero stride.
    IndirectInputArg() : direct{nullptr, 0}, is_indirect(false) { }

    // Direct form.
    IndirectInputArg(const T *base, size_t stride) : direct{base, stride}, is_indirect(false) { }

    // Indirect form.
    IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col)
        : indirect{ptr, start_row, start_col}, is_indirect(true) { }
};
142 
// Helpers for querying how many elements of a given type fit in one vector.
namespace utils {

#ifdef __ARM_FEATURE_SVE
// These helpers live directly in `utils` rather than in an unnamed namespace: this is a header, and
// get_vector_length() below is an inline template with external linkage.  If it called an internal-linkage helper,
// each translation unit would bind the call to a different entity, which violates the one-definition rule.

// Generic case: read the vector length in bytes with "cntb" and divide by the element size `sz`.
template<size_t sz>
inline unsigned long get_vector_length_sz() {
    unsigned long v;

    __asm (
        "cntb	%0"
        : "=r" (v)
    );

    return v / sz;
}

// Defines an explicit specialisation for element size `sz` which returns the result of the given count instruction
// directly, so no division is needed.
#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; }

VEC_LEN_SPEC(8, "cntd")
VEC_LEN_SPEC(4, "cntw")
VEC_LEN_SPEC(2, "cnth")
VEC_LEN_SPEC(1, "cntb")
#endif

// Returns the number of elements of type T per vector.
//  - When compiled with __ARM_FEATURE_SVE: queried from the hardware via get_vector_length_sz<sizeof(T)>().
//  - Otherwise: 16 / sizeof(T), i.e. a fixed 16-byte vector is assumed.
template <typename T>
inline unsigned long get_vector_length() {
#ifdef __ARM_FEATURE_SVE
    return get_vector_length_sz<sizeof(T)>();
#else
    return 16 / sizeof(T);
#endif
}

} // utils namespace
179 } // arm_gemm namespace
180 
// Makes every name in arm_gemm::utils (e.g. get_vector_length) visible to unqualified lookup at global scope in
// any file that includes this header.
using namespace arm_gemm::utils;
182