1 /* Copyright 2019 Google LLC. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef RUY_RUY_PATH_H_
17 #define RUY_RUY_PATH_H_
18
19 #include <cstdint>
20
21 #include "ruy/platform.h"
22 #include "ruy/size_util.h"
23
24 namespace ruy {
25
26 // A Path is an implementation path, typically corresponding to a SIMD
// instruction set being targeted. For example, on the ARM architecture,
28 // Path::kNeon means using NEON instructions, and Path::kNeonDotprod means
29 // also using the newer NEON dot-product instructions.
30 //
31 // Different Path enum values are defined on different CPU architectures,
32 // corresponding to different SIMD ISA extensions available there.
33 //
34 // Path::kStandardCpp is the one Path that is always available.
35 //
36 // Path enum values are bits and may be OR-ed to form "sets of Paths".
37 // Ruy entry points such as ruy::Mul either implicitly use such a set of Paths,
38 // or allow passing an explicit one as a template parameter. The meaning of such
39 // an OR-ed Path combination is "compile all of
40 // these paths; which path is used will be determined at runtime". This is why
41 // for most users, it is enough to call ruy::Mul(...), which will compile a
42 // reasonable selection of paths for the target CPU architecture's various
43 // SIMD ISA extensions, and let ruy determine at runtime which one to use.
44 // Internally, after the actual path has been resolved, ruy's internal functions
45 // templatized on a Path tend to require that to be a single bit.
46 //
47 // An element of ruy's internal design was to allow for code compiled for
48 // multiple such paths to coexist without violating the C++ One Definition Rule
49 // (ODR). This is achieved by having all ruy internal functions, whose
50 // definition depends on a choice of Path, be templatized on a Path, so that
51 // each path-specific specialization is a separate symbol. There is never
52 // a need to compile ruy code with different compilation flags to enable
53 // different SIMD extensions and dispatch at runtime between them, as this is
54 // taken care of internally by ruy in an ODR-correct way.
enum class Path : std::uint8_t {
  // Null value: the empty set of paths.
  kNone = 0,
  // Ruy's architecture-specific parts written in standard C++.
  //
  // Meant for testing/development, and as the fallback used when the SIMD ISA
  // extensions needed by the other paths are not available at runtime.
  kStandardCpp = 1 << 0,
  // Internal, test-only flavors of StandardCpp that exist to exercise more
  // corners of internal ruy logic.
  // They are deliberately left out of ruy::kAllPaths and ruy::kNonArchPaths,
  // and are only used by dedicated ruy tests that name them explicitly.
  kInternalStandardCppVariant1 = 1 << 1,
  kInternalStandardCppVariant2 = 1 << 2,
  kInternalStandardCppVariant3 = 1 << 3,

  // Architecture-specific paths start at bit 4. The ARM and x86 blocks below
  // reuse the same bit positions.

#if RUY_PLATFORM_ARM
  // Optimized path built on a widely available subset of ARM NEON
  // instructions.
  kNeon = 1 << 4,
  // Optimized path that additionally uses the ARM NEON dot product
  // instructions found on newer ARM cores.
  kNeonDotprod = 1 << 5,
#endif  // RUY_PLATFORM_ARM

#if RUY_PLATFORM_X86
  // Optimized for AVX.
  // Compiled with -mavx.
  kAvx = 1 << 4,
  // Optimized for AVX2+FMA.
  // Compiled with -mavx2 -mfma.
  kAvx2Fma = 1 << 5,
  // Optimized for AVX-512.
  // Compiled with -mavx512f -mavx512vl -mavx512cd -mavx512bw -mavx512dq.
  kAvx512 = 1 << 6,
#endif  // RUY_PLATFORM_X86
};
91
92 inline constexpr Path operator|(Path p, Path q) {
93 return static_cast<Path>(static_cast<std::uint32_t>(p) |
94 static_cast<std::uint32_t>(q));
95 }
96
97 inline constexpr Path operator&(Path p, Path q) {
98 return static_cast<Path>(static_cast<std::uint32_t>(p) &
99 static_cast<std::uint32_t>(q));
100 }
101
102 inline constexpr Path operator^(Path p, Path q) {
103 return static_cast<Path>(static_cast<std::uint32_t>(p) ^
104 static_cast<std::uint32_t>(q));
105 }
106
107 inline constexpr Path operator~(Path p) {
108 return static_cast<Path>(~static_cast<std::uint32_t>(p));
109 }
110
Disjoint(Path p,Path q)111 inline constexpr bool Disjoint(Path p, Path q) {
112 return (p & q) == Path::kNone;
113 }
114
GetMostSignificantPath(Path path_mask)115 inline Path GetMostSignificantPath(Path path_mask) {
116 return static_cast<Path>(round_down_pot(static_cast<int>(path_mask)));
117 }
118
119 // We define three disjoint sets of paths.
120 //
121 // kNonArchPaths is the set of paths that are defined regardless of
122 // the CPU architecture (excluding some internal test-only paths).
123 // These paths are slow, but portable. At the moment,
124 // that is only kStandardCpp. In the past, that used to also include a
125 // kReference path providing an even more basic implementation, but that has
126 // been split out into a separate library, see the ReferenceMul function.
127 constexpr Path kNonArchPaths = Path::kStandardCpp;
128
129 // The other two are specific to each CPU architecture. Note that these sets
130 // do NOT include a fallback for when none of these architecture paths are
131 // supported at runtime by the CPU. For that, see the other constants defined
132 // further below.
133 //
134 // kDefaultArchPaths is the set of architecture-specific paths that
135 // we recommend for most users. It is part of kDefaultPaths defined
136 // below.
137 //
138 // kExtraArchPaths is the set of all other architecture-specific paths
139 // that for whatever reason we're not recommending to most users at the moment.
140 // Typically that would include work-in-progress paths, or paths targeting
141 // minority hardware that isn't the best compromise of code size to performance
142 // for most users.
143
144 #if RUY_PLATFORM_NEON_64
145 constexpr Path kDefaultArchPaths = Path::kNeon | Path::kNeonDotprod;
146 constexpr Path kExtraArchPaths = Path::kNone;
147 #elif RUY_PLATFORM_NEON_32
148 constexpr Path kDefaultArchPaths = Path::kNeon;
149 constexpr Path kExtraArchPaths = Path::kNone;
150 #elif RUY_PLATFORM_X86
151 constexpr Path kDefaultArchPaths = Path::kAvx | Path::kAvx2Fma | Path::kAvx512;
152 constexpr Path kExtraArchPaths = Path::kNone;
153 #else
154 constexpr Path kDefaultArchPaths = Path::kNone;
155 constexpr Path kExtraArchPaths = Path::kNone;
156 #endif
157
158 // kNonArchPathsIncludingInternalVariants is the set of all
159 // non-architecture-specific paths without exception. This includes some paths
160 // that are internal-only and test-only and not useful to any user.
161 static constexpr Path kNonArchPathsIncludingInternalVariants =
162 kNonArchPaths | Path::kInternalStandardCppVariant1 |
163 Path::kInternalStandardCppVariant2 | Path::kInternalStandardCppVariant3;
164
165 // Enforce that kDefaultArchPaths, kExtraArchPaths and
166 // kNonArchPathsIncludingInternalVariants are mutually disjoint,
167 // and that kNonArchPaths is a subset of kNonArchPathsIncludingInternalVariants.
168 static_assert(Disjoint(kDefaultArchPaths, kExtraArchPaths), "");
169 static_assert(Disjoint(kDefaultArchPaths,
170 kNonArchPathsIncludingInternalVariants),
171 "");
172 static_assert(Disjoint(kExtraArchPaths, kNonArchPathsIncludingInternalVariants),
173 "");
174 static_assert(Disjoint(kNonArchPaths, ~kNonArchPathsIncludingInternalVariants),
175 "");
176
177 // We now define two aggregate sets of paths for convenience, including
178 // both architecture-specific paths and some portable fallbacks.
179 //
180 // kDefaultPaths is the set of paths that we recommend most users to use.
181 // It is what ruy::Mul(...), the entry point not taking an explicit Path value,
182 // uses.
183 constexpr Path kDefaultPaths = Path::kStandardCpp | kDefaultArchPaths;
184
185 // kAllPaths is the set of all paths that are available to compile, except
186 // some some internal test-only paths that no user would ever want to use.
187 // In addition to the Default paths, it also includes the extra
188 // architecture paths, as well as any other non-arch path besides kStandardCpp
189 // (there is none at the moment).
190 constexpr Path kAllPaths = kNonArchPaths | kDefaultArchPaths | kExtraArchPaths;
191
192 // kAllPathsIncludingInternalVariants is the set of all paths without exception.
193 // This includes some paths that are internal-only and test-only and not useful
194 // to any user.
195 static constexpr Path kAllPathsIncludingInternalVariants =
196 kAllPaths | kNonArchPathsIncludingInternalVariants;
197
198 static_assert(Disjoint(kDefaultPaths, ~kAllPaths), "");
199 static_assert(Disjoint(kAllPaths, ~kAllPathsIncludingInternalVariants), "");
200
201 } // namespace ruy
202
203 #endif // RUY_RUY_PATH_H_
204