• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 Google LLC. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_
17 #define TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_
18 
19 #include <cstdint>
20 
21 #include "tensorflow/lite/experimental/ruy/platform.h"
22 #include "tensorflow/lite/experimental/ruy/size_util.h"
23 
24 namespace ruy {
25 
26 // A Path is a choice of implementation path, e.g. between reference code
27 // and optimized code, or between different optimized code paths using different
28 // instruction sets.
29 //
30 // It's important that any symbol that depends on such implementation
31 // details, is somehow templatized in such a Path, so that different Path values
// yield different symbols, so we never have the situation where a symbol has
33 // multiple inequivalent definitions based on which code paths are compiled.
34 // That would be a violation of the ODR (One Definition Rule) which is Undefined
35 // Behavior, and one of the most serious issues plaguing both Eigen and
36 // gemmlowp.
37 //
38 // This enum is actually a bit-field: aside from kNone, all other values are
39 // powers of two, thus are one bit each. We define bit-wise operators below
40 // for this enum. Some places in Ruy accept a Path bit-field where multiple
41 // Paths may be selected, while some other places require a single Path (i.e.
42 // just one of the enum values here). Typically, user-facing parts of Ruy
43 // accept arbitrary bit-fields, allowing the user to compile support for
44 // multiple paths and to inform Ruy of all the paths that are to be enabled
45 // at runtime; then, typically in dispatch.h, we internally pick one
46 // specific path and from there on, internal Ruy code deals with only one
47 // path.
48 //
49 // When a user selects a set of compiled paths, Ruy internally dispatches to the
50 // "best" one, which typically means the newest optimized instructions for a
51 // given base architecture (such as ARM). Higher values of this enum correspond
52 // to "better" code paths within a given base architecture for which Ruy has
53 // optimized code paths.
54 //
55 // Values are reused across architectures.
// Rationale: to scale better to N architectures, it is good to have small
// values, both for the compile-time logic that selects paths and when manually
// spelling out Path values, such as when invoking a test or benchmark.
59 enum class Path : std::uint8_t {
60   // This is a special null value, representing the absence of any path.
61   kNone = 0,
62   // Reference multiplication code.
63   // The main purpose of this path is to have a very simple standalone Mul
64   // implementation to check against.
65   // This path bypasses almost all of Ruy's internal implementation details.
66   //
67   // This is intended for testing/development.
68   kReference = 0x1,
69   // Standard C++ implementation of Ruy's architecture-specific parts.
70   // Unlike Path::kReference, this path exercises most of Ruy's internal logic.
71   //
72   // This is intended for testing/development.
73   kStandardCpp = 0x2,
74 
75 #if RUY_PLATFORM(ARM)
76   // ARM architectures.
77   //
78   // Optimized path using a widely available subset of ARM NEON instructions.
79   kNeon = 0x4,
80   // Optimized path making use of ARM NEON dot product instructions that are
81   // available on newer ARM cores.
82   kNeonDotprod = 0x8,
83 #endif  // RUY_PLATFORM(ARM)
84 
85 #if RUY_PLATFORM(X86)
86   // x86 architectures.
87   //
88   // TODO(b/147376783): SSE 4.2 and AVX-VNNI support is incomplete /
89   // placeholder.
90   // Optimization is not finished. In particular the dimensions of the kernel
91   // blocks can be changed as desired.
92   //
93   // Optimized for SSE 4.2.
94   kSse42 = 0x4,
95   // Optimized for AVX2.
96   kAvx2 = 0x8,
97   // Optimized for AVX-512.
98   kAvx512 = 0x10,
99   // TODO(b/147376783): SSE 4.2 and AVX-VNNI support is incomplete /
100   // placeholder.
101   // Optimization is not finished. In particular the dimensions of the kernel
102   // blocks can be changed as desired.
103   //
104   // Optimized for AVX-VNNI.
105   kAvxVnni = 0x20,
106 #endif  // RUY_PLATFORM(X86)
107 };
108 
109 inline constexpr Path operator|(Path p, Path q) {
110   return static_cast<Path>(static_cast<std::uint32_t>(p) |
111                            static_cast<std::uint32_t>(q));
112 }
113 
114 inline constexpr Path operator&(Path p, Path q) {
115   return static_cast<Path>(static_cast<std::uint32_t>(p) &
116                            static_cast<std::uint32_t>(q));
117 }
118 
119 inline constexpr Path operator^(Path p, Path q) {
120   return static_cast<Path>(static_cast<std::uint32_t>(p) ^
121                            static_cast<std::uint32_t>(q));
122 }
123 
124 inline constexpr Path operator~(Path p) {
125   return static_cast<Path>(~static_cast<std::uint32_t>(p));
126 }
127 
GetMostSignificantPath(Path path_mask)128 inline Path GetMostSignificantPath(Path path_mask) {
129   return static_cast<Path>(round_down_pot(static_cast<int>(path_mask)));
130 }
131 
132 // ruy::kAllPaths represents all Path's that make sense to on a given
133 // base architecture.
134 #ifdef __linux__
135 #if RUY_PLATFORM(NEON_64)
136 constexpr Path kAllPaths =
137     Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod;
138 #elif RUY_PLATFORM(NEON_32)
139 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
140 #elif RUY_PLATFORM(X86)
141 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp |
142                            Path::kSse42 | Path::kAvx2 | Path::kAvx512 |
143                            Path::kAvxVnni;
144 #else
145 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
146 #endif
147 #else   // __linux__
148 // We don't know how to do runtime dotprod detection outside of linux for now.
149 #if RUY_PLATFORM(NEON)
150 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon;
151 #elif RUY_PLATFORM(X86)
152 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp |
153                            Path::kSse42 | Path::kAvx2 | Path::kAvx512 |
154                            Path::kAvxVnni;
155 #else
156 constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp;
157 #endif
158 #endif  // __linux__
159 
160 }  // namespace ruy
161 
162 #endif  // TENSORFLOW_LITE_EXPERIMENTAL_RUY_PATH_H_
163