• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_CPU_FEATURES_H
28 #define VIXL_CPU_FEATURES_H
29 
30 #include <bitset>
31 #include <ostream>
32 
33 #include "globals-vixl.h"
34 
35 namespace vixl {
36 
37 
38 // VIXL aims to handle and detect all architectural features that are likely to
39 // influence code-generation decisions at EL0 (user-space).
40 //
41 // - There may be multiple VIXL feature flags for a given architectural
42 //   extension. This occurs where the extension allows components to be
43 //   implemented independently, or where kernel support is needed, and is likely
44 //   to be fragmented.
45 //
46 //   For example, Pointer Authentication (kPAuth*) has a separate feature flag
47 //   for access to PACGA, and to indicate that the QARMA algorithm is
48 //   implemented.
49 //
50 // - Conversely, some extensions have configuration options that do not affect
51 //   EL0, so these are presented as a single VIXL feature.
52 //
53 //   For example, the RAS extension (kRAS) has several variants, but the only
54 //   feature relevant to VIXL is the addition of the ESB instruction so we only
55 //   need a single flag.
56 //
57 // - VIXL offers separate flags for separate features even if they're
58 //   architecturally linked.
59 //
60 //   For example, the architecture requires kFPHalf and kNEONHalf to be equal,
61 //   but they have separate hardware ID register fields so VIXL presents them as
62 //   separate features.
63 //
64 // - VIXL can detect every feature for which it can generate code.
65 //
66 // - VIXL can detect some features for which it cannot generate code.
67 //
68 // The CPUFeatures::Feature enum — derived from the macro list below — is
69 // frequently extended. New features may be added to the list at any point, and
70 // no assumptions should be made about the numerical values assigned to each
71 // enum constant. The symbolic names can be considered to be stable.
72 //
73 // The debug descriptions are used only for debug output. The 'cpuinfo' strings
74 // are informative; VIXL does not use /proc/cpuinfo for feature detection.
75 
76 // clang-format off
// Invokes V(constant name, debug description, Linux 'cpuinfo' string) once for
// every feature, in declaration order. The 'cpuinfo' string is NULL for the
// entries that do not name one.
#define VIXL_CPU_FEATURE_LIST(V)                                               \
  /* Constant name        Debug description         Linux 'cpuinfo' string. */ \
                                                                               \
  /* If set, the OS traps and emulates MRS accesses to relevant (EL1) ID_*  */ \
  /* registers, so that the detailed feature registers can be read          */ \
  /* directly.                                                              */ \
  V(kIDRegisterEmulation, "ID register emulation",  "cpuid")                   \
                                                                               \
  V(kFP,                  "FP",                     "fp")                      \
  V(kNEON,                "NEON",                   "asimd")                   \
  V(kCRC32,               "CRC32",                  "crc32")                   \
  V(kDGH,                 "DGH",                    "dgh")                     \
  /* Speculation control features.                                          */ \
  V(kCSV2,                "CSV2",                   NULL)                      \
  V(kSCXTNUM,             "SCXTNUM",                NULL)                      \
  V(kCSV3,                "CSV3",                   NULL)                      \
  V(kSB,                  "SB",                     "sb")                      \
  V(kSPECRES,             "SPECRES",                NULL)                      \
  V(kSSBS,                "SSBS",                   NULL)                      \
  V(kSSBSControl,         "SSBS (PSTATE control)",  "ssbs")                    \
  /* Cryptographic support instructions.                                    */ \
  V(kAES,                 "AES",                    "aes")                     \
  V(kSHA1,                "SHA1",                   "sha1")                    \
  V(kSHA2,                "SHA2",                   "sha2")                    \
  /* A form of PMULL{2} with a 128-bit (1Q) result.                         */ \
  V(kPmull1Q,             "Pmull1Q",                "pmull")                   \
  /* Atomic operations on memory: CAS, LDADD, STADD, SWP, etc.              */ \
  V(kAtomics,             "Atomics",                "atomics")                 \
  /* Limited ordering regions: LDLAR, STLLR and their variants.             */ \
  V(kLORegions,           "LORegions",              NULL)                      \
  /* Rounding doubling multiply add/subtract: SQRDMLAH and SQRDMLSH.        */ \
  V(kRDM,                 "RDM",                    "asimdrdm")                \
  /* Scalable Vector Extension.                                             */ \
  V(kSVE,                 "SVE",                    "sve")                     \
  V(kSVEF64MM,            "SVE F64MM",              "svef64mm")                \
  V(kSVEF32MM,            "SVE F32MM",              "svef32mm")                \
  V(kSVEI8MM,             "SVE I8MM",               "svei8mm")                 \
  V(kSVEBF16,             "SVE BFloat16",           "svebf16")                 \
  /* SDOT and UDOT support (in NEON).                                       */ \
  V(kDotProduct,          "DotProduct",             "asimddp")                 \
  /* Int8 matrix multiplication (in NEON).                                  */ \
  V(kI8MM,                "NEON I8MM",              "i8mm")                    \
  /* Half-precision (FP16) support for FP and NEON, respectively.           */ \
  V(kFPHalf,              "FPHalf",                 "fphp")                    \
  V(kNEONHalf,            "NEONHalf",               "asimdhp")                 \
  /* BFloat16 support (in both FP and NEON.)                                */ \
  V(kBF16,                "FP/NEON BFloat 16",      "bf16")                    \
  /* The RAS extension, including the ESB instruction.                      */ \
  V(kRAS,                 "RAS",                    NULL)                      \
  /* Data cache clean to the point of persistence: DC CVAP.                 */ \
  V(kDCPoP,               "DCPoP",                  "dcpop")                   \
  /* Data cache clean to the point of deep persistence: DC CVADP.           */ \
  V(kDCCVADP,             "DCCVADP",                "dcpodp")                  \
  /* Cryptographic support instructions.                                    */ \
  V(kSHA3,                "SHA3",                   "sha3")                    \
  V(kSHA512,              "SHA512",                 "sha512")                  \
  V(kSM3,                 "SM3",                    "sm3")                     \
  V(kSM4,                 "SM4",                    "sm4")                     \
  /* Pointer authentication for addresses.                                  */ \
  V(kPAuth,               "PAuth",                  "paca")                    \
  /* Pointer authentication for addresses uses QARMA.                       */ \
  V(kPAuthQARMA,          "PAuthQARMA",             NULL)                      \
  /* Generic authentication (using the PACGA instruction).                  */ \
  V(kPAuthGeneric,        "PAuthGeneric",           "pacg")                    \
  /* Generic authentication uses QARMA.                                     */ \
  V(kPAuthGenericQARMA,   "PAuthGenericQARMA",      NULL)                      \
  /* JavaScript-style FP -> integer conversion instruction: FJCVTZS.        */ \
  V(kJSCVT,               "JSCVT",                  "jscvt")                   \
  /* Complex number support for NEON: FCMLA and FCADD.                      */ \
  V(kFcma,                "Fcma",                   "fcma")                    \
  /* RCpc-based model (for weaker release consistency): LDAPR and variants. */ \
  V(kRCpc,                "RCpc",                   "lrcpc")                   \
  V(kRCpcImm,             "RCpc (imm)",             "ilrcpc")                  \
  /* Flag manipulation instructions: SETF{8,16}, CFINV, RMIF.               */ \
  V(kFlagM,               "FlagM",                  "flagm")                   \
  /* Unaligned single-copy atomicity.                                       */ \
  V(kUSCAT,               "USCAT",                  "uscat")                   \
  /* FP16 fused multiply-add or -subtract long: FMLAL{2}, FMLSL{2}.         */ \
  V(kFHM,                 "FHM",                    "asimdfhm")                \
  /* Data-independent timing (for selected instructions).                   */ \
  V(kDIT,                 "DIT",                    "dit")                     \
  /* Branch target identification.                                          */ \
  V(kBTI,                 "BTI",                    "bti")                     \
  /* Flag manipulation instructions: {AX,XA}FLAG                            */ \
  V(kAXFlag,              "AXFlag",                 "flagm2")                  \
  /* Random number generation extension.                                    */ \
  V(kRNG,                 "RNG",                    "rng")                     \
  /* Floating-point round to {32,64}-bit integer.                           */ \
  V(kFrintToFixedSizedInt,"Frint (bounded)",        "frint")                   \
  /* Memory Tagging Extension.                                              */ \
  V(kMTEInstructions,     "MTE (EL0 instructions)", NULL)                      \
  V(kMTE,                 "MTE",                    NULL)                      \
  /* PAuth extensions.                                                      */ \
  V(kPAuthEnhancedPAC,    "PAuth EnhancedPAC",      NULL)                      \
  V(kPAuthEnhancedPAC2,   "PAuth EnhancedPAC2",     NULL)                      \
  V(kPAuthFPAC,           "PAuth FPAC",             NULL)                      \
  V(kPAuthFPACCombined,   "PAuth FPACCombined",     NULL)                      \
  /* Scalable Vector Extension 2.                                           */ \
  V(kSVE2,                "SVE2",                   "sve2")                    \
  V(kSVESM4,              "SVE SM4",                "svesm4")                  \
  V(kSVESHA3,             "SVE SHA3",               "svesha3")                 \
  V(kSVEBitPerm,          "SVE BitPerm",            "svebitperm")              \
  V(kSVEAES,              "SVE AES",                "sveaes")                  \
  V(kSVEPmull128,         "SVE Pmull128",           "svepmull")                \
  /* Alternate floating-point behavior                                      */ \
  V(kAFP,                 "AFP",                    "afp")                     \
  /* Enhanced Counter Virtualization                                        */ \
  V(kECV,                 "ECV",                    "ecv")                     \
  /* Increased precision of Reciprocal Estimate and Square Root Estimate    */ \
  V(kRPRES,               "RPRES",                  "rpres")
187 // clang-format on
188 
189 
190 class CPUFeaturesConstIterator;
191 
192 // A representation of the set of features known to be supported by the target
193 // device. Each feature is represented by a simple boolean flag.
194 //
195 //   - When the Assembler is asked to assemble an instruction, it asserts (in
196 //     debug mode) that the necessary features are available.
197 //
198 //   - TODO: The MacroAssembler relies on the Assembler's assertions, but in
199 //     some cases it may be useful for macros to generate a fall-back sequence
200 //     in case features are not available.
201 //
202 //   - The Simulator assumes by default that all features are available, but it
203 //     is possible to configure it to fail if the simulated code uses features
204 //     that are not enabled.
205 //
206 //     The Simulator also offers pseudo-instructions to allow features to be
207 //     enabled and disabled dynamically. This is useful when you want to ensure
208 //     that some features are constrained to certain areas of code.
209 //
210 //   - The base Disassembler knows nothing about CPU features, but the
211 //     PrintDisassembler can be configured to annotate its output with warnings
212 //     about unavailable features. The Simulator uses this feature when
213 //     instruction trace is enabled.
214 //
215 //   - The Decoder-based components -- the Simulator and PrintDisassembler --
216 //     rely on a CPUFeaturesAuditor visitor. This visitor keeps a list of
217 //     features actually encountered so that a large block of code can be
218 //     examined (either directly or through simulation), and the required
219 //     features analysed later.
220 //
221 // Expected usage:
222 //
223 //     // By default, VIXL uses CPUFeatures::AArch64LegacyBaseline(), for
224 //     // compatibility with older versions of VIXL.
225 //     MacroAssembler masm;
226 //
227 //     // Generate code only for the current CPU.
228 //     masm.SetCPUFeatures(CPUFeatures::InferFromOS());
229 //
230 //     // Turn off feature checking entirely.
231 //     masm.SetCPUFeatures(CPUFeatures::All());
232 //
233 // Feature set manipulation:
234 //
235 //     CPUFeatures f;  // The default constructor gives an empty set.
236 //     // Individual features can be added (or removed).
237 //     f.Combine(CPUFeatures::kFP, CPUFeatures::kNEON, CPUFeatures::kAES);
238 //     f.Remove(CPUFeatures::kNEON);
239 //
240 //     // Some helpers exist for extensions that provide several features.
241 //     f.Remove(CPUFeatures::All());
242 //     f.Combine(CPUFeatures::AArch64LegacyBaseline());
243 //
244 //     // Chained construction is also possible.
245 //     CPUFeatures g =
246 //         f.With(CPUFeatures::kPmull1Q).Without(CPUFeatures::kCRC32);
247 //
248 //     // Features can be queried. Where multiple features are given, they are
249 //     // combined with logical AND.
250 //     if (h.Has(CPUFeatures::kNEON)) { ... }
251 //     if (h.Has(CPUFeatures::kFP, CPUFeatures::kNEON)) { ... }
252 //     if (h.Has(g)) { ... }
253 //     // If the empty set is requested, the result is always 'true'.
254 //     VIXL_ASSERT(h.Has(CPUFeatures()));
255 //
256 //     // For debug and reporting purposes, features can be enumerated (or
257 //     // printed directly):
258 //     std::cout << CPUFeatures::kNEON;  // Prints something like "NEON".
259 //     std::cout << f;  // Prints something like "FP, NEON, CRC32".
260 class CPUFeatures {
261  public:
262   // clang-format off
263   // Individual features.
264   // These should be treated as opaque tokens. User code should not rely on
265   // specific numeric values or ordering.
266   enum Feature {
267     // Refer to VIXL_CPU_FEATURE_LIST (above) for the list of feature names that
268     // this class supports.
269 
270     kNone = -1,
271 #define VIXL_DECLARE_FEATURE(SYMBOL, NAME, CPUINFO) SYMBOL,
272     VIXL_CPU_FEATURE_LIST(VIXL_DECLARE_FEATURE)
273 #undef VIXL_DECLARE_FEATURE
274     kNumberOfFeatures
275   };
276   // clang-format on
277 
278   // By default, construct with no features enabled.
CPUFeatures()279   CPUFeatures() : features_{} {}
280 
281   // Construct with some features already enabled.
282   template <typename T, typename... U>
CPUFeatures(T first,U...others)283   CPUFeatures(T first, U... others) : features_{} {
284     Combine(first, others...);
285   }
286 
287   // Construct with all features enabled. This can be used to disable feature
288   // checking: `Has(...)` returns true regardless of the argument.
289   static CPUFeatures All();
290 
291   // Construct an empty CPUFeatures. This is equivalent to the default
292   // constructor, but is provided for symmetry and convenience.
None()293   static CPUFeatures None() { return CPUFeatures(); }
294 
295   // The presence of these features was assumed by version of VIXL before this
296   // API was added, so using this set by default ensures API compatibility.
AArch64LegacyBaseline()297   static CPUFeatures AArch64LegacyBaseline() {
298     return CPUFeatures(kFP, kNEON, kCRC32);
299   }
300 
301   // Construct a new CPUFeatures object using ID registers. This assumes that
302   // kIDRegisterEmulation is present.
303   static CPUFeatures InferFromIDRegisters();
304 
305   enum QueryIDRegistersOption {
306     kDontQueryIDRegisters,
307     kQueryIDRegistersIfAvailable
308   };
309 
310   // Construct a new CPUFeatures object based on what the OS reports.
311   static CPUFeatures InferFromOS(
312       QueryIDRegistersOption option = kQueryIDRegistersIfAvailable);
313 
314   // Combine another CPUFeatures object into this one. Features that already
315   // exist in this set are left unchanged.
316   void Combine(const CPUFeatures& other);
317 
318   // Combine a specific feature into this set. If it already exists in the set,
319   // the set is left unchanged.
320   void Combine(Feature feature);
321 
322   // Combine multiple features (or feature sets) into this set.
323   template <typename T, typename... U>
Combine(T first,U...others)324   void Combine(T first, U... others) {
325     Combine(first);
326     Combine(others...);
327   }
328 
329   // Remove features in another CPUFeatures object from this one.
330   void Remove(const CPUFeatures& other);
331 
332   // Remove a specific feature from this set. This has no effect if the feature
333   // doesn't exist in the set.
334   void Remove(Feature feature0);
335 
336   // Remove multiple features (or feature sets) from this set.
337   template <typename T, typename... U>
Remove(T first,U...others)338   void Remove(T first, U... others) {
339     Remove(first);
340     Remove(others...);
341   }
342 
343   // Chaining helpers for convenient construction by combining other CPUFeatures
344   // or individual Features.
345   template <typename... T>
With(T...others)346   CPUFeatures With(T... others) const {
347     CPUFeatures f(*this);
348     f.Combine(others...);
349     return f;
350   }
351 
352   template <typename... T>
Without(T...others)353   CPUFeatures Without(T... others) const {
354     CPUFeatures f(*this);
355     f.Remove(others...);
356     return f;
357   }
358 
359   // Test whether the `other` feature set is equal to or a subset of this one.
360   bool Has(const CPUFeatures& other) const;
361 
362   // Test whether a single feature exists in this set.
363   // Note that `Has(kNone)` always returns true.
364   bool Has(Feature feature) const;
365 
366   // Test whether all of the specified features exist in this set.
367   template <typename T, typename... U>
Has(T first,U...others)368   bool Has(T first, U... others) const {
369     return Has(first) && Has(others...);
370   }
371 
372   // Return the number of enabled features.
373   size_t Count() const;
HasNoFeatures()374   bool HasNoFeatures() const { return Count() == 0; }
375 
376   // Check for equivalence.
377   bool operator==(const CPUFeatures& other) const {
378     return Has(other) && other.Has(*this);
379   }
380   bool operator!=(const CPUFeatures& other) const { return !(*this == other); }
381 
382   typedef CPUFeaturesConstIterator const_iterator;
383 
384   const_iterator begin() const;
385   const_iterator end() const;
386 
387  private:
388   // Each bit represents a feature. This set will be extended as needed.
389   std::bitset<kNumberOfFeatures> features_;
390 
391   friend std::ostream& operator<<(std::ostream& os,
392                                   const vixl::CPUFeatures& features);
393 };
394 
395 std::ostream& operator<<(std::ostream& os, vixl::CPUFeatures::Feature feature);
396 std::ostream& operator<<(std::ostream& os, const vixl::CPUFeatures& features);
397 
398 // This is not a proper C++ iterator type, but it simulates enough of
399 // ForwardIterator that simple loops can be written.
400 class CPUFeaturesConstIterator {
401  public:
402   CPUFeaturesConstIterator(const CPUFeatures* cpu_features = NULL,
403                            CPUFeatures::Feature start = CPUFeatures::kNone)
cpu_features_(cpu_features)404       : cpu_features_(cpu_features), feature_(start) {
405     VIXL_ASSERT(IsValid());
406   }
407 
408   bool operator==(const CPUFeaturesConstIterator& other) const;
409   bool operator!=(const CPUFeaturesConstIterator& other) const {
410     return !(*this == other);
411   }
412   CPUFeaturesConstIterator& operator++();
413   CPUFeaturesConstIterator operator++(int);
414 
415   CPUFeatures::Feature operator*() const {
416     VIXL_ASSERT(IsValid());
417     return feature_;
418   }
419 
420   // For proper support of C++'s simplest "Iterator" concept, this class would
421   // have to define member types (such as CPUFeaturesIterator::pointer) to make
422   // it appear as if it iterates over Feature objects in memory. That is, we'd
423   // need CPUFeatures::iterator to behave like std::vector<Feature>::iterator.
424   // This is at least partially possible -- the std::vector<bool> specialisation
425   // does something similar -- but it doesn't seem worthwhile for a
426   // special-purpose debug helper, so they are omitted here.
427  private:
428   const CPUFeatures* cpu_features_;
429   CPUFeatures::Feature feature_;
430 
IsValid()431   bool IsValid() const {
432     if (cpu_features_ == NULL) {
433       return feature_ == CPUFeatures::kNone;
434     }
435     return cpu_features_->Has(feature_);
436   }
437 };
438 
439 // A convenience scope for temporarily modifying a CPU features object. This
440 // allows features to be enabled for short sequences.
441 //
442 // Expected usage:
443 //
444 //  {
445 //    CPUFeaturesScope cpu(&masm, CPUFeatures::kCRC32);
446 //    // This scope can now use CRC32, as well as anything else that was enabled
447 //    // before the scope.
448 //
449 //    ...
450 //
451 //    // At the end of the scope, the original CPU features are restored.
452 //  }
453 class CPUFeaturesScope {
454  public:
455   // Start a CPUFeaturesScope on any object that implements
456   // `CPUFeatures* GetCPUFeatures()`.
457   template <typename T>
CPUFeaturesScope(T * cpu_features_wrapper)458   explicit CPUFeaturesScope(T* cpu_features_wrapper)
459       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
460         old_features_(*cpu_features_) {}
461 
462   // Start a CPUFeaturesScope on any object that implements
463   // `CPUFeatures* GetCPUFeatures()`, with the specified features enabled.
464   template <typename T, typename U, typename... V>
CPUFeaturesScope(T * cpu_features_wrapper,U first,V...features)465   CPUFeaturesScope(T* cpu_features_wrapper, U first, V... features)
466       : cpu_features_(cpu_features_wrapper->GetCPUFeatures()),
467         old_features_(*cpu_features_) {
468     cpu_features_->Combine(first, features...);
469   }
470 
~CPUFeaturesScope()471   ~CPUFeaturesScope() { *cpu_features_ = old_features_; }
472 
473   // For advanced usage, the CPUFeatures object can be accessed directly.
474   // The scope will restore the original state when it ends.
475 
GetCPUFeatures()476   CPUFeatures* GetCPUFeatures() const { return cpu_features_; }
477 
SetCPUFeatures(const CPUFeatures & cpu_features)478   void SetCPUFeatures(const CPUFeatures& cpu_features) {
479     *cpu_features_ = cpu_features;
480   }
481 
482  private:
483   CPUFeatures* const cpu_features_;
484   const CPUFeatures old_features_;
485 };
486 
487 
488 }  // namespace vixl
489 
490 #endif  // VIXL_CPU_FEATURES_H
491