• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
28 #include <sys/auxv.h>
29 #define VIXL_USE_LINUX_HWCAP 1
30 #endif
31 
32 #include "../utils-vixl.h"
33 
34 #include "cpu-aarch64.h"
35 
36 namespace vixl {
37 namespace aarch64 {
38 
39 
40 const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
41 const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
42 const IDRegister::Field AA64PFR0::kSVE(32);
43 const IDRegister::Field AA64PFR0::kDIT(48);
44 
45 const IDRegister::Field AA64PFR1::kBT(0);
46 
47 const IDRegister::Field AA64ISAR0::kAES(4);
48 const IDRegister::Field AA64ISAR0::kSHA1(8);
49 const IDRegister::Field AA64ISAR0::kSHA2(12);
50 const IDRegister::Field AA64ISAR0::kCRC32(16);
51 const IDRegister::Field AA64ISAR0::kAtomic(20);
52 const IDRegister::Field AA64ISAR0::kRDM(28);
53 const IDRegister::Field AA64ISAR0::kSHA3(32);
54 const IDRegister::Field AA64ISAR0::kSM3(36);
55 const IDRegister::Field AA64ISAR0::kSM4(40);
56 const IDRegister::Field AA64ISAR0::kDP(44);
57 const IDRegister::Field AA64ISAR0::kFHM(48);
58 const IDRegister::Field AA64ISAR0::kTS(52);
59 
60 const IDRegister::Field AA64ISAR1::kDPB(0);
61 const IDRegister::Field AA64ISAR1::kAPA(4);
62 const IDRegister::Field AA64ISAR1::kAPI(8);
63 const IDRegister::Field AA64ISAR1::kJSCVT(12);
64 const IDRegister::Field AA64ISAR1::kFCMA(16);
65 const IDRegister::Field AA64ISAR1::kLRCPC(20);
66 const IDRegister::Field AA64ISAR1::kGPA(24);
67 const IDRegister::Field AA64ISAR1::kGPI(28);
68 const IDRegister::Field AA64ISAR1::kFRINTTS(32);
69 const IDRegister::Field AA64ISAR1::kSB(36);
70 const IDRegister::Field AA64ISAR1::kSPECRES(40);
71 
72 const IDRegister::Field AA64MMFR1::kLO(16);
73 
GetCPUFeatures() const74 CPUFeatures AA64PFR0::GetCPUFeatures() const {
75   CPUFeatures f;
76   if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
77   if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
78   if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
79   if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
80   if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
81   if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
82   return f;
83 }
84 
GetCPUFeatures() const85 CPUFeatures AA64PFR1::GetCPUFeatures() const {
86   CPUFeatures f;
87   if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
88   return f;
89 }
90 
GetCPUFeatures() const91 CPUFeatures AA64ISAR0::GetCPUFeatures() const {
92   CPUFeatures f;
93   if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
94   if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
95   if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
96   if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
97   if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
98   if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
99   if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
100   if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
101   if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
102   if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
103   if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
104   if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
105   if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
106   if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
107   if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
108   return f;
109 }
110 
GetCPUFeatures() const111 CPUFeatures AA64ISAR1::GetCPUFeatures() const {
112   CPUFeatures f;
113   if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
114   if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
115   if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
116   if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
117   if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
118   if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
119 
120   if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
121   if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
122   if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
123   if (Get(kGPA) >= 1) {
124     f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
125   }
126   return f;
127 }
128 
GetCPUFeatures() const129 CPUFeatures AA64MMFR1::GetCPUFeatures() const {
130   CPUFeatures f;
131   if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
132   return f;
133 }
134 
Get(IDRegister::Field field) const135 int IDRegister::Get(IDRegister::Field field) const {
136   int msb = field.GetMsb();
137   int lsb = field.GetLsb();
138   VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
139                      (sizeof(int) * kBitsPerByte));
140   switch (field.GetType()) {
141     case Field::kSigned:
142       return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
143     case Field::kUnsigned:
144       return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
145   }
146   VIXL_UNREACHABLE();
147   return 0;
148 }
149 
InferCPUFeaturesFromIDRegisters()150 CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
151   CPUFeatures f;
152 #define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
153   VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
154 #undef VIXL_COMBINE_ID_REG
155   return f;
156 }
157 
InferCPUFeaturesFromOS(CPUFeatures::QueryIDRegistersOption option)158 CPUFeatures CPU::InferCPUFeaturesFromOS(
159     CPUFeatures::QueryIDRegistersOption option) {
160   CPUFeatures features;
161 
162 #if VIXL_USE_LINUX_HWCAP
163   // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
164   // than explicit bits, but explicit bits allow us to identify features that
165   // the toolchain doesn't know about.
166   static const CPUFeatures::Feature kFeatureBits[] = {
167       // Bits 0-7
168       CPUFeatures::kFP,
169       CPUFeatures::kNEON,
170       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
171       CPUFeatures::kAES,
172       CPUFeatures::kPmull1Q,
173       CPUFeatures::kSHA1,
174       CPUFeatures::kSHA2,
175       CPUFeatures::kCRC32,
176       // Bits 8-15
177       CPUFeatures::kAtomics,
178       CPUFeatures::kFPHalf,
179       CPUFeatures::kNEONHalf,
180       CPUFeatures::kIDRegisterEmulation,
181       CPUFeatures::kRDM,
182       CPUFeatures::kJSCVT,
183       CPUFeatures::kFcma,
184       CPUFeatures::kRCpc,
185       // Bits 16-23
186       CPUFeatures::kDCPoP,
187       CPUFeatures::kSHA3,
188       CPUFeatures::kSM3,
189       CPUFeatures::kSM4,
190       CPUFeatures::kDotProduct,
191       CPUFeatures::kSHA512,
192       CPUFeatures::kSVE,
193       CPUFeatures::kFHM,
194       // Bits 24-27
195       CPUFeatures::kDIT,
196       CPUFeatures::kUSCAT,
197       CPUFeatures::kRCpcImm,
198       CPUFeatures::kFlagM
199       // Bits 28-31 are unassigned.
200   };
201   static const size_t kFeatureBitCount =
202       sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);
203 
204   unsigned long auxv = getauxval(AT_HWCAP);  // NOLINT(runtime/int)
205 
206   VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
207   for (size_t i = 0; i < kFeatureBitCount; i++) {
208     if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]);
209   }
210 #endif  // VIXL_USE_LINUX_HWCAP
211 
212   if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
213       (features.Has(CPUFeatures::kIDRegisterEmulation))) {
214     features.Combine(InferCPUFeaturesFromIDRegisters());
215   }
216   return features;
217 }
218 
219 
220 #ifdef __aarch64__
221 #define VIXL_READ_ID_REG(NAME)                         \
222   NAME CPU::Read##NAME() {                             \
223     uint64_t value = 0;                                \
224     __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value)); \
225     return NAME(value);                                \
226   }
227 #else  // __aarch64__
228 #define VIXL_READ_ID_REG(NAME)                                        \
229   NAME CPU::Read##NAME() {                                            \
230     /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
231     VIXL_ABORT();                                                     \
232   }
233 #endif  // __aarch64__
234 
235 VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)
236 
237 #undef VIXL_READ_ID_REG
238 
239 
240 // Initialise to smallest possible cache size.
241 unsigned CPU::dcache_line_size_ = 1;
242 unsigned CPU::icache_line_size_ = 1;
243 
244 
245 // Currently computes I and D cache line size.
SetUp()246 void CPU::SetUp() {
247   uint32_t cache_type_register = GetCacheType();
248 
249   // The cache type register holds information about the caches, including I
250   // D caches line size.
251   static const int kDCacheLineSizeShift = 16;
252   static const int kICacheLineSizeShift = 0;
253   static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
254   static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;
255 
256   // The cache type register holds the size of the I and D caches in words as
257   // a power of two.
258   uint32_t dcache_line_size_power_of_two =
259       (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
260   uint32_t icache_line_size_power_of_two =
261       (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;
262 
263   dcache_line_size_ = 4 << dcache_line_size_power_of_two;
264   icache_line_size_ = 4 << icache_line_size_power_of_two;
265 }
266 
267 
GetCacheType()268 uint32_t CPU::GetCacheType() {
269 #ifdef __aarch64__
270   uint64_t cache_type_register;
271   // Copy the content of the cache type register to a core register.
272   __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
273                        : [ctr] "=r"(cache_type_register));
274   VIXL_ASSERT(IsUint32(cache_type_register));
275   return static_cast<uint32_t>(cache_type_register);
276 #else
277   // This will lead to a cache with 1 byte long lines, which is fine since
278   // neither EnsureIAndDCacheCoherency nor the simulator will need this
279   // information.
280   return 0;
281 #endif
282 }
283 
284 
EnsureIAndDCacheCoherency(void * address,size_t length)285 void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
286 #ifdef __aarch64__
287   // Implement the cache synchronisation for all targets where AArch64 is the
288   // host, even if we're building the simulator for an AAarch64 host. This
289   // allows for cases where the user wants to simulate code as well as run it
290   // natively.
291 
292   if (length == 0) {
293     return;
294   }
295 
296   // The code below assumes user space cache operations are allowed.
297 
298   // Work out the line sizes for each cache, and use them to determine the
299   // start addresses.
300   uintptr_t start = reinterpret_cast<uintptr_t>(address);
301   uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
302   uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
303   uintptr_t dline = start & ~(dsize - 1);
304   uintptr_t iline = start & ~(isize - 1);
305 
306   // Cache line sizes are always a power of 2.
307   VIXL_ASSERT(IsPowerOf2(dsize));
308   VIXL_ASSERT(IsPowerOf2(isize));
309   uintptr_t end = start + length;
310 
311   do {
312     __asm__ __volatile__(
313         // Clean each line of the D cache containing the target data.
314         //
315         // dc       : Data Cache maintenance
316         //     c    : Clean
317         //      va  : by (Virtual) Address
318         //        u : to the point of Unification
319         // The point of unification for a processor is the point by which the
320         // instruction and data caches are guaranteed to see the same copy of a
321         // memory location. See ARM DDI 0406B page B2-12 for more information.
322         "   dc    cvau, %[dline]\n"
323         :
324         : [dline] "r"(dline)
325         // This code does not write to memory, but the "memory" dependency
326         // prevents GCC from reordering the code.
327         : "memory");
328     dline += dsize;
329   } while (dline < end);
330 
331   __asm__ __volatile__(
332       // Make sure that the data cache operations (above) complete before the
333       // instruction cache operations (below).
334       //
335       // dsb      : Data Synchronisation Barrier
336       //      ish : Inner SHareable domain
337       //
338       // The point of unification for an Inner Shareable shareability domain is
339       // the point by which the instruction and data caches of all the
340       // processors
341       // in that Inner Shareable shareability domain are guaranteed to see the
342       // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
343       // information.
344       "   dsb   ish\n"
345       :
346       :
347       : "memory");
348 
349   do {
350     __asm__ __volatile__(
351         // Invalidate each line of the I cache containing the target data.
352         //
353         // ic      : Instruction Cache maintenance
354         //    i    : Invalidate
355         //     va  : by Address
356         //       u : to the point of Unification
357         "   ic   ivau, %[iline]\n"
358         :
359         : [iline] "r"(iline)
360         : "memory");
361     iline += isize;
362   } while (iline < end);
363 
364   __asm__ __volatile__(
365       // Make sure that the instruction cache operations (above) take effect
366       // before the isb (below).
367       "   dsb  ish\n"
368 
369       // Ensure that any instructions already in the pipeline are discarded and
370       // reloaded from the new data.
371       // isb : Instruction Synchronisation Barrier
372       "   isb\n"
373       :
374       :
375       : "memory");
376 #else
377   // If the host isn't AArch64, we must be using the simulator, so this function
378   // doesn't have to do anything.
379   USE(address, length);
380 #endif
381 }
382 
383 }  // namespace aarch64
384 }  // namespace vixl
385