// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {

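// Each Field below is identified by the bit position of its least significant
// bit within the corresponding ID register; ID register fields are four bits
// wide. Fields are unsigned unless marked Field::kSigned (FP and AdvSIMD),
// where an all-ones value (-1) means "not implemented".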
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kRAS(28);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);
const IDRegister::Field AA64PFR0::kCSV2(56);
const IDRegister::Field AA64PFR0::kCSV3(60);

const IDRegister::Field AA64PFR1::kBT(0);
const IDRegister::Field AA64PFR1::kSSBS(4);
const IDRegister::Field AA64PFR1::kMTE(8);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);
const IDRegister::Field AA64ISAR0::kRNDR(60);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);
const IDRegister::Field AA64ISAR1::kBF16(44);
const IDRegister::Field AA64ISAR1::kDGH(48);
const IDRegister::Field AA64ISAR1::kI8MM(52);

const IDRegister::Field AA64MMFR1::kLO(16);

const IDRegister::Field AA64MMFR2::kAT(32);

const IDRegister::Field AA64ZFR0::kBF16(20);
const IDRegister::Field AA64ZFR0::kI8MM(44);
const IDRegister::Field AA64ZFR0::kF32MM(52);
const IDRegister::Field AA64ZFR0::kF64MM(56);

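// Each GetCPUFeatures() implementation below maps ID register field values to
// VIXL CPUFeatures. Unsigned fields use zero for "not implemented", so their
// checks start at >= 1; the signed FP and AdvSIMD fields use -1 for "not
// implemented", so their base features are gated on >= 0.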
CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kRAS) >= 1) f.Combine(CPUFeatures::kRAS);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  if (Get(kCSV2) >= 1) f.Combine(CPUFeatures::kCSV2);
  if (Get(kCSV2) >= 2) f.Combine(CPUFeatures::kSCXTNUM);
  if (Get(kCSV3) >= 1) f.Combine(CPUFeatures::kCSV3);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  if (Get(kSSBS) >= 1) f.Combine(CPUFeatures::kSSBS);
  if (Get(kSSBS) >= 2) f.Combine(CPUFeatures::kSSBSControl);
  if (Get(kMTE) >= 1) f.Combine(CPUFeatures::kMTEInstructions);
  if (Get(kMTE) >= 2) f.Combine(CPUFeatures::kMTE);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  if (Get(kRNDR) >= 1) f.Combine(CPUFeatures::kRNG);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kDPB) >= 2) f.Combine(CPUFeatures::kDCCVADP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);
  if (Get(kSB) >= 1) f.Combine(CPUFeatures::kSB);
  if (Get(kSPECRES) >= 1) f.Combine(CPUFeatures::kSPECRES);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kBF16);
  if (Get(kDGH) >= 1) f.Combine(CPUFeatures::kDGH);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kI8MM);

  // Only one of these fields should be non-zero, but they have the same
  // encodings, so merge the logic.
  int apx = std::max(Get(kAPI), Get(kAPA));
  if (apx >= 1) {
    f.Combine(CPUFeatures::kPAuth);
    // APA (rather than API) indicates QARMA.
    if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuthQARMA);
    if (apx == 0b0010) f.Combine(CPUFeatures::kPAuthEnhancedPAC);
    if (apx >= 0b0011) f.Combine(CPUFeatures::kPAuthEnhancedPAC2);
    if (apx >= 0b0100) f.Combine(CPUFeatures::kPAuthFPAC);
    if (apx >= 0b0101) f.Combine(CPUFeatures::kPAuthFPACCombined);
  }

  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  return f;
}

CPUFeatures AA64MMFR2::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAT) >= 1) f.Combine(CPUFeatures::kUSCAT);
  return f;
}

CPUFeatures AA64ZFR0::GetCPUFeatures() const {
  // This register is only available with SVE, but reads-as-zero in its
  // absence, so it's always safe to read it.
  CPUFeatures f;
  if (Get(kF64MM) >= 1) f.Combine(CPUFeatures::kSVEF64MM);
  if (Get(kF32MM) >= 1) f.Combine(CPUFeatures::kSVEF32MM);
  if (Get(kI8MM) >= 1) f.Combine(CPUFeatures::kSVEI8MM);
  if (Get(kBF16) >= 1) f.Combine(CPUFeatures::kSVEBF16);
  return f;
}

int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}


CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
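// MRS_ARG is not used here; it is only needed by VIXL_READ_ID_REG below.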
#define VIXL_COMBINE_ID_REG(NAME, MRS_ARG) \
  f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#if VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
  static const CPUFeatures::Feature kFeatureBits[] =
      {// Bits 0-7
       CPUFeatures::kFP,
       CPUFeatures::kNEON,
       CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
       CPUFeatures::kAES,
       CPUFeatures::kPmull1Q,
       CPUFeatures::kSHA1,
       CPUFeatures::kSHA2,
       CPUFeatures::kCRC32,
       // Bits 8-15
       CPUFeatures::kAtomics,
       CPUFeatures::kFPHalf,
       CPUFeatures::kNEONHalf,
       CPUFeatures::kIDRegisterEmulation,
       CPUFeatures::kRDM,
       CPUFeatures::kJSCVT,
       CPUFeatures::kFcma,
       CPUFeatures::kRCpc,
       // Bits 16-23
       CPUFeatures::kDCPoP,
       CPUFeatures::kSHA3,
       CPUFeatures::kSM3,
       CPUFeatures::kSM4,
       CPUFeatures::kDotProduct,
       CPUFeatures::kSHA512,
       CPUFeatures::kSVE,
       CPUFeatures::kFHM,
       // Bits 24-31
       CPUFeatures::kDIT,
       CPUFeatures::kUSCAT,
       CPUFeatures::kRCpcImm,
       CPUFeatures::kFlagM,
       CPUFeatures::kSSBSControl,
       CPUFeatures::kSB,
       CPUFeatures::kPAuth,
       CPUFeatures::kPAuthGeneric,
       // Bits 32-39
       CPUFeatures::kDCCVADP,
       CPUFeatures::kNone,  // "sve2"
       CPUFeatures::kNone,  // "sveaes"
       CPUFeatures::kNone,  // "svepmull"
       CPUFeatures::kNone,  // "svebitperm"
       CPUFeatures::kNone,  // "svesha3"
       CPUFeatures::kNone,  // "svesm4"
       CPUFeatures::kFrintToFixedSizedInt,
       // Bits 40-47
       CPUFeatures::kSVEI8MM,
       CPUFeatures::kSVEF32MM,
       CPUFeatures::kSVEF64MM,
       CPUFeatures::kSVEBF16,
       CPUFeatures::kI8MM,
       CPUFeatures::kBF16,
       CPUFeatures::kDGH,
       CPUFeatures::kRNG,
       // Bits 48+
       CPUFeatures::kBTI};

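  // AT_HWCAP reports feature bits 0-31 and AT_HWCAP2 reports the rest, so pack
  // them into a single 64-bit value that indexes kFeatureBits directly.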
  uint64_t hwcap_low32 = getauxval(AT_HWCAP);
  uint64_t hwcap_high32 = getauxval(AT_HWCAP2);
  VIXL_ASSERT(IsUint32(hwcap_low32));
  VIXL_ASSERT(IsUint32(hwcap_high32));
  uint64_t hwcap = hwcap_low32 | (hwcap_high32 << 32);

  VIXL_STATIC_ASSERT(ArrayLength(kFeatureBits) < 64);
  for (size_t i = 0; i < ArrayLength(kFeatureBits); i++) {
    if (hwcap & (UINT64_C(1) << i)) features.Combine(kFeatureBits[i]);
  }
#endif  // VIXL_USE_LINUX_HWCAP

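  // The "cpuid" HWCAP bit (kIDRegisterEmulation) indicates that the kernel
  // traps and emulates userspace MRS reads of the ID registers, making it safe
  // to query them directly.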
  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}

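// Each entry in VIXL_AARCH64_ID_REG_LIST supplies a wrapper class name and the
// corresponding `mrs` operand string. On AArch64 hosts the generated
// Read<NAME>() reads the register inline; on other hosts it must never be
// called.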
#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG)        \
  NAME CPU::Read##NAME() {                     \
    uint64_t value = 0;                        \
    __asm__("mrs %0, " MRS_ARG : "=r"(value)); \
    return NAME(value);                        \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME, MRS_ARG) \
  NAME CPU::Read##NAME() {              \
    VIXL_UNREACHABLE();                 \
    return NAME(0);                     \
  }
#endif  // __aarch64__

VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG



// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes I and D cache line size.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register holds the I and D cache line sizes, in words, as
  // a power of two.
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

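  // Convert from words to bytes: (1 << n) words of four bytes each is
  // (4 << n) bytes, so a typical field value of 4 gives 64-byte lines.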
  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}

uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // This will lead to a cache with 1 byte long lines, which is fine since
  // neither EnsureIAndDCacheCoherency nor the simulator will need this
  // information.
  return 0;
#endif
}


// Query the SVE vector length. This requires CPUFeatures::kSVE.
int CPU::ReadSVEVectorLengthInBits() {
#ifdef __aarch64__
  uint64_t vl;
  // To support compilers that don't understand `rdvl`, encode the value
  // directly and move it manually.
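  // `rdvl x0, #8` multiplies the vector length in bytes by eight, so the
  // result is already the vector length in bits.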
  __asm__(
      " .word 0x04bf5100\n"  // rdvl x0, #8
      " mov %[vl], x0\n"
      : [vl] "=r"(vl)
      :
      : "x0");
  VIXL_ASSERT(vl <= INT_MAX);
  return static_cast<int>(vl);
#else
  VIXL_UNREACHABLE();
  return 0;
#endif
}

void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.
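  // (Linux normally sets SCTLR_EL1.UCI, which lets EL0 execute `dc cvau` and
  // `ic ivau` without trapping.)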

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
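  // Round the start address down to a line boundary for each cache;
  // `addr & ~(size - 1)` relies on the line sizes being powers of two
  // (asserted below).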
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc       : Data Cache maintenance
        //     c    : Clean
        //      va  : by (Virtual) Address
        //        u : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of a
        // memory location. See ARM DDI 0406B page B2-12 for more information.
        " dc cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb      : Data Synchronisation Barrier
      //      ish : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain is
      // the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
      " dsb ish\n"
      :
      :
      : "memory");

  do {
    __asm__ __volatile__(
        // Invalidate each line of the I cache containing the target data.
        //
        // ic      : Instruction Cache maintenance
        //    i    : Invalidate
        //     va  : by Address
        //       u : to the point of Unification
        " ic ivau, %[iline]\n"
        :
        : [iline] "r"(iline)
        : "memory");
    iline += isize;
  } while (iline < end);

  __asm__ __volatile__(
      // Make sure that the instruction cache operations (above) take effect
      // before the isb (below).
      " dsb ish\n"

      // Ensure that any instructions already in the pipeline are discarded and
      // reloaded from the new data.
      // isb : Instruction Synchronisation Barrier
      " isb\n"
      :
      :
      : "memory");
#else
  // If the host isn't AArch64, we must be using the simulator, so this
  // function doesn't have to do anything.
  USE(address, length);
#endif
}

}  // namespace aarch64
}  // namespace vixl