// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of ARM Limited nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
#include <sys/auxv.h>
#define VIXL_USE_LINUX_HWCAP 1
#endif

#include "../utils-vixl.h"

#include "cpu-aarch64.h"

namespace vixl {
namespace aarch64 {

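// Each Field names one 4-bit field of the corresponding ID register,
// identified by the position of its least-significant bit. Fields are treated
// as unsigned unless marked kSigned; signed fields (such as FP and AdvSIMD)
// use the all-ones value (-1) to mean "not implemented", which is why the
// checks below test ">= 0" for the base feature rather than ">= 1".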
const IDRegister::Field AA64PFR0::kFP(16, Field::kSigned);
const IDRegister::Field AA64PFR0::kAdvSIMD(20, Field::kSigned);
const IDRegister::Field AA64PFR0::kSVE(32);
const IDRegister::Field AA64PFR0::kDIT(48);

const IDRegister::Field AA64PFR1::kBT(0);

const IDRegister::Field AA64ISAR0::kAES(4);
const IDRegister::Field AA64ISAR0::kSHA1(8);
const IDRegister::Field AA64ISAR0::kSHA2(12);
const IDRegister::Field AA64ISAR0::kCRC32(16);
const IDRegister::Field AA64ISAR0::kAtomic(20);
const IDRegister::Field AA64ISAR0::kRDM(28);
const IDRegister::Field AA64ISAR0::kSHA3(32);
const IDRegister::Field AA64ISAR0::kSM3(36);
const IDRegister::Field AA64ISAR0::kSM4(40);
const IDRegister::Field AA64ISAR0::kDP(44);
const IDRegister::Field AA64ISAR0::kFHM(48);
const IDRegister::Field AA64ISAR0::kTS(52);

const IDRegister::Field AA64ISAR1::kDPB(0);
const IDRegister::Field AA64ISAR1::kAPA(4);
const IDRegister::Field AA64ISAR1::kAPI(8);
const IDRegister::Field AA64ISAR1::kJSCVT(12);
const IDRegister::Field AA64ISAR1::kFCMA(16);
const IDRegister::Field AA64ISAR1::kLRCPC(20);
const IDRegister::Field AA64ISAR1::kGPA(24);
const IDRegister::Field AA64ISAR1::kGPI(28);
const IDRegister::Field AA64ISAR1::kFRINTTS(32);
const IDRegister::Field AA64ISAR1::kSB(36);
const IDRegister::Field AA64ISAR1::kSPECRES(40);

const IDRegister::Field AA64MMFR1::kLO(16);

CPUFeatures AA64PFR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kFP) >= 0) f.Combine(CPUFeatures::kFP);
  if (Get(kFP) >= 1) f.Combine(CPUFeatures::kFPHalf);
  if (Get(kAdvSIMD) >= 0) f.Combine(CPUFeatures::kNEON);
  if (Get(kAdvSIMD) >= 1) f.Combine(CPUFeatures::kNEONHalf);
  if (Get(kSVE) >= 1) f.Combine(CPUFeatures::kSVE);
  if (Get(kDIT) >= 1) f.Combine(CPUFeatures::kDIT);
  return f;
}

CPUFeatures AA64PFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kBT) >= 1) f.Combine(CPUFeatures::kBTI);
  return f;
}

CPUFeatures AA64ISAR0::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kAES) >= 1) f.Combine(CPUFeatures::kAES);
  if (Get(kAES) >= 2) f.Combine(CPUFeatures::kPmull1Q);
  if (Get(kSHA1) >= 1) f.Combine(CPUFeatures::kSHA1);
  if (Get(kSHA2) >= 1) f.Combine(CPUFeatures::kSHA2);
  if (Get(kSHA2) >= 2) f.Combine(CPUFeatures::kSHA512);
  if (Get(kCRC32) >= 1) f.Combine(CPUFeatures::kCRC32);
  if (Get(kAtomic) >= 1) f.Combine(CPUFeatures::kAtomics);
  if (Get(kRDM) >= 1) f.Combine(CPUFeatures::kRDM);
  if (Get(kSHA3) >= 1) f.Combine(CPUFeatures::kSHA3);
  if (Get(kSM3) >= 1) f.Combine(CPUFeatures::kSM3);
  if (Get(kSM4) >= 1) f.Combine(CPUFeatures::kSM4);
  if (Get(kDP) >= 1) f.Combine(CPUFeatures::kDotProduct);
  if (Get(kFHM) >= 1) f.Combine(CPUFeatures::kFHM);
  if (Get(kTS) >= 1) f.Combine(CPUFeatures::kFlagM);
  if (Get(kTS) >= 2) f.Combine(CPUFeatures::kAXFlag);
  return f;
}

CPUFeatures AA64ISAR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kDPB) >= 1) f.Combine(CPUFeatures::kDCPoP);
  if (Get(kJSCVT) >= 1) f.Combine(CPUFeatures::kJSCVT);
  if (Get(kFCMA) >= 1) f.Combine(CPUFeatures::kFcma);
  if (Get(kLRCPC) >= 1) f.Combine(CPUFeatures::kRCpc);
  if (Get(kLRCPC) >= 2) f.Combine(CPUFeatures::kRCpcImm);
  if (Get(kFRINTTS) >= 1) f.Combine(CPUFeatures::kFrintToFixedSizedInt);

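  // For pointer authentication, the APA and GPA fields indicate the
  // Architected (QARMA-based) algorithm, while API and GPI indicate an
  // IMPLEMENTATION DEFINED algorithm; any non-zero value implies the base
  // kPAuth / kPAuthGeneric feature.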
  if (Get(kAPI) >= 1) f.Combine(CPUFeatures::kPAuth);
  if (Get(kAPA) >= 1) f.Combine(CPUFeatures::kPAuth, CPUFeatures::kPAuthQARMA);
  if (Get(kGPI) >= 1) f.Combine(CPUFeatures::kPAuthGeneric);
  if (Get(kGPA) >= 1) {
    f.Combine(CPUFeatures::kPAuthGeneric, CPUFeatures::kPAuthGenericQARMA);
  }
  return f;
}

CPUFeatures AA64MMFR1::GetCPUFeatures() const {
  CPUFeatures f;
  if (Get(kLO) >= 1) f.Combine(CPUFeatures::kLORegions);
  return f;
}

int IDRegister::Get(IDRegister::Field field) const {
  int msb = field.GetMsb();
  int lsb = field.GetLsb();
  VIXL_STATIC_ASSERT(static_cast<size_t>(Field::kMaxWidthInBits) <
                     (sizeof(int) * kBitsPerByte));
  switch (field.GetType()) {
    case Field::kSigned:
      return static_cast<int>(ExtractSignedBitfield64(msb, lsb, value_));
    case Field::kUnsigned:
      return static_cast<int>(ExtractUnsignedBitfield64(msb, lsb, value_));
  }
  VIXL_UNREACHABLE();
  return 0;
}

CPUFeatures CPU::InferCPUFeaturesFromIDRegisters() {
  CPUFeatures f;
#define VIXL_COMBINE_ID_REG(NAME) f.Combine(Read##NAME().GetCPUFeatures());
  VIXL_AARCH64_ID_REG_LIST(VIXL_COMBINE_ID_REG)
#undef VIXL_COMBINE_ID_REG
  return f;
}

CPUFeatures CPU::InferCPUFeaturesFromOS(
    CPUFeatures::QueryIDRegistersOption option) {
  CPUFeatures features;

#if VIXL_USE_LINUX_HWCAP
  // Map each set bit onto a feature. Ideally, we'd use HWCAP_* macros rather
  // than explicit bits, but explicit bits allow us to identify features that
  // the toolchain doesn't know about.
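  // The table below must follow the kernel's HWCAP_* bit assignments exactly;
  // for example, bit 3 here corresponds to HWCAP_AES and bit 22 to HWCAP_SVE.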
  static const CPUFeatures::Feature kFeatureBits[] = {
      // Bits 0-7
      CPUFeatures::kFP,
      CPUFeatures::kNEON,
      CPUFeatures::kNone,  // "EVTSTRM", which VIXL doesn't track.
      CPUFeatures::kAES,
      CPUFeatures::kPmull1Q,
      CPUFeatures::kSHA1,
      CPUFeatures::kSHA2,
      CPUFeatures::kCRC32,
      // Bits 8-15
      CPUFeatures::kAtomics,
      CPUFeatures::kFPHalf,
      CPUFeatures::kNEONHalf,
      CPUFeatures::kIDRegisterEmulation,
      CPUFeatures::kRDM,
      CPUFeatures::kJSCVT,
      CPUFeatures::kFcma,
      CPUFeatures::kRCpc,
      // Bits 16-23
      CPUFeatures::kDCPoP,
      CPUFeatures::kSHA3,
      CPUFeatures::kSM3,
      CPUFeatures::kSM4,
      CPUFeatures::kDotProduct,
      CPUFeatures::kSHA512,
      CPUFeatures::kSVE,
      CPUFeatures::kFHM,
      // Bits 24-27
      CPUFeatures::kDIT,
      CPUFeatures::kUSCAT,
      CPUFeatures::kRCpcImm,
      CPUFeatures::kFlagM
      // Bits 28-31 are unassigned.
  };
  static const size_t kFeatureBitCount =
      sizeof(kFeatureBits) / sizeof(kFeatureBits[0]);

  unsigned long auxv = getauxval(AT_HWCAP);  // NOLINT(runtime/int)

  VIXL_STATIC_ASSERT(kFeatureBitCount < (sizeof(auxv) * kBitsPerByte));
  for (size_t i = 0; i < kFeatureBitCount; i++) {
    if (auxv & (1UL << i)) features.Combine(kFeatureBits[i]);
  }
#endif  // VIXL_USE_LINUX_HWCAP

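  // kIDRegisterEmulation corresponds to the kernel's HWCAP_CPUID bit, which
  // advertises that EL0 reads of the ID registers are trapped and emulated,
  // so querying them directly below is safe.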
  if ((option == CPUFeatures::kQueryIDRegistersIfAvailable) &&
      (features.Has(CPUFeatures::kIDRegisterEmulation))) {
    features.Combine(InferCPUFeaturesFromIDRegisters());
  }
  return features;
}


#ifdef __aarch64__
#define VIXL_READ_ID_REG(NAME)                                        \
  NAME CPU::Read##NAME() {                                            \
    uint64_t value = 0;                                               \
    __asm__("mrs %0, ID_" #NAME "_EL1" : "=r"(value));                \
    return NAME(value);                                               \
  }
#else  // __aarch64__
#define VIXL_READ_ID_REG(NAME)                                        \
  NAME CPU::Read##NAME() {                                            \
    /* TODO: Use VIXL_UNREACHABLE once it works in release builds. */ \
    VIXL_ABORT();                                                     \
  }
#endif  // __aarch64__

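// For each register in the list, this defines one reader. On an AArch64 host
// the expansion for AA64PFR0, for example, is:
//   AA64PFR0 CPU::ReadAA64PFR0() {
//     uint64_t value = 0;
//     __asm__("mrs %0, ID_AA64PFR0_EL1" : "=r"(value));
//     return AA64PFR0(value);
//   }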
VIXL_AARCH64_ID_REG_LIST(VIXL_READ_ID_REG)

#undef VIXL_READ_ID_REG


// Initialise to smallest possible cache size.
unsigned CPU::dcache_line_size_ = 1;
unsigned CPU::icache_line_size_ = 1;


// Currently computes I and D cache line size.
void CPU::SetUp() {
  uint32_t cache_type_register = GetCacheType();

  // The cache type register holds information about the caches, including the
  // I and D cache line sizes.
  static const int kDCacheLineSizeShift = 16;
  static const int kICacheLineSizeShift = 0;
  static const uint32_t kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
  static const uint32_t kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

  // The cache type register holds the I and D cache line sizes, in words, as
  // a power of two.
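  // For example, a DminLine field value of 4 gives 4 << 4 = 64-byte data
  // cache lines, which is typical of current Arm cores.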
  uint32_t dcache_line_size_power_of_two =
      (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
  uint32_t icache_line_size_power_of_two =
      (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

  dcache_line_size_ = 4 << dcache_line_size_power_of_two;
  icache_line_size_ = 4 << icache_line_size_power_of_two;
}


uint32_t CPU::GetCacheType() {
#ifdef __aarch64__
  uint64_t cache_type_register;
  // Copy the content of the cache type register to a core register.
  __asm__ __volatile__("mrs %[ctr], ctr_el0"  // NOLINT(runtime/references)
                       : [ctr] "=r"(cache_type_register));
  VIXL_ASSERT(IsUint32(cache_type_register));
  return static_cast<uint32_t>(cache_type_register);
#else
  // This will lead to a cache with 1 byte long lines, which is fine since
  // neither EnsureIAndDCacheCoherency nor the simulator will need this
  // information.
  return 0;
#endif
}


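// A typical (hypothetical) caller writes freshly generated instructions into
// an executable buffer and then flushes it before branching there:
//
//   EmitCode(buffer, size);                        // write new instructions
//   CPU::EnsureIAndDCacheCoherency(buffer, size);  // make them fetchable
//   reinterpret_cast<void (*)()>(buffer)();        // now safe to execute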
void CPU::EnsureIAndDCacheCoherency(void *address, size_t length) {
#ifdef __aarch64__
  // Implement the cache synchronisation for all targets where AArch64 is the
  // host, even if we're building the simulator for an AArch64 host. This
  // allows for cases where the user wants to simulate code as well as run it
  // natively.

  if (length == 0) {
    return;
  }

  // The code below assumes user space cache operations are allowed.

  // Work out the line sizes for each cache, and use them to determine the
  // start addresses.
  uintptr_t start = reinterpret_cast<uintptr_t>(address);
  uintptr_t dsize = static_cast<uintptr_t>(dcache_line_size_);
  uintptr_t isize = static_cast<uintptr_t>(icache_line_size_);
  uintptr_t dline = start & ~(dsize - 1);
  uintptr_t iline = start & ~(isize - 1);

  // Cache line sizes are always a power of 2.
  VIXL_ASSERT(IsPowerOf2(dsize));
  VIXL_ASSERT(IsPowerOf2(isize));
  uintptr_t end = start + length;

  do {
    __asm__ __volatile__(
        // Clean each line of the D cache containing the target data.
        //
        // dc : Data Cache maintenance
        //  c : Clean
        // va : by (Virtual) Address
        //  u : to the point of Unification
        // The point of unification for a processor is the point by which the
        // instruction and data caches are guaranteed to see the same copy of
        // a memory location. See ARM DDI 0406B page B2-12 for more
        // information.
        "   dc cvau, %[dline]\n"
        :
        : [dline] "r"(dline)
        // This code does not write to memory, but the "memory" dependency
        // prevents GCC from reordering the code.
        : "memory");
    dline += dsize;
  } while (dline < end);

  __asm__ __volatile__(
      // Make sure that the data cache operations (above) complete before the
      // instruction cache operations (below).
      //
      // dsb : Data Synchronisation Barrier
      // ish : Inner SHareable domain
      //
      // The point of unification for an Inner Shareable shareability domain
      // is the point by which the instruction and data caches of all the
      // processors in that Inner Shareable shareability domain are guaranteed
      // to see the same copy of a memory location. See ARM DDI 0406B page
      // B2-12 for more information.
344 " dsb ish\n"
345 :
346 :
347 : "memory");
348
349 do {
350 __asm__ __volatile__(
351 // Invalidate each line of the I cache containing the target data.
352 //
353 // ic : Instruction Cache maintenance
354 // i : Invalidate
355 // va : by Address
356 // u : to the point of Unification
357 " ic ivau, %[iline]\n"
358 :
359 : [iline] "r"(iline)
360 : "memory");
361 iline += isize;
362 } while (iline < end);
363
364 __asm__ __volatile__(
365 // Make sure that the instruction cache operations (above) take effect
366 // before the isb (below).
367 " dsb ish\n"
368
369 // Ensure that any instructions already in the pipeline are discarded and
370 // reloaded from the new data.
371 // isb : Instruction Synchronisation Barrier
372 " isb\n"
373 :
374 :
375 : "memory");
376 #else
377 // If the host isn't AArch64, we must be using the simulator, so this function
378 // doesn't have to do anything.
379 USE(address, length);
380 #endif
381 }
382
383 } // namespace aarch64
384 } // namespace vixl
385