1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file declares the AArch64 specific subclass of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 15 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H 16 17 #include "AArch64FrameLowering.h" 18 #include "AArch64ISelLowering.h" 19 #include "AArch64InstrInfo.h" 20 #include "AArch64RegisterInfo.h" 21 #include "AArch64SelectionDAGInfo.h" 22 #include "llvm/CodeGen/GlobalISel/CallLowering.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 24 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" 25 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" 26 #include "llvm/CodeGen/TargetSubtargetInfo.h" 27 #include "llvm/IR/DataLayout.h" 28 #include <string> 29 30 #define GET_SUBTARGETINFO_HEADER 31 #include "AArch64GenSubtargetInfo.inc" 32 33 namespace llvm { 34 class GlobalValue; 35 class StringRef; 36 class Triple; 37 38 class AArch64Subtarget final : public AArch64GenSubtargetInfo { 39 public: 40 enum ARMProcFamilyEnum : uint8_t { 41 Others, 42 CortexA35, 43 CortexA53, 44 CortexA55, 45 CortexA57, 46 CortexA72, 47 CortexA73, 48 CortexA75, 49 Cyclone, 50 ExynosM1, 51 ExynosM3, 52 Falkor, 53 Kryo, 54 Saphira, 55 ThunderX2T99, 56 ThunderX, 57 ThunderXT81, 58 ThunderXT83, 59 ThunderXT88 60 }; 61 62 protected: 63 /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. 64 ARMProcFamilyEnum ARMProcFamily = Others; 65 66 bool HasV8_1aOps = false; 67 bool HasV8_2aOps = false; 68 bool HasV8_3aOps = false; 69 bool HasV8_4aOps = false; 70 71 bool HasFPARMv8 = false; 72 bool HasNEON = false; 73 bool HasCrypto = false; 74 bool HasDotProd = false; 75 bool HasCRC = false; 76 bool HasLSE = false; 77 bool HasRAS = false; 78 bool HasRDM = false; 79 bool HasPerfMon = false; 80 bool HasFullFP16 = false; 81 bool HasSPE = false; 82 83 // ARMv8.4 Crypto extensions 84 bool HasSM4 = true; 85 bool HasSHA3 = true; 86 87 bool HasSHA2 = true; 88 bool HasAES = true; 89 90 bool HasLSLFast = false; 91 bool HasSVE = false; 92 bool HasRCPC = false; 93 bool HasAggressiveFMA = false; 94 95 // HasZeroCycleRegMove - Has zero-cycle register mov instructions. 96 bool HasZeroCycleRegMove = false; 97 98 // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. 99 bool HasZeroCycleZeroing = false; 100 bool HasZeroCycleZeroingFPWorkaround = false; 101 102 // StrictAlign - Disallow unaligned memory accesses. 103 bool StrictAlign = false; 104 105 // NegativeImmediates - transform instructions with negative immediates 106 bool NegativeImmediates = true; 107 108 // Enable 64-bit vectorization in SLP. 109 unsigned MinVectorRegisterBitWidth = 64; 110 111 bool UseAA = false; 112 bool PredictableSelectIsExpensive = false; 113 bool BalanceFPOps = false; 114 bool CustomAsCheapAsMove = false; 115 bool ExynosAsCheapAsMove = false; 116 bool UsePostRAScheduler = false; 117 bool Misaligned128StoreIsSlow = false; 118 bool Paired128IsSlow = false; 119 bool STRQroIsSlow = false; 120 bool UseAlternateSExtLoadCVTF32Pattern = false; 121 bool HasArithmeticBccFusion = false; 122 bool HasArithmeticCbzFusion = false; 123 bool HasFuseAddress = false; 124 bool HasFuseAES = false; 125 bool HasFuseCCSelect = false; 126 bool HasFuseLiterals = false; 127 bool DisableLatencySchedHeuristic = false; 128 bool UseRSqrt = false; 129 uint8_t MaxInterleaveFactor = 2; 130 uint8_t VectorInsertExtractBaseCost = 3; 131 uint16_t CacheLineSize = 0; 132 uint16_t PrefetchDistance = 0; 133 uint16_t MinPrefetchStride = 1; 134 unsigned MaxPrefetchIterationsAhead = UINT_MAX; 135 unsigned PrefFunctionAlignment = 0; 136 unsigned PrefLoopAlignment = 0; 137 unsigned MaxJumpTableSize = 0; 138 unsigned WideningBaseCost = 0; 139 140 // ReserveX18 - X18 is not available as a general purpose register. 141 bool ReserveX18; 142 143 // ReserveX20 - X20 is not available as a general purpose register. 144 bool ReserveX20 = false; 145 146 bool IsLittle; 147 148 /// TargetTriple - What processor and OS we're targeting. 149 Triple TargetTriple; 150 151 AArch64FrameLowering FrameLowering; 152 AArch64InstrInfo InstrInfo; 153 AArch64SelectionDAGInfo TSInfo; 154 AArch64TargetLowering TLInfo; 155 156 /// GlobalISel related APIs. 157 std::unique_ptr<CallLowering> CallLoweringInfo; 158 std::unique_ptr<InstructionSelector> InstSelector; 159 std::unique_ptr<LegalizerInfo> Legalizer; 160 std::unique_ptr<RegisterBankInfo> RegBankInfo; 161 162 private: 163 /// initializeSubtargetDependencies - Initializes using CPUString and the 164 /// passed in feature string so that we can use initializer lists for 165 /// subtarget initialization. 166 AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, 167 StringRef CPUString); 168 169 /// Initialize properties based on the selected processor family. 170 void initializeProperties(); 171 172 public: 173 /// This constructor initializes the data members to match that 174 /// of the specified triple. 175 AArch64Subtarget(const Triple &TT, const std::string &CPU, 176 const std::string &FS, const TargetMachine &TM, 177 bool LittleEndian); 178 getSelectionDAGInfo()179 const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { 180 return &TSInfo; 181 } getFrameLowering()182 const AArch64FrameLowering *getFrameLowering() const override { 183 return &FrameLowering; 184 } getTargetLowering()185 const AArch64TargetLowering *getTargetLowering() const override { 186 return &TLInfo; 187 } getInstrInfo()188 const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } getRegisterInfo()189 const AArch64RegisterInfo *getRegisterInfo() const override { 190 return &getInstrInfo()->getRegisterInfo(); 191 } 192 const CallLowering *getCallLowering() const override; 193 const InstructionSelector *getInstructionSelector() const override; 194 const LegalizerInfo *getLegalizerInfo() const override; 195 const RegisterBankInfo *getRegBankInfo() const override; getTargetTriple()196 const Triple &getTargetTriple() const { return TargetTriple; } enableMachineScheduler()197 bool enableMachineScheduler() const override { return true; } enablePostRAScheduler()198 bool enablePostRAScheduler() const override { 199 return UsePostRAScheduler; 200 } 201 202 /// Returns ARM processor family. 203 /// Avoid this function! CPU specifics should be kept local to this class 204 /// and preferably modeled with SubtargetFeatures or properties in 205 /// initializeProperties(). getProcFamily()206 ARMProcFamilyEnum getProcFamily() const { 207 return ARMProcFamily; 208 } 209 hasV8_1aOps()210 bool hasV8_1aOps() const { return HasV8_1aOps; } hasV8_2aOps()211 bool hasV8_2aOps() const { return HasV8_2aOps; } hasV8_3aOps()212 bool hasV8_3aOps() const { return HasV8_3aOps; } hasV8_4aOps()213 bool hasV8_4aOps() const { return HasV8_4aOps; } 214 hasZeroCycleRegMove()215 bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } 216 hasZeroCycleZeroing()217 bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } 218 hasZeroCycleZeroingFPWorkaround()219 bool hasZeroCycleZeroingFPWorkaround() const { 220 return HasZeroCycleZeroingFPWorkaround; 221 } 222 requiresStrictAlign()223 bool requiresStrictAlign() const { return StrictAlign; } 224 isXRaySupported()225 bool isXRaySupported() const override { return true; } 226 getMinVectorRegisterBitWidth()227 unsigned getMinVectorRegisterBitWidth() const { 228 return MinVectorRegisterBitWidth; 229 } 230 isX18Reserved()231 bool isX18Reserved() const { return ReserveX18; } isX20Reserved()232 bool isX20Reserved() const { return ReserveX20; } hasFPARMv8()233 bool hasFPARMv8() const { return HasFPARMv8; } hasNEON()234 bool hasNEON() const { return HasNEON; } hasCrypto()235 bool hasCrypto() const { return HasCrypto; } hasDotProd()236 bool hasDotProd() const { return HasDotProd; } hasCRC()237 bool hasCRC() const { return HasCRC; } hasLSE()238 bool hasLSE() const { return HasLSE; } hasRAS()239 bool hasRAS() const { return HasRAS; } hasRDM()240 bool hasRDM() const { return HasRDM; } hasSM4()241 bool hasSM4() const { return HasSM4; } hasSHA3()242 bool hasSHA3() const { return HasSHA3; } hasSHA2()243 bool hasSHA2() const { return HasSHA2; } hasAES()244 bool hasAES() const { return HasAES; } balanceFPOps()245 bool balanceFPOps() const { return BalanceFPOps; } predictableSelectIsExpensive()246 bool predictableSelectIsExpensive() const { 247 return PredictableSelectIsExpensive; 248 } hasCustomCheapAsMoveHandling()249 bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; } hasExynosCheapAsMoveHandling()250 bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; } isMisaligned128StoreSlow()251 bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; } isPaired128Slow()252 bool isPaired128Slow() const { return Paired128IsSlow; } isSTRQroSlow()253 bool isSTRQroSlow() const { return STRQroIsSlow; } useAlternateSExtLoadCVTF32Pattern()254 bool useAlternateSExtLoadCVTF32Pattern() const { 255 return UseAlternateSExtLoadCVTF32Pattern; 256 } hasArithmeticBccFusion()257 bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } hasArithmeticCbzFusion()258 bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } hasFuseAddress()259 bool hasFuseAddress() const { return HasFuseAddress; } hasFuseAES()260 bool hasFuseAES() const { return HasFuseAES; } hasFuseCCSelect()261 bool hasFuseCCSelect() const { return HasFuseCCSelect; } hasFuseLiterals()262 bool hasFuseLiterals() const { return HasFuseLiterals; } 263 264 /// Return true if the CPU supports any kind of instruction fusion. hasFusion()265 bool hasFusion() const { 266 return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || 267 hasFuseAES() || hasFuseCCSelect() || hasFuseLiterals(); 268 } 269 useRSqrt()270 bool useRSqrt() const { return UseRSqrt; } getMaxInterleaveFactor()271 unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } getVectorInsertExtractBaseCost()272 unsigned getVectorInsertExtractBaseCost() const { 273 return VectorInsertExtractBaseCost; 274 } getCacheLineSize()275 unsigned getCacheLineSize() const { return CacheLineSize; } getPrefetchDistance()276 unsigned getPrefetchDistance() const { return PrefetchDistance; } getMinPrefetchStride()277 unsigned getMinPrefetchStride() const { return MinPrefetchStride; } getMaxPrefetchIterationsAhead()278 unsigned getMaxPrefetchIterationsAhead() const { 279 return MaxPrefetchIterationsAhead; 280 } getPrefFunctionAlignment()281 unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; } getPrefLoopAlignment()282 unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } 283 getMaximumJumpTableSize()284 unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; } 285 getWideningBaseCost()286 unsigned getWideningBaseCost() const { return WideningBaseCost; } 287 288 /// CPU has TBI (top byte of addresses is ignored during HW address 289 /// translation) and OS enables it. 290 bool supportsAddressTopByteIgnored() const; 291 hasPerfMon()292 bool hasPerfMon() const { return HasPerfMon; } hasFullFP16()293 bool hasFullFP16() const { return HasFullFP16; } hasSPE()294 bool hasSPE() const { return HasSPE; } hasLSLFast()295 bool hasLSLFast() const { return HasLSLFast; } hasSVE()296 bool hasSVE() const { return HasSVE; } hasRCPC()297 bool hasRCPC() const { return HasRCPC; } hasAggressiveFMA()298 bool hasAggressiveFMA() const { return HasAggressiveFMA; } 299 isLittleEndian()300 bool isLittleEndian() const { return IsLittle; } 301 isTargetDarwin()302 bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } isTargetIOS()303 bool isTargetIOS() const { return TargetTriple.isiOS(); } isTargetLinux()304 bool isTargetLinux() const { return TargetTriple.isOSLinux(); } isTargetWindows()305 bool isTargetWindows() const { return TargetTriple.isOSWindows(); } isTargetAndroid()306 bool isTargetAndroid() const { return TargetTriple.isAndroid(); } isTargetFuchsia()307 bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); } 308 isTargetCOFF()309 bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } isTargetELF()310 bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } isTargetMachO()311 bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } 312 useAA()313 bool useAA() const override { return UseAA; } 314 useSmallAddressing()315 bool useSmallAddressing() const { 316 switch (TLInfo.getTargetMachine().getCodeModel()) { 317 case CodeModel::Kernel: 318 // Kernel is currently allowed only for Fuchsia targets, 319 // where it is the same as Small for almost all purposes. 320 case CodeModel::Small: 321 return true; 322 default: 323 return false; 324 } 325 } 326 327 /// ParseSubtargetFeatures - Parses features string setting specified 328 /// subtarget options. Definition of function is auto generated by tblgen. 329 void ParseSubtargetFeatures(StringRef CPU, StringRef FS); 330 331 /// ClassifyGlobalReference - Find the target operand flags that describe 332 /// how a global value should be referenced for the current subtarget. 333 unsigned char ClassifyGlobalReference(const GlobalValue *GV, 334 const TargetMachine &TM) const; 335 336 unsigned char classifyGlobalFunctionReference(const GlobalValue *GV, 337 const TargetMachine &TM) const; 338 339 void overrideSchedPolicy(MachineSchedPolicy &Policy, 340 unsigned NumRegionInstrs) const override; 341 342 bool enableEarlyIfConversion() const override; 343 344 std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override; 345 isCallingConvWin64(CallingConv::ID CC)346 bool isCallingConvWin64(CallingConv::ID CC) const { 347 switch (CC) { 348 case CallingConv::C: 349 return isTargetWindows(); 350 case CallingConv::Win64: 351 return true; 352 default: 353 return false; 354 } 355 } 356 357 void mirFileLoaded(MachineFunction &MF) const override; 358 }; 359 } // End llvm namespace 360 361 #endif 362