//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
                AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicate<"FeatureCRC", "crc">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicate<"FeatureSPE", "spe">;

def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;

//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                               [SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>,
                                                SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                              [SDTCisSameAs<0, 1>,
                                               SDTCisSameAs<0, 2>,
                                               SDTCisInt<0>,
                                               SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                                 [SDTCisSameAs<0, 2>,
                                                  SDTCisSameAs<0, 3>,
                                                  SDTCisInt<0>,
                                                  SDTCisVT<1, i32>,
                                                  SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
                                      [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
                                    [SDTCisSameAs<0, 1>,
                                     SDTCisSameAs<0, 2>,
                                     SDTCisInt<3>,
                                     SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
                                    [SDTCisFP<0>,
                                     SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1,
                                        [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// number of operands (the variable)
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;


// Node definitions.
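// (Each SDNode below names an AArch64ISD opcode; the opcodes themselves are
// declared in AArch64ISelLowering.h.)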
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
                         SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                          SDNPVariadic]>;
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                           [SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                        [SDNPHasChain]>;
def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                         [SDNPHasChain]>;
def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                        [SDNPHasChain]>;
def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                         [SDNPHasChain]>;


def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn>;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
                             [SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;

def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;

def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast :
    SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;

def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//
def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
def ForCodeSize : Predicate<"ForCodeSize">;
def NotForCodeSize : Predicate<"!ForCodeSize">;

include "AArch64InstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
                              [(AArch64callseq_start timm:$amt)]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
                     [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
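// For example (illustrative), "MOVaddr x0, var" is later expanded to:
//   adrp x0, var
//   add  x0, x0, :lo12:var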
def MOVaddr
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                              tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                              tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                              tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                              tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                              tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                              texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;

} // isReMaterializable, isCodeGenOnly

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
}

def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;

def MRS : MRSI;
def MSR : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;

// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;

// The cycle counter PMC register is PMCCNTR_EL0.
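// (0xdce8 is the MRS immediate encoding of PMCCNTR_EL0, i.e. S3_3_C9_C13_0,
// just as 0xde82 above encodes TPIDR_EL0.)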
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;

// Generic system instructions
def SYSxt  : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;

def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;

//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//

defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;

let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;

// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;

// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;

def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;

def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;

// Final group of aliases covers true "mov $Rd, $imm" cases.
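// For example (illustrative), "mov w0, #0x10000" matches the 32-bit/lsl #16
// MOVZ alias defined below and is encoded as "movz w0, #0x1, lsl #16".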
multiclass movw_mov_alias<string basename, Instruction INST, RegisterClass GPR,
                          int width, int shift> {
  def _asmoperand : AsmOperandClass {
    let Name = basename # width # "_lsl" # shift # "MovAlias";
    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
                               # shift # ">";
    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
  }

  def _movimm : Operand<i32> {
    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
  }

  def : InstAlias<"mov $Rd, $imm",
                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}

defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;

defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;

defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;

let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
    isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, we can select
// directly to the real instructions and get rid of these pseudos.

def MOVi32imm
    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
             [(set GPR32:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
def MOVi64imm
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
             [(set GPR64:$dst, imm:$src)]>,
      Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly

// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def trunc_imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def : Pat<(i64 i64imm_32bit:$src),
          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;

// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;


def : Pat<(f32 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
          (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;


// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
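// The selected sequence materializes the full 64-bit address, e.g.
// (illustrative) for a global "var":
//   movz x0, #:abs_g3:var
//   movk x0, #:abs_g2_nc:var
//   movk x0, #:abs_g1_nc:var
//   movk x0, #:abs_g0_nc:var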
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
                               tglobaladdr:$g1, tglobaladdr:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
                                  tglobaladdr:$g2, 32),
                          tglobaladdr:$g1, 16),
                  tglobaladdr:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
                               tblockaddress:$g1, tblockaddress:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
                                  tblockaddress:$g2, 32),
                          tblockaddress:$g1, 16),
                  tblockaddress:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
                               tconstpool:$g1, tconstpool:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
                                  tconstpool:$g2, 32),
                          tconstpool:$g1, 16),
                  tconstpool:$g0, 0)>;

def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
                               tjumptable:$g1, tjumptable:$g0),
          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
                                  tjumptable:$g2, 32),
                          tjumptable:$g1, 16),
                  tjumptable:$g0, 0)>;


//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//

// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;

def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;

// Add/subtract
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;

def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;

defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;

// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
          (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
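// For example (illustrative), (add w0, #-5) is selected as "subs wD, w0, #5"
// with the flags result left unused.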
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}

def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;

def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;


// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
let isCodeGenOnly = 1 in {
defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>;
defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>;
}

// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;

def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;

// Multiply-add
let AddedComplexity = 7 in {
defm MADD : MulAccum<0, "madd", add>;
defm MSUB : MulAccum<1, "msub", sub>;

def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;

def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
          (MSUBXrrr GPR64:$Rn, GPR64:$Rm,
                     XZR)>;
} // AddedComplexity = 7

let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;

def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;

def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
} // AddedComplexity = 5

def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;

// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;

// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;

def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;

// v8.1 atomic CAS
defm CAS   : CompareAndSwap<0, 0, "">;
defm CASA  : CompareAndSwap<1, 0, "a">;
defm CASL  : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;

// v8.1 atomic CASP
defm CASP   : CompareAndSwapPair<0, 0, "">;
defm CASPA  : CompareAndSwapPair<1, 0, "a">;
defm CASPL  : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;

// v8.1 atomic SWP
defm SWP   : Swap<0, 0, "">;
defm SWPA  : Swap<1, 0, "a">;
defm SWPL  : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;

// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
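// For example (illustrative), "ldaddal w0, w1, [x2]" atomically adds w0 to
// the word at [x2] with acquire/release semantics, returning the old value
// in w1.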
defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;

defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;

defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;

defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;

defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;

defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;

defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;

defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;

// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=XZR"
defm : STOPregister<"stadd", "LDADD">;   // STADDx
defm : STOPregister<"stclr", "LDCLR">;   // STCLRx
defm : STOPregister<"steor", "LDEOR">;   // STEORx
defm : STOPregister<"stset", "LDSET">;   // STSETx
defm : STOPregister<"stsmax", "LDSMAX">; // STSMAXx
defm : STOPregister<"stsmin", "LDSMIN">; // STSMINx
defm : STOPregister<"stumax", "LDUMAX">; // STUMAXx
defm : STOPregister<"stumin", "LDUMIN">; // STUMINx

//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//

// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND  : LogicalImm<0b00, "and", and, "bic">;
defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR  : LogicalImm<0b01, "orr", or, "orn">;

// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
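// For example (illustrative), "mov w0, #0x55555555" is only encodable through
// the ORRWri form below, since no single MOVZ/MOVN can produce that value.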
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
                                          logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
                                          logical_imm64:$imm), 0>;


// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND  : LogicalReg<0b00, 0, "and", and>;
defm BIC  : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON  : LogicalReg<0b10, 1, "eon",
                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
defm ORN  : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR  : LogicalReg<0b01, 0, "orr", or>;

def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;

def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;

def : InstAlias<"mvn $Wd, $Wm$sh",
                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;

def : InstAlias<"tst $src1, $src2",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;

def : InstAlias<"tst $src1, $src2$sh",
                (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
                (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;


def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;


//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//

defm CLS  : OneOperandData<0b101, "cls">;
defm CLZ  : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;

def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;

def REV16Wr : OneWRegData<0b001, "rev16",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
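// (Compare REVWr/REVXr just below: they use opcodes 0b010 and 0b011
// respectively, rather than one opcode distinguished only by the size bit.)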
def REVWr   : OneWRegData<0b010, "rev", bswap>;
def REVXr   : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}

def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 31 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 7 ? 7 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 15 ? 15 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
  uint64_t enc = 63 - N->getZExtValue();
  enc = enc > 31 ?
                  31 : enc;
  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;

def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                              (i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                              (i64 (i64shift_b imm0_63:$imm)))>;

let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}

def : InstAlias<"asr $dst, $src, $shift",
                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;

def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;

//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;

//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL : CondSelect<0, 0b00, "csel">;

def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;

def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;

def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
          (CSINVXr XZR, XZR, (i32 imm:$cc))>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel, []>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET  : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
} // isCall

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
  let AsmString = ".tlsdesccall $sym";
}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;

//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;

//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier

let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;

//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
def BRK   : ExceptionGeneration<0b001, 0b00, "brk">;
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC   : ExceptionGeneration<0b000, 0b11, "smc">;
def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;

// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>;

//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">;

defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">;

//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8,   "ldr", untyped, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>;

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend)>; 1352} 1353 1354// Match all load 64 bits width whose type is compatible with FPR64 1355multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, 1356 Instruction LOADW, Instruction LOADX> { 1357 1358 def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1359 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1360 1361 def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1362 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1363} 1364 1365let AddedComplexity = 10 in { 1366let Predicates = [IsLE] in { 1367 // We must use LD1 to perform vector loads in big-endian. 1368 defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; 1369 defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; 1370 defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; 1371 defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; 1372 defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>; 1373} 1374 1375defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; 1376defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; 1377 1378// Match all load 128 bits width whose type is compatible with FPR128 1379let Predicates = [IsLE] in { 1380 // We must use LD1 to perform vector loads in big-endian. 1381 defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>; 1382 defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>; 1383 defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>; 1384 defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>; 1385 defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>; 1386 defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>; 1387 defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>; 1388} 1389} // AddedComplexity = 10 1390 1391// zextload -> i64 1392multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop, 1393 Instruction INSTW, Instruction INSTX> { 1394 def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1395 (SUBREG_TO_REG (i64 0), 1396 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), 1397 sub_32)>; 1398 1399 def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1400 (SUBREG_TO_REG (i64 0), 1401 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), 1402 sub_32)>; 1403} 1404 1405let AddedComplexity = 10 in { 1406 defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>; 1407 defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>; 1408 defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>; 1409 1410 // zextloadi1 -> zextloadi8 1411 defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 1412 1413 // extload -> zextload 1414 defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 1415 defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 1416 defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; 1417 1418 // extloadi1 -> zextloadi8 1419 defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>; 1420} 1421 1422 1423// extload/zextload -> i32 1424multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop, 1425 Instruction INSTW, Instruction INSTX> { 1426 def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1427 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1428 1429 def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1430 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1431 1432} 1433 1434let AddedComplexity = 10 in { 1435 // extload -> zextload 1436 defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 1437 defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 1438 defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
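// Illustration only (a sketch, not an additional definition): with ro8 and
// extloadi8, the first defm above expands to roughly
//   def : Pat<(i32 (extloadi8 (ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend))),
//             (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$extend)>;
// plus the analogous X-register form. An anyext byte load may use the
// zero-extending LDRBB form because the high bits of an anyext result are
// unspecified.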
1439 1440 // zextloadi1 -> zextloadi8 1441 defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 1442} 1443 1444//--- 1445// (unsigned immediate) 1446//--- 1447defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr", 1448 [(set GPR64:$Rt, 1449 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 1450defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr", 1451 [(set GPR32:$Rt, 1452 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 1453defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr", 1454 [(set FPR8:$Rt, 1455 (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; 1456defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr", 1457 [(set (f16 FPR16:$Rt), 1458 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; 1459defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr", 1460 [(set (f32 FPR32:$Rt), 1461 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 1462defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr", 1463 [(set (f64 FPR64:$Rt), 1464 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 1465defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr", 1466 [(set (f128 FPR128:$Rt), 1467 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; 1468 1469// For regular load, we do not have any alignment requirement. 1470// Thus, it is safe to directly map the vector loads with interesting 1471// addressing modes. 1472// FIXME: We could do the same for bitconvert to floating point vectors. 1473def : Pat <(v8i8 (scalar_to_vector (i32 1474 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 1475 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 1476 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 1477def : Pat <(v16i8 (scalar_to_vector (i32 1478 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 1479 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 1480 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 1481def : Pat <(v4i16 (scalar_to_vector (i32 1482 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 1483 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 1484 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 1485def : Pat <(v8i16 (scalar_to_vector (i32 1486 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 1487 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 1488 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 1489def : Pat <(v2i32 (scalar_to_vector (i32 1490 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 1491 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 1492 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 1493def : Pat <(v4i32 (scalar_to_vector (i32 1494 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 1495 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 1496 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 1497def : Pat <(v1i64 (scalar_to_vector (i64 1498 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 1499 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1500def : Pat <(v2i64 (scalar_to_vector (i64 1501 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 1502 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 1503 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; 1504 1505// Match all load 64 bits width whose type is compatible with FPR64 1506let Predicates = [IsLE] in { 1507 // We must use LD1 to perform vector loads in big-endian. 
1508 def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1509 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1510 def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1511 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1512 def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1513 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1514 def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1515 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1516 def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1517 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1518} 1519def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1520 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1521def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1522 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1523 1524// Match all load 128 bits width whose type is compatible with FPR128 1525let Predicates = [IsLE] in { 1526 // We must use LD1 to perform vector loads in big-endian. 1527 def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1528 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1529 def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1530 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1531 def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1532 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1533 def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1534 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1535 def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1536 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1537 def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1538 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1539 def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1540 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1541} 1542def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1543 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1544 1545defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", 1546 [(set GPR32:$Rt, 1547 (zextloadi16 (am_indexed16 GPR64sp:$Rn, 1548 uimm12s2:$offset)))]>; 1549defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", 1550 [(set GPR32:$Rt, 1551 (zextloadi8 (am_indexed8 GPR64sp:$Rn, 1552 uimm12s1:$offset)))]>; 1553// zextload -> i64 1554def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1555 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1556def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1557 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 1558 1559// zextloadi1 -> zextloadi8 1560def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1561 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1562def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1563 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1564 1565// extload -> zextload 1566def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1567 (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; 1568def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1569 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1570def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1571 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1572def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 1573 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, 
uimm12s4:$offset), sub_32)>; 1574def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1575 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 1576def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1577 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1578def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1579 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1580 1581// load sign-extended half-word 1582defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", 1583 [(set GPR32:$Rt, 1584 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 1585 uimm12s2:$offset)))]>; 1586defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", 1587 [(set GPR64:$Rt, 1588 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 1589 uimm12s2:$offset)))]>; 1590 1591// load sign-extended byte 1592defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", 1593 [(set GPR32:$Rt, 1594 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 1595 uimm12s1:$offset)))]>; 1596defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", 1597 [(set GPR64:$Rt, 1598 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 1599 uimm12s1:$offset)))]>; 1600 1601// load sign-extended word 1602defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", 1603 [(set GPR64:$Rt, 1604 (sextloadi32 (am_indexed32 GPR64sp:$Rn, 1605 uimm12s4:$offset)))]>; 1606 1607// load zero-extended word 1608def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 1609 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 1610 1611// Pre-fetch. 1612def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", 1613 [(AArch64Prefetch imm:$Rt, 1614 (am_indexed64 GPR64sp:$Rn, 1615 uimm12s8:$offset))]>; 1616 1617def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; 1618 1619//--- 1620// (literal) 1621def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">; 1622def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">; 1623def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">; 1624def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">; 1625def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">; 1626 1627// load sign-extended word 1628def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; 1629 1630// prefetch 1631def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; 1632// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; 1633 1634//--- 1635// (unscaled immediate) 1636defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur", 1637 [(set GPR64:$Rt, 1638 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 1639defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur", 1640 [(set GPR32:$Rt, 1641 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1642defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur", 1643 [(set FPR8:$Rt, 1644 (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1645defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur", 1646 [(set FPR16:$Rt, 1647 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1648defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur", 1649 [(set (f32 FPR32:$Rt), 1650 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1651defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur", 1652 [(set (f64 FPR64:$Rt), 1653 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 1654defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur", 1655 [(set (f128 FPR128:$Rt), 1656 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; 1657 1658defm LDURHH 1659 : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", 1660 [(set GPR32:$Rt, 1661 (zextloadi16 (am_unscaled16 
GPR64sp:$Rn, simm9:$offset)))]>; 1662defm LDURBB 1663 : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", 1664 [(set GPR32:$Rt, 1665 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1666 1667// Match all load 64 bits width whose type is compatible with FPR64 1668let Predicates = [IsLE] in { 1669 def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1670 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1671 def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1672 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1673 def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1674 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1675 def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1676 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1677 def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1678 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1679} 1680def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1681 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1682def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1683 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1684 1685// Match all load 128 bits width whose type is compatible with FPR128 1686let Predicates = [IsLE] in { 1687 def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1688 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1689 def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1690 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1691 def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1692 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1693 def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1694 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1695 def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1696 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1697 def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1698 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1699 def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1700 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1701} 1702 1703// anyext -> zext 1704def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1705 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 1706def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1707 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1708def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1709 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1710def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 1711 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1712def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1713 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1714def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1715 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1716def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1717 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1718// unscaled zext 1719def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1720 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 1721def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1722 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1723def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1724 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1725def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 1726 (SUBREG_TO_REG (i64 0),
(LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1727def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1728 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1729def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1730 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1731def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1732 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1733 1734 1735//--- 1736// LDR mnemonics fall back to LDUR for negative or unaligned offsets. 1737 1738// Define new assembler match classes as we want to only match these when 1739// they don't otherwise match the scaled addressing mode for LDR/STR. Don't 1740// associate a DiagnosticType either, as we want the diagnostic for the 1741// canonical form (the scaled operand) to take precedence. 1742class SImm9OffsetOperand<int Width> : AsmOperandClass { 1743 let Name = "SImm9OffsetFB" # Width; 1744 let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; 1745 let RenderMethod = "addImmOperands"; 1746} 1747 1748def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; 1749def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; 1750def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; 1751def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; 1752def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; 1753 1754def simm9_offset_fb8 : Operand<i64> { 1755 let ParserMatchClass = SImm9OffsetFB8Operand; 1756} 1757def simm9_offset_fb16 : Operand<i64> { 1758 let ParserMatchClass = SImm9OffsetFB16Operand; 1759} 1760def simm9_offset_fb32 : Operand<i64> { 1761 let ParserMatchClass = SImm9OffsetFB32Operand; 1762} 1763def simm9_offset_fb64 : Operand<i64> { 1764 let ParserMatchClass = SImm9OffsetFB64Operand; 1765} 1766def simm9_offset_fb128 : Operand<i64> { 1767 let ParserMatchClass = SImm9OffsetFB128Operand; 1768} 1769 1770def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1771 (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 1772def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1773 (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1774def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1775 (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1776def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1777 (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1778def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1779 (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1780def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1781 (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 1782def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1783 (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 1784 1790 1791// load sign-extended half-word 1792defm LDURSHW 1793 : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", 1794 [(set GPR32:$Rt, 1795 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1796defm LDURSHX 1797 : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", 1798 [(set GPR64:$Rt, 1799 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1800 1801// load sign-extended byte 1802defm LDURSBW 1803 : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", 1804 [(set GPR32:$Rt, 1805
(sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1806defm LDURSBX 1807 : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", 1808 [(set GPR64:$Rt, 1809 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1810 1811// load sign-extended word 1812defm LDURSW 1813 : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", 1814 [(set GPR64:$Rt, 1815 (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1816 1817// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*. 1818def : InstAlias<"ldrb $Rt, [$Rn, $offset]", 1819 (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1820def : InstAlias<"ldrh $Rt, [$Rn, $offset]", 1821 (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1822def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 1823 (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1824def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 1825 (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1826def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 1827 (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1828def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 1829 (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1830def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", 1831 (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1832 1833// Pre-fetch. 1834defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", 1835 [(AArch64Prefetch imm:$Rt, 1836 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 1837 1838//--- 1839// (unscaled immediate, unprivileged) 1840defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; 1841defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; 1842 1843defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; 1844defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; 1845 1846// load sign-extended half-word 1847defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; 1848defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; 1849 1850// load sign-extended byte 1851defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; 1852defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; 1853 1854// load sign-extended word 1855defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; 1856 1857//--- 1858// (immediate pre-indexed) 1859def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; 1860def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; 1861def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; 1862def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; 1863def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; 1864def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; 1865def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; 1866 1867// load sign-extended half-word 1868def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; 1869def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; 1870 1871// load sign-extended byte 1872def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; 1873def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; 1874 1875// load zero-extended byte and half-word 1876def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; 1877def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; 1878 1879// load sign-extended word 1880def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; 1881 1882//--- 1883// (immediate post-indexed) 1884def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; 1885def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">; 1886def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">; 1887def
LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">; 1888def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">; 1889def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">; 1890def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">; 1891 1892// load sign-extended half-word 1893def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">; 1894def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">; 1895 1896// load sign-extended byte 1897def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">; 1898def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">; 1899 1900// load zero-extended byte and half-word 1901def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">; 1902def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; 1903 1904// load sign-extended word 1905def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; 1906 1907//===----------------------------------------------------------------------===// 1908// Store instructions. 1909//===----------------------------------------------------------------------===// 1910 1911// Pair (indexed, offset) 1912// FIXME: Use dedicated range-checked addressing mode operand here. 1913defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">; 1914defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">; 1915defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">; 1916defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">; 1917defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">; 1918 1919// Pair (pre-indexed) 1920def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">; 1921def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">; 1922def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">; 1923def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">; 1924def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">; 1925 1926// Pair (post-indexed) 1927def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">; 1928def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">; 1929def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">; 1930def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">; 1931def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">; 1932 1933// Pair (no allocate) 1934defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">; 1935defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">; 1936defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">; 1937defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">; 1938defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">; 1939 1940//--- 1941// (Register offset) 1942 1943// Integer 1944defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; 1945defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; 1946defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; 1947defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; 1948 1949 1950// Floating-point 1951defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>; 1952defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>; 1953defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>; 1954defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>; 1955defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>; 1956 1957multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop, 1958 Instruction STRW, Instruction STRX> { 1959 1960 def : Pat<(storeop GPR64:$Rt, 1961 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 1962 (STRW
(EXTRACT_SUBREG GPR64:$Rt, sub_32), 1963 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1964 1965 def : Pat<(storeop GPR64:$Rt, 1966 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 1967 (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), 1968 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1969} 1970 1971let AddedComplexity = 10 in { 1972 // truncstore i64 1973 defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>; 1974 defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>; 1975 defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>; 1976} 1977 1978multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR, 1979 Instruction STRW, Instruction STRX> { 1980 def : Pat<(store (VecTy FPR:$Rt), 1981 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 1982 (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1983 1984 def : Pat<(store (VecTy FPR:$Rt), 1985 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 1986 (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1987} 1988 1989let AddedComplexity = 10 in { 1990// Match all store 64 bits width whose type is compatible with FPR64 1991let Predicates = [IsLE] in { 1992 // We must use ST1 to store vectors in big-endian. 1993 defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>; 1994 defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>; 1995 defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>; 1996 defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>; 1997 defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>; 1998} 1999 2000defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>; 2001defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>; 2002 2003// Match all store 128 bits width whose type is compatible with FPR128 2004let Predicates = [IsLE] in { 2005 // We must use ST1 to store vectors in big-endian. 2006 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; 2007 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; 2008 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; 2009 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; 2010 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; 2011 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; 2012 defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>; 2013} 2014} // AddedComplexity = 10 2015 2016// Match stores from lane 0 to the appropriate subreg's store. 
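// Illustration only (a sketch): storing lane 0 of a 128-bit vector writes the
// same bytes as storing the matching scalar subregister, so the ro32/v4i32
// instantiation below amounts to roughly
//   def : Pat<(store (i32 (vector_extract (v4i32 VecListOne128:$Vt), 0)), <addr>),
//             (STRSroW (EXTRACT_SUBREG VecListOne128:$Vt, ssub), <addr>)>;
// with <addr> standing in for the register-offset operands; no separate
// lane-move instruction is needed before the store.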
2017multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop, 2018 ValueType VecTy, ValueType STy, 2019 SubRegIndex SubRegIdx, 2020 Instruction STRW, Instruction STRX> { 2021 2022 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 2023 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 2024 (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 2025 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 2026 2027 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 2028 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 2029 (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 2030 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 2031} 2032 2033let AddedComplexity = 19 in { 2034 defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>; 2035 defm : VecROStoreLane0Pat<ro16, store , v8i16, i16, hsub, STRHroW, STRHroX>; 2036 defm : VecROStoreLane0Pat<ro32, truncstorei32, v4i32, i32, ssub, STRSroW, STRSroX>; 2037 defm : VecROStoreLane0Pat<ro32, store , v4i32, i32, ssub, STRSroW, STRSroX>; 2038 defm : VecROStoreLane0Pat<ro32, store , v4f32, f32, ssub, STRSroW, STRSroX>; 2039 defm : VecROStoreLane0Pat<ro64, store , v2i64, i64, dsub, STRDroW, STRDroX>; 2040 defm : VecROStoreLane0Pat<ro64, store , v2f64, f64, dsub, STRDroW, STRDroX>; 2041} 2042 2043//--- 2044// (unsigned immediate) 2045defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str", 2046 [(store GPR64:$Rt, 2047 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 2048defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str", 2049 [(store GPR32:$Rt, 2050 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 2051defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str", 2052 [(store FPR8:$Rt, 2053 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; 2054defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str", 2055 [(store (f16 FPR16:$Rt), 2056 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; 2057defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str", 2058 [(store (f32 FPR32:$Rt), 2059 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 2060defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str", 2061 [(store (f64 FPR64:$Rt), 2062 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 2063defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>; 2064 2065defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh", 2066 [(truncstorei16 GPR32:$Rt, 2067 (am_indexed16 GPR64sp:$Rn, 2068 uimm12s2:$offset))]>; 2069defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb", 2070 [(truncstorei8 GPR32:$Rt, 2071 (am_indexed8 GPR64sp:$Rn, 2072 uimm12s1:$offset))]>; 2073 2074// Match all store 64 bits width whose type is compatible with FPR64 2075let AddedComplexity = 10 in { 2076let Predicates = [IsLE] in { 2077 // We must use ST1 to store vectors in big-endian. 
2078 def : Pat<(store (v2f32 FPR64:$Rt), 2079 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2080 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2081 def : Pat<(store (v8i8 FPR64:$Rt), 2082 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2083 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2084 def : Pat<(store (v4i16 FPR64:$Rt), 2085 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2086 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2087 def : Pat<(store (v2i32 FPR64:$Rt), 2088 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2089 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2090 def : Pat<(store (v4f16 FPR64:$Rt), 2091 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2092 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2093} 2094def : Pat<(store (v1f64 FPR64:$Rt), 2095 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2096 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2097def : Pat<(store (v1i64 FPR64:$Rt), 2098 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 2099 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 2100 2101// Match all store 128 bits width whose type is compatible with FPR128 2102let Predicates = [IsLE] in { 2103 // We must use ST1 to store vectors in big-endian. 2104 def : Pat<(store (v4f32 FPR128:$Rt), 2105 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2106 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2107 def : Pat<(store (v2f64 FPR128:$Rt), 2108 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2109 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2110 def : Pat<(store (v16i8 FPR128:$Rt), 2111 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2112 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2113 def : Pat<(store (v8i16 FPR128:$Rt), 2114 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2115 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2116 def : Pat<(store (v4i32 FPR128:$Rt), 2117 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2118 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2119 def : Pat<(store (v2i64 FPR128:$Rt), 2120 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2121 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2122 def : Pat<(store (v8f16 FPR128:$Rt), 2123 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2124 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2125} 2126def : Pat<(store (f128 FPR128:$Rt), 2127 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 2128 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 2129 2130// truncstore i64 2131def : Pat<(truncstorei32 GPR64:$Rt, 2132 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), 2133 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; 2134def : Pat<(truncstorei16 GPR64:$Rt, 2135 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 2136 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; 2137def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), 2138 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; 2139 2140} // AddedComplexity = 10 2141 2142//--- 2143// (unscaled immediate) 2144defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur", 2145 [(store GPR64:$Rt, 2146 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 2147defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur", 2148 [(store GPR32:$Rt, 2149 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 2150defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur", 2151 [(store FPR8:$Rt, 2152 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 2153defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, 
"stur", 2154 [(store (f16 FPR16:$Rt), 2155 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 2156defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur", 2157 [(store (f32 FPR32:$Rt), 2158 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 2159defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur", 2160 [(store (f64 FPR64:$Rt), 2161 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 2162defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur", 2163 [(store (f128 FPR128:$Rt), 2164 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; 2165defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh", 2166 [(truncstorei16 GPR32:$Rt, 2167 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 2168defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb", 2169 [(truncstorei8 GPR32:$Rt, 2170 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 2171 2172// Match all store 64 bits width whose type is compatible with FPR64 2173let Predicates = [IsLE] in { 2174 // We must use ST1 to store vectors in big-endian. 2175 def : Pat<(store (v2f32 FPR64:$Rt), 2176 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2177 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2178 def : Pat<(store (v8i8 FPR64:$Rt), 2179 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2180 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2181 def : Pat<(store (v4i16 FPR64:$Rt), 2182 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2183 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2184 def : Pat<(store (v2i32 FPR64:$Rt), 2185 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2186 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2187 def : Pat<(store (v4f16 FPR64:$Rt), 2188 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2189 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2190} 2191def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2192 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2193def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 2194 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2195 2196// Match all store 128 bits width whose type is compatible with FPR128 2197let Predicates = [IsLE] in { 2198 // We must use ST1 to store vectors in big-endian. 
2199 def : Pat<(store (v4f32 FPR128:$Rt), 2200 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2201 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2202 def : Pat<(store (v2f64 FPR128:$Rt), 2203 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2204 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2205 def : Pat<(store (v16i8 FPR128:$Rt), 2206 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2207 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2208 def : Pat<(store (v8i16 FPR128:$Rt), 2209 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2210 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2211 def : Pat<(store (v4i32 FPR128:$Rt), 2212 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2213 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2214 def : Pat<(store (v2i64 FPR128:$Rt), 2215 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2216 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2220 def : Pat<(store (v8f16 FPR128:$Rt), 2221 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2222 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2223} 2224 2225// unscaled i64 truncating stores 2226def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), 2227 (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2228def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 2229 (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2230def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), 2231 (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2232 2233//--- 2234// STR mnemonics fall back to STUR for negative or unaligned offsets.
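// For example (assembly-level sketch): with the parse-only aliases below,
//   str x0, [x1, #-8]   // negative offset, assembles to stur x0, [x1, #-8]
//   str w0, [x1, #3]    // offset not a multiple of 4, also assembles to stur
// are accepted, while "str x0, [x1, #8]" still uses the scaled STR encoding.
// The trailing 0 (EmitPriority) keeps the printer emitting the stur form.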
2235def : InstAlias<"str $Rt, [$Rn, $offset]", 2236 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 2237def : InstAlias<"str $Rt, [$Rn, $offset]", 2238 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 2239def : InstAlias<"str $Rt, [$Rn, $offset]", 2240 (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 2241def : InstAlias<"str $Rt, [$Rn, $offset]", 2242 (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 2243def : InstAlias<"str $Rt, [$Rn, $offset]", 2244 (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 2245def : InstAlias<"str $Rt, [$Rn, $offset]", 2246 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 2247def : InstAlias<"str $Rt, [$Rn, $offset]", 2248 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 2249 2250def : InstAlias<"strb $Rt, [$Rn, $offset]", 2251 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 2252def : InstAlias<"strh $Rt, [$Rn, $offset]", 2253 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 2254 2255//--- 2256// (unscaled immediate, unprivileged) 2257defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 2258defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 2259 2260defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 2261defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 2262 2263//--- 2264// (immediate pre-indexed) 2265def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>; 2266def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>; 2267def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str", pre_store, untyped>; 2268def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>; 2269def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>; 2270def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>; 2271def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>; 2272 2273def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8, i32>; 2274def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>; 2275 2276// truncstore i64 2277def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2278 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2279 simm9:$off)>; 2280def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2281 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2282 simm9:$off)>; 2283def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2284 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2285 simm9:$off)>; 2286 2287def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2288 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2289def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2290 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2291def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2292 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2293def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2294 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2295def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2296 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2297def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2298 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2299def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2300 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2301 2302def : Pat<(pre_store (v16i8 
FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2303 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2304def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2305 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2306def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2307 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2308def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2309 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2310def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2311 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2312def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2313 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2314def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2315 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2316 2317//--- 2318// (immediate post-indexed) 2319def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>; 2320def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>; 2321def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str", post_store, untyped>; 2322def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>; 2323def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>; 2324def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>; 2325def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>; 2326 2327def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>; 2328def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>; 2329 2330// truncstore i64 2331def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2332 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2333 simm9:$off)>; 2334def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2335 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2336 simm9:$off)>; 2337def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2338 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2339 simm9:$off)>; 2340 2341def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2342 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2343def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2344 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2345def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2346 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2347def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2348 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2349def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2350 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2351def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2352 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2353def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2354 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2355 2356def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2357 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2358def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2359 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2360def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2361 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2362def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2363 (STRQpost FPR128:$Rt, GPR64sp:$addr, 
simm9:$off)>; 2364def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2365 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2366def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2367 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2368def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2369 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2370 2371//===----------------------------------------------------------------------===// 2372// Load/store exclusive instructions. 2373//===----------------------------------------------------------------------===// 2374 2375def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 2376def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 2377def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 2378def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 2379 2380def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 2381def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 2382def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 2383def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 2384 2385def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 2386def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 2387def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 2388def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 2389 2390def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 2391def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 2392def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 2393def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 2394 2395def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 2396def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 2397def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 2398def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 2399 2400def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 2401def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 2402def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 2403def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 2404 2405def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 2406def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 2407 2408def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 2409def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 2410 2411def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 2412def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 2413 2414def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 2415def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 2416 2417let Predicates = [HasV8_1a] in { 2418 // v8.1a "Limited Order Region" extension load-acquire instructions 2419 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; 2420 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; 2421 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; 2422 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; 2423 2424 // v8.1a "Limited Order Region" extension store-release instructions 2425 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; 2426 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; 2427 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; 2428 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; 2429} 2430 
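// Usage sketch (target assembly, not a definition): the exclusive pairs above
// compose into the usual atomic read-modify-write loop, e.g. an atomic
// increment with acquire/release semantics:
//   retry:
//     ldaxr x0, [x2]        // load-acquire exclusive
//     add   x0, x0, #1
//     stlxr w1, x0, [x2]    // store-release exclusive; w1 == 0 on success
//     cbnz  w1, retry       // exclusive monitor lost, try again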
2431//===----------------------------------------------------------------------===// 2432// Scaled floating point to integer conversion instructions. 2433//===----------------------------------------------------------------------===// 2434 2435defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 2436defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 2437defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 2438defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 2439defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 2440defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 2441defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 2442defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 2443defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; 2444defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; 2445defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; 2446defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; 2447let isCodeGenOnly = 1 in { 2448defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; 2449defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; 2450defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; 2451defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; 2452} 2453 2454multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> { 2455 def : Pat<(i32 (to_int (round f32:$Rn))), 2456 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 2457 def : Pat<(i64 (to_int (round f32:$Rn))), 2458 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 2459 def : Pat<(i32 (to_int (round f64:$Rn))), 2460 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 2461 def : Pat<(i64 (to_int (round f64:$Rn))), 2462 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 2463} 2464 2465defm : FPToIntegerPats<fp_to_sint, fceil, "FCVTPS">; 2466defm : FPToIntegerPats<fp_to_uint, fceil, "FCVTPU">; 2467defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">; 2468defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">; 2469defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">; 2470defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">; 2471defm : FPToIntegerPats<fp_to_sint, frnd, "FCVTAS">; 2472defm : FPToIntegerPats<fp_to_uint, frnd, "FCVTAU">; 2473 2474//===----------------------------------------------------------------------===// 2475// Scaled integer to floating point conversion instructions. 2476//===----------------------------------------------------------------------===// 2477 2478defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; 2479defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; 2480 2481//===----------------------------------------------------------------------===// 2482// Unscaled integer to floating point conversion instruction. 
2483//===----------------------------------------------------------------------===// 2484 2485defm FMOV : UnscaledConversion<"fmov">; 2486 2487// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable 2488let isReMaterializable = 1, isCodeGenOnly = 1 in { 2489def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, 2490 PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>, 2491 Requires<[NoZCZ]>; 2492def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, 2493 PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>, 2494 Requires<[NoZCZ]>; 2495} 2496 2497//===----------------------------------------------------------------------===// 2498// Floating point conversion instruction. 2499//===----------------------------------------------------------------------===// 2500 2501defm FCVT : FPConversion<"fcvt">; 2502 2503//===----------------------------------------------------------------------===// 2504// Floating point single operand instructions. 2505//===----------------------------------------------------------------------===// 2506 2507defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; 2508defm FMOV : SingleOperandFPData<0b0000, "fmov">; 2509defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; 2510defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; 2511defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; 2512defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; 2513defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; 2514defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; 2515 2516def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), 2517 (FRINTNDr FPR64:$Rn)>; 2518 2519defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; 2520defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; 2521 2522let SchedRW = [WriteFDiv] in { 2523defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; 2524} 2525 2526//===----------------------------------------------------------------------===// 2527// Floating point two operand instructions. 2528//===----------------------------------------------------------------------===// 2529 2530defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; 2531let SchedRW = [WriteFDiv] in { 2532defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; 2533} 2534defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; 2535defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; 2536defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; 2537defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; 2538let SchedRW = [WriteFMul] in { 2539defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; 2540defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; 2541} 2542defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; 2543 2544def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2545 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; 2546def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2547 (FMINDrr FPR64:$Rn, FPR64:$Rm)>; 2548def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2549 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; 2550def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2551 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; 2552 2553//===----------------------------------------------------------------------===// 2554// Floating point three operand instructions. 
//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//

defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of the input operand
// list, unlike the NEON variant.
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c"
// and "(-a) + b*(-c)".
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

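// Worked identity for the two pattern pairs above: (-a)*b + (-c) and
// a*(-b) + (-c) both equal -(a*b + c), which is exactly what FNMADD
// computes, so no separate FNEG is needed.
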
//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe">;
defm FCMP  : FPComparison<0, "fcmp", AArch64fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
}

//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//

let isReMaterializable = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//

defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
                                        int_aarch64_neon_uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
                                (zext (v8i8 V64:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
               (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
                                (zext (extract_high_v16i8 V128:$opB))),
                           (AArch64vashr v8i16:$src, (i32 15))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
               (v4i32 (add (sub (zext (v4i16 V64:$opA)),
                                (zext (v4i16 V64:$opB))),
                           (AArch64vashr v4i32:$src, (i32 31))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
               (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)),
                                (zext (extract_high_v8i16 V128:$opB))),
                           (AArch64vashr v4i32:$src, (i32 31))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
               (v2i64 (add (sub (zext (v2i32 V64:$opA)),
                                (zext (v2i32 V64:$opB))),
                           (AArch64vashr v2i64:$src, (i32 63))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
               (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)),
                                (zext (extract_high_v4i32 V128:$opB))),
                           (AArch64vashr v2i64:$src, (i32 63))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
               (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
          (ABSv8i8 V64:$src)>;
def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
               (v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
          (ABSv4i16 V64:$src)>;
def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
               (v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
          (ABSv2i32 V64:$src)>;
def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
               (v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
          (ABSv16i8 V128:$src)>;
def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
               (v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
          (ABSv8i16 V128:$src)>;
def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
               (v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
          (ABSv4i32 V128:$src)>;
def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
               (v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
          (ABSv2i64 V128:$src)>;

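// The ABS patterns above recognize the branchless absolute-value idiom
//   abs(x) = (x + (x >>s (bitwidth-1))) ^ (x >>s (bitwidth-1))
// that the IR-level combines produce, so e.g. the v4i32 form of that
// expression selects to a single "abs v0.4s, v1.4s".
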
defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS   : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;

defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
          (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
                                                                (i64 4)))),
          (FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
                                                    (i64 2))))),
          (FCVTLv4i32 V128:$Rn)>;

def : Pat<(v4f32 (fextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (fextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
                                                    (i64 4))))),
          (FCVTLv8i16 V128:$Rn)>;

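// The extract_subvector forms above convert the upper half of a 128-bit
// register; they select to the "2" variants (e.g. "fcvtl2 v0.2d, v1.4s"),
// avoiding a separate instruction to move the high lanes down first.
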
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
          (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (fround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
                                        int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
let isCodeGenOnly = 1 in {
defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
                                       int_aarch64_neon_fcvtzs>;
defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
                                       int_aarch64_neon_fcvtzu>;
}
defm FNEG   : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
                                UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
                (NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
                (NOTv16i8 V128:$Vd, V128:$Vn)>;

def : Pat<(AArch64neg (v8i8  V64:$Rn)),  (NEGv8i8  V64:$Rn)>;
def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
def : Pat<(AArch64neg (v4i16 V64:$Rn)),  (NEGv4i16 V64:$Rn)>;
def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
def : Pat<(AArch64neg (v2i32 V64:$Rn)),  (NEGv2i32 V64:$Rn)>;
def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;

def : Pat<(AArch64not (v8i8 V64:$Rn)),   (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;

defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
       BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
                                int_aarch64_neon_uaddlp>;
defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;

def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;

// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext so it's easier to pull the patterns out of
// the definition.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
            (SHLLv8i8 V64:$Rn)>;
  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
            (SHLLv16i8 V128:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
            (SHLLv4i16 V64:$Rn)>;
  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
            (SHLLv8i16 V128:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
            (SHLLv2i32 V64:$Rn)>;
  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
            (SHLLv4i32 V128:$Rn)>;
}

defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;

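// One multiclass can cover zext, sext and anyext because SHLL shifts each
// element left by the full source element width: the extension bits are
// shifted out entirely, leaving only zeros below the original bits, so e.g.
// "shll v0.8h, v1.8b, #8" gives the same result for either extension.
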
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;

// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;

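// Illustration of the reordering: for
//   %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c,
//                                         <2 x float> %a)
// the addend %a becomes the tied destination operand, i.e.
// "fmla v0.2s, v1.2s, v2.2s" with %a in v0, %b in v1 and %c in v2.
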
defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
                TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
                TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqadd>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqsub>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

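// BSL is a bitwise select: each result bit comes from the second source
// where the tied first operand has a one bit, and from the third source
// where it has a zero bit, i.e. (Rd & Rn) | (~Rd & Rm) as in the TriOpFrag
// above. Because the operation is bit-parallel, the byte-sized instruction
// serves every element type, which is why all the patterns funnel into
// BSLv8i8/BSLv16i8.
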
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

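// The cmls/cmlo/cmle/cmlt spellings have no encodings of their own; the
// aliases above accept them by swapping the source operands of the
// corresponding "greater" comparison, e.g.
//   cmls v0.8b, v1.8b, v2.8b
// assembles as "cmhs v0.8b, v2.8b, v1.8b".
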
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasV8_1a] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqadd
                   (i32 FPR32:$Rd),
                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
                                                   (i32 FPR32:$Rm))))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqsub
                   (i32 FPR32:$Rd),
                   (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
                                                   (i32 FPR32:$Rm))))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

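// With v8.1-A, a saturating accumulate of a rounding doubling multiply folds
// into a single instruction; the patterns above select
//   sqadd(a, sqrdmulh(b, c))  to  "sqrdmlah s0, s1, s2"
//   sqsub(a, sqrdmulh(b, c))  to  "sqrdmlsh s0, s1, s2"
// with a, b and c in s0, s1 and s2 respectively.
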
def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                      int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                        (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

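// "Mixed operands" means the destination element is wider than the sources,
// e.g. sqdmull takes two 32-bit inputs and produces a 64-bit result. The two
// patterns above fold the widening multiply into the saturating accumulate,
// so sqadd(a, sqdmulls(b, c)) becomes "sqdmlal d0, s1, s2".
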
$src2}", 3144 (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 3145} 3146def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # 3147 "|faclt.2s\t$dst, $src1, $src2}", 3148 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 3149def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # 3150 "|faclt.4s\t$dst, $src1, $src2}", 3151 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 3152def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # 3153 "|faclt.2d\t$dst, $src1, $src2}", 3154 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 3155 3156//===----------------------------------------------------------------------===// 3157// Advanced SIMD three scalar instructions. 3158//===----------------------------------------------------------------------===// 3159 3160defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 3161defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 3162defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 3163defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 3164defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 3165defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 3166defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 3167defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; 3168def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 3169 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 3170defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", 3171 int_aarch64_neon_facge>; 3172defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", 3173 int_aarch64_neon_facgt>; 3174defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 3175defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 3176defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 3177defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; 3178defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; 3179defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; 3180defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 3181defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 3182defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 3183defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 3184defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 3185defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 3186defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; 3187defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 3188defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 3189defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 3190defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 3191defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 3192defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 3193defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 3194defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 3195let Predicates = [HasV8_1a] in { 3196 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; 3197 defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; 3198 def : Pat<(i32 (int_aarch64_neon_sqadd 3199 (i32 FPR32:$Rd), 
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit accesses are handled in the target-specific DAG combine
// (performIntToFpCombine).
// Converting a 64-bit integer to a 32-bit floating point value is not
// possible with a plain UCVTF on the floating point registers, since source
// and destination must have the same size.

// Here are the patterns for 8-, 16-, 32- and 64-bit accesses to double.
// 8-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
               (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
               (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                         (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double is handled in the target-specific DAG combine
// (performIntToFpCombine).

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             int_aarch64_neon_sabd>;
defm SABDL  : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                         int_aarch64_neon_sabd>;
defm SADDL  : SIMDLongThreeVectorBHS<0, 0b0000, "saddl",
                 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW  : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL  : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL  : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL  : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL  : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW  : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL  : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                             int_aarch64_neon_uabd>;
defm UADDL  : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW  : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
defm UMLAL  : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL  : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL  : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL  : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
defm USUBW  : SIMDWideThreeVectorBHS<1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;

// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
                  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V64:$Rn, V64:$Rm)>;
}

defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
                               SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
                               UMULLv4i16_v4i32, UMULLv2i32_v2i64>;

// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
                  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
  def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
}

defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
  SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
  TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
  UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;

// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
                                    (extractelt (v2i64 V128:$Rm), (i64 1))),
          (PMULLv2i64 V128:$Rn, V128:$Rm)>;

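// The second pmull64 pattern matches both scalar operands being extracted
// from lane 1 of a v2i64, which is exactly the high-half form
//   pmull2 v0.1q, v1.2d, v2.2d
// so no explicit lane moves are needed before the multiply.
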
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                             (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                      (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

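// For example, the IR idiom for keeping the high half of a widened sum,
//   %s = add <8 x i16> %a, %b
//   %h = lshr <8 x i16> %s, <i16 8, i16 8, i16 8, i16 8,
//                            i16 8, i16 8, i16 8, i16 8>
//   %t = trunc <8 x i16> %h to <8 x i8>
// selects to a single "addhn v0.8b, v1.8h, v2.8h"; the concat_vectors forms
// select the "addhn2"/"subhn2" variants that write the high half of the
// destination register.
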
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
def : Pat<(v4f16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(v8f16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;

// We use EXT to handle extract_subvector to copy the upper 64 bits of a
// 128-bit vector.
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 4))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn),
                                       (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                                       (v16i8 VecListOne128:$Rn),
                                       (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                                        (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;

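// N.b. out-of-range behaviour is what distinguishes the two table lookups:
// TBL writes zero to any destination byte whose index is out of range, while
// TBX leaves the corresponding byte of the (tied) destination unchanged.
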
//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"cpy">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

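// Illustration: the v4f32 faddv reduction above takes two steps because the
// scalar faddp only sums one pair; "faddp v0.4s, v1.4s, v1.4s" first reduces
// the four lanes to two partial sums in the low half, then "faddp s0, v0.2s"
// adds the remaining pair.
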
//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

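// The INSERT_SUBREG/IMPLICIT_DEF sequence above places the scalar FP value
// in lane 0 of an otherwise undefined vector so that the lane-indexed DUP
// can broadcast it, e.g. "dup v0.4s, v1.s[0]" for a v4f32 splat of an f32,
// keeping the value on the SIMD side instead of bouncing it through a GPR.
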

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                                       (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                                        (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
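
// Editorial note: TBL and TBX differ only in how out-of-range indices are
// handled; TBL writes zero to the corresponding result byte, while TBX
// (the tied variant) leaves the destination byte unchanged. A hedged C
// sketch using ACLE intrinsics:
//
//   #include <arm_neon.h>
//   uint8x8_t shuffle0(uint8x16_t table, uint8x8_t idx) {
//     return vqtbl1_u8(table, idx);        // TBL: OOB lanes become 0
//   }
//   uint8x8_t shuffled(uint8x8_t d, uint8x16_t table, uint8x8_t idx) {
//     return vqtbx1_u8(d, table, idx);     // TBX: OOB lanes keep d
//   }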

//----------------------------------------------------------------------------
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------

defm CPY : SIMDScalarCPY<"cpy">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
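
// Editorial illustration (assumes ACLE intrinsics): a floating-point scalar
// already lives in the low lane of a SIMD register, so the dup patterns
// above first wrap it with INSERT_SUBREG and then broadcast lane 0, rather
// than round-tripping through a GPR.
//
//   #include <arm_neon.h>
//   float32x4_t splat(float x) {
//     return vdupq_n_f32(x);   // DUP v0.4s, v0.s[0] when x is in s0
//   }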

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                        imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                        imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                       imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
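
// Editorial worked example of the lane remapping above: duplicating the
// truncated halfword lane 3 of a v8i16 as bytes reads the low byte of that
// halfword, which is byte lane 6 of the same register (VecIndex_x2):
//
//   (v8i8 (AArch64dup (trunc (vector_extract (v8i16 v), 3))))
//     ==> DUP v0.8b, v1.b[6]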

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
                                                         VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
                                                         VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
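
// Editorial illustration (assumes ACLE intrinsics): widening a lane to a
// GPR picks SMOV or UMOV depending on whether the widening is signed; a
// compiler will typically fold the C integer conversion into the move.
//
//   #include <arm_neon.h>
//   int32_t  lane_s(int8x16_t v)  { return vgetq_lane_s8(v, 5); }  // SMOV w0, v0.b[5]
//   uint32_t lane_u(uint8x16_t v) { return vgetq_lane_u8(v, 5); }  // UMOV w0, v0.b[5]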

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                (i32 FPR32:$Rn), ssub))>;
def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
                                (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                    (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                    VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                    V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2))>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                    (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                    VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                    V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2))>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                    (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                    VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                    V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2))>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                    (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                    VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                    V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2))>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                      (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                      imm:$Immd)),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                      (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                      imm:$Immd)),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
                     imm:$Immd)),
            (EXTRACT_SUBREG
              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
              dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
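
// Editorial illustration (assumes ACLE intrinsics): a lane-to-lane copy
// between vectors maps onto a single INS (element) via the patterns above.
//
//   #include <arm_neon.h>
//   float32x4_t copy_lane(float32x4_t d, float32x4_t s) {
//     return vcopyq_laneq_f32(d, 1, s, 3);   // mov v0.s[1], v1.s[3]
//   }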

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka CPY here, alias for DUP) if
// the lane number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;

def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (CPYi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (CPYi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (CPYi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might as well
// be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v16i8, v8i8>;

// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;

def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
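
// Editorial illustration (assumes ACLE intrinsics): an across-lanes
// reduction folds a whole vector into one scalar held in a SIMD register.
//
//   #include <arm_neon.h>
//   uint8_t sum_all(uint8x16_t v) {
//     return vaddvq_u8(v);     // ADDV b0, v0.16b
//   }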

// Patterns for across-vector intrinsics that have a node equivalent, which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the
// vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
                                                 (i32 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
                                 (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;
}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as smov
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
              (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
              (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
              (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
              (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
              (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
                    maski8_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
              (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
                    maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            ssub))>;
}

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special: -> ADDP Vd.2S, Vn.2S, Vm.2S; return Vd.s[0]; Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special: -> ADDP Vd.2S, Vn.2S, Vm.2S; return Vd.s[0]; Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
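
// Editorial illustration (assumes ACLE intrinsics): there is no 2x32-bit
// across-lanes ADDV, so the two-lane reduction above becomes a pairwise
// add with both operands equal, then a read of lane 0.
//
//   #include <arm_neon.h>
//   uint32_t sum2(uint32x2_t v) {
//     return vaddv_u32(v);     // ADDP v0.2s, v0.2s, v0.2s ; fmov w0, s0
//   }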

multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              (i64 0)))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
                                                Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
              ssub))>;

  def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
            (i64 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
              dsub))>;
}

defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;

// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
          (i64 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
            dsub))>;

//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------

// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;

def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;

def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".2d",
                     [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
                                                "fmov", ".2s",
                     [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
                                                "fmov", ".4s",
                     [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
                                                "fmov", ".4h",
                     [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
                                                "fmov", ".8h",
                     [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]

// AdvSIMD MOVI

// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
          (MOVID imm0_255:$shift)>;

def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v8i8  immAllZerosV), (MOVID (i32 0))>;

def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
def : Pat<(v8i8  immAllOnesV), (MOVID (i32 255))>;

// EDIT byte mask: 2d

// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
                                              simdimmtype10,
                                              "movi", ".2d",
                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
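
// Editorial note: the type-10 "byte mask" immediate expands each of the 8
// immediate bits to a full byte, so imm8 = 0 and imm8 = 255 materialize
// all-zeros and all-ones, matching the immAllZerosV/immAllOnesV patterns:
//
//   movi d0,    #0x0000000000000000   // MOVID 0
//   movi v0.2d, #0xffffffffffffffff   // MOVIv2d_ns 255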

// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
// Complexity is added to break a tie with a plain MOVI.
let AddedComplexity = 1 in {
def : Pat<(f32 fpimm0),
          (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
      Requires<[HasZCZ]>;
def : Pat<(f64 fpimm0),
          (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
      Requires<[HasZCZ]>;
}

def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;

def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;

def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;

def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;

// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
                    [(set (v2i32 V64:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
                    [(set (v4i32 V128:$Rd),
                          (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;

// Per byte: 8b & 16b
def MOVIv8b_ns  : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
                                               "movi", ".8b",
                    [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
                                               "movi", ".16b",
                    [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 4397def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 4398def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 4399 4400def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4401 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 4402def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4403 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 4404def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4405 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 4406def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4407 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 4408 4409// EDIT per word: 2s & 4s with MSL shifter 4410def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 4411 [(set (v2i32 V64:$Rd), 4412 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 4413def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 4414 [(set (v4i32 V128:$Rd), 4415 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 4416 4417//---------------------------------------------------------------------------- 4418// AdvSIMD indexed element 4419//---------------------------------------------------------------------------- 4420 4421let hasSideEffects = 0 in { 4422 defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; 4423 defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; 4424} 4425 4426// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 4427// instruction expects the addend first, while the intrinsic expects it last. 4428 4429// On the other hand, there are quite a few valid combinatorial options due to 4430// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 4431defm : SIMDFPIndexedTiedPatterns<"FMLA", 4432 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; 4433defm : SIMDFPIndexedTiedPatterns<"FMLA", 4434 TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; 4435 4436defm : SIMDFPIndexedTiedPatterns<"FMLS", 4437 TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 4438defm : SIMDFPIndexedTiedPatterns<"FMLS", 4439 TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 4440defm : SIMDFPIndexedTiedPatterns<"FMLS", 4441 TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 4442defm : SIMDFPIndexedTiedPatterns<"FMLS", 4443 TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 4444 4445multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 4446 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 4447 // and DUP scalar. 4448 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4449 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 4450 VectorIndexS:$idx))), 4451 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 4452 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4453 (v2f32 (AArch64duplane32 4454 (v4f32 (insert_subvector undef, 4455 (v2f32 (fneg V64:$Rm)), 4456 (i32 0))), 4457 VectorIndexS:$idx)))), 4458 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4459 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 4460 VectorIndexS:$idx)>; 4461 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4462 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 4463 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4464 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 4465 4466 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 4467 // and DUP scalar. 

multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i32 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub),
                               (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                             VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                     (v4f32 (insert_subvector undef,
                                               (v2f32 (fneg V64:$Rm)),
                                               (i32 0))),
                                     VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub),
                               (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                             VectorIndexD:$idx))),
            (FMLSv2i64_indexed
              V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub),
                               (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                   (v2f32 (fneg V64:$Rm)),
                                                   (i32 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                               V128:$Rm, VectorIndexS:$idx)>;
}

defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : FMLSIndexedAfterNegPatterns<
           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;

defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;

def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed V64:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
            (i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed V128:$Rn,
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
            (i64 0))>;
0b1000, "mul", mul>; 4549defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 4550 TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4551defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 4552 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4553defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", 4554 int_aarch64_neon_smull>; 4555defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 4556 int_aarch64_neon_sqadd>; 4557defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 4558 int_aarch64_neon_sqsub>; 4559defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", 4560 int_aarch64_neon_sqadd>; 4561defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", 4562 int_aarch64_neon_sqsub>; 4563defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 4564defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 4565 TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4566defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 4567 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4568defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", 4569 int_aarch64_neon_umull>; 4570 4571// A scalar sqdmull with the second operand being a vector lane can be 4572// handled directly with the indexed instruction encoding. 4573def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 4574 (vector_extract (v4i32 V128:$Vm), 4575 VectorIndexS:$idx)), 4576 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 4577 4578//---------------------------------------------------------------------------- 4579// AdvSIMD scalar shift instructions 4580//---------------------------------------------------------------------------- 4581defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; 4582defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; 4583defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; 4584defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; 4585// Codegen patterns for the above. We don't put these directly on the 4586// instructions because TableGen's type inference can't handle the truth. 4587// Having the same base pattern for fp <--> int totally freaks it out. 

//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
                                              vecshiftR64:$imm)),
          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;

defm SHL      : SIMDScalarLShiftD<    0, 0b01010, "shl", AArch64vshl>;
defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN  : SIMDScalarRShiftBHS<  0, 0b10011, "sqrshrn",
                                      int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS<  1, 0b10001, "sqrshrun",
                                      int_aarch64_neon_sqrshrun>;
defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN   : SIMDScalarRShiftBHS<  0, 0b10010, "sqshrn",
                                      int_aarch64_neon_sqshrn>;
defm SQSHRUN  : SIMDScalarRShiftBHS<  1, 0b10000, "sqshrun",
                                      int_aarch64_neon_sqshrun>;
defm SRI      : SIMDScalarRShiftDTied<1, 0b01000, "sri">;
defm SRSHR    : SIMDScalarRShiftD<    0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA    : SIMDScalarRShiftDTied<0, 0b00110, "srsra",
    TriOpFrag<(add node:$LHS,
                   (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR     : SIMDScalarRShiftD<    0, 0b00000, "sshr", AArch64vashr>;
defm SSRA     : SIMDScalarRShiftDTied<0, 0b00010, "ssra",
    TriOpFrag<(add node:$LHS,
                   (AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN  : SIMDScalarRShiftBHS<  1, 0b10011, "uqrshrn",
                                      int_aarch64_neon_uqrshrn>;
defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN   : SIMDScalarRShiftBHS<  1, 0b10010, "uqshrn",
                                      int_aarch64_neon_uqshrn>;
defm URSHR    : SIMDScalarRShiftD<    1, 0b00100, "urshr", AArch64urshri>;
defm URSRA    : SIMDScalarRShiftDTied<1, 0b00110, "ursra",
    TriOpFrag<(add node:$LHS,
                   (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR     : SIMDScalarRShiftD<    1, 0b00000, "ushr", AArch64vlshr>;
defm USRA     : SIMDScalarRShiftDTied<1, 0b00010, "usra",
    TriOpFrag<(add node:$LHS,
                   (AArch64vlshr node:$MHS, node:$RHS))>>;

//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU : SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF  : SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
                                   int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
                                        int_aarch64_neon_rshrn>;
defm SHL    : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
                  BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI    : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                        (i32 vecshiftL64:$imm))),
          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
                                          int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
                                          int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
                                        int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
                                         int_aarch64_neon_sqshrun>;
defm SRI    : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
                                        (i32 vecshiftR64:$imm))),
          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR  : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA  : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
                  TriOpFrag<(add node:$LHS,
                                 (AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHLL  : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
                  BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;

defm SSHR   : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA   : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
                  TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF  : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
                                   int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
                                         int_aarch64_neon_uqrshrn>;
defm UQSHL  : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
                                        int_aarch64_neon_uqshrn>;
defm URSHR  : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA  : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
                  TriOpFrag<(add node:$LHS,
                                 (AArch64urshri node:$MHS, node:$RHS))>>;
defm USHLL  : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
                  BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR   : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA   : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                  TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))>>;
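
// Editorial illustration (assumes ACLE intrinsics): the tied *SRA forms
// fuse a right shift with an accumulate, matching the TriOpFrag above.
//
//   #include <arm_neon.h>
//   uint32x4_t shift_accum(uint32x4_t acc, uint32x4_t v) {
//     return vsraq_n_u32(acc, v, 3);   // USRA v0.4s, v1.4s, #3
//   }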

// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
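
// Editorial illustration (assumes ACLE intrinsics): a widening move is just
// a long shift by zero, which is also what the sxtl/uxtl aliases below
// expand to.
//
//   #include <arm_neon.h>
//   int16x8_t widen(int8x8_t v) {
//     return vmovl_s8(v);      // SSHLL v0.8h, v0.8b, #0  (alias: sxtl)
//   }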

// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8))))),
          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4))))),
          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2))))),
          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;

// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;

// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;

// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bit -> float. 2 size step-ups.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (f64
                                (EXTRACT_SUBREG
                                  (SSHLLv8i8_shift
                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                   INST,
                                                   bsub),
                                    0),
                                  dsub)),
                              0),
                            ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bit -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                            (SSHLLv4i16_shift
                              (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                             INST,
                                             hsub),
                              0),
                            ssub)))>, Requires<[NotForCodeSize]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bit to 32-bit is handled in the target-specific DAG combine
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with an SCVTF on
// floating point registers (source and destination must have the same size).

// Here are the patterns for 8-, 16-, 32-, and 64-bit to double.
// 8-bit -> double. 3 size step-ups: give up.
// 16-bit -> double. 2 size step-ups.
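// As a sketch of what the pattern below selects (register names are
// illustrative), (f64 (sint_to_fp (i32 (sextloadi16 addr)))) becomes:
//   ldr   h0, [x0]
//   sshll v0.4s, v0.4h, #0
//   sshll v0.2d, v0.2s, #0
//   scvtf d0, d0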
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (f64
                                 (EXTRACT_SUBREG
                                   (SSHLLv4i16_shift
                                     (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                    INST,
                                                    hsub),
                                     0),
                                   dsub)),
                               0),
                             dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;

def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bit -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
                             (SSHLLv2i32_shift
                               (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                              INST,
                                              ssub),
                               0),
                             dsub)))>, Requires<[NotForCodeSize]>;

def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;

// 64-bit -> double is handled in the target-specific DAG combine
// performIntToFpCombine.
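
// For reference, the 32-bit-to-double pattern above keeps the load on the
// floating point unit (register names illustrative):
//   ldr   s0, [x0]
//   sshll v0.2d, v0.2s, #0
//   scvtf d0, d0
// rather than "ldr w8, [x0]; scvtf d0, w8", trading one extra instruction
// for the cheaper FPR -> FPR conversion.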

//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;

defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;

class Ld1Pat<ValueType ty, Instruction INST>
  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;

def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8, LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;

class St1Pat<ValueType ty, Instruction INST>
  : Pat<(store ty:$Vt, GPR64sp:$Rn),
        (INST ty:$Vt, GPR64sp:$Rn)>;

def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8, ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;

//---
// Single-element
//---

defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
}

def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
          (LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point versions too.
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
          (LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
          (LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
          (LD1Rv8h GPR64sp:$Rn)>;

class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne128:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;

def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;

class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction LD1>
  : Pat<(vector_insert (VTy VecListOne64:$Rd),
                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
        (EXTRACT_SUBREG
            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
                 VecIndex:$idx, GPR64sp:$Rn),
            dsub)>;

def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;

defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;

// Stores
defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;

let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                    ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
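
// For example, with the patterns above, storing lane 1 of a v4i32,
// (store (i32 (vector_extract (v4i32 V128:$Vt), 1)), GPR64sp:$Rn), selects
// to "st1 { v0.s }[1], [x0]" (register names illustrative), with no
// separate lane-extract instruction.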
let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                   ValueType VTy, ValueType STy, Instruction ST1>
  : Pat<(scalar_store
             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
             GPR64sp:$Rn),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn)>;

def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;

multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
                            ValueType VTy, ValueType STy, Instruction ST1,
                            int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
                        2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;

multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
                             ValueType VTy, ValueType STy, Instruction ST1,
                             int offset> {
  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, offset),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;

  def : Pat<(scalar_store
              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
              GPR64sp:$Rn, GPR64:$Rm),
        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}

defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
                         1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
                         2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;

let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
}

defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;

def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",
                                  int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1",
                                  int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h", int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",
                                int_aarch64_crypto_sha256su0>;

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// Any instruction that defines a 32-bit result zeroes the high half of the
// 64-bit register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits.
// FIXME: X86 also checks for CMOV here. Do we need something similar?
def def32 : PatLeaf<(i32 GPR32:$src), [{
  return N->getOpcode() != ISD::TRUNCATE &&
         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
         N->getOpcode() != ISD::CopyFromReg;
}]>;

// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
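// For example (registers illustrative), (i64 (zext GPR32:$src)) becomes
// "mov w0, w1" (an ORRWrs with WZR), which also clears bits [63:32] of x0;
// the SUBREG_TO_REG then asserts to the rest of the backend that the upper
// 32 bits are zero.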
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                   (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i8 imm0_63:$imm)))>;

def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
                   (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i16 imm0_63:$imm)))>;

def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 (i64shift_a imm0_63:$imm)),
                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;

// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations that preserve at least one bit of the
// original value to be sign extended, i.e. shifts of up to bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;

def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;

def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
                   (i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20

// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;

// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//   store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//   store v4i16 v3
//
// But this is now broken - the value stored is different from the value
// loaded due to lane reordering. To fix this, on every BITCAST we must
// perform two other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32                  (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
//   store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
//   a) Identity conversions -  vNfX <-> vNiX
//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//

// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;

// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;

def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
          (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
          (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
          (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;

def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;

let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
          (v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
          (v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
          (v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
          (v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
          (v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
          (v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
          (v2i32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
          (v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
          (v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
          (v4i16 (REV64v4i16 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
          (v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
          (v4f16 (REV64v4i16 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
          (v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
          (v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
          (v8i8 (REV16v8i8 FPR64:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
          (f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
          (f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
          (f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
          (v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
          (v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
          (v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
          (v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
          (v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
          (v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
          (v2f32 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;

let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                          (REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
          (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                          (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
          (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                          (REV64v16i8 FPR128:$src), (i32 8)))>;
}

let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
          (v2f64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
          (v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
          (v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
          (v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
          (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
          (v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
          (v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
          (v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
          (v2i64 (EXTv16i8 FPR128:$src,
                           FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
          (v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
          (v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
          (v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
          (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                           (REV64v4i32 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
          (v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
          (v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
          (v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;

let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
          (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
          (v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
          (v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))),
          (v8i16 (REV32v8i16 FPR128:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
          (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                           (REV64v8i16 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
          (v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
          (v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
          (v8f16 (REV32v8i16 FPR128:$src))>;
}

let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
          (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                           (REV64v16i8 FPR128:$src),
                           (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
          (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
          (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
          (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
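// For example, (insert_subvector undef, (v2f32 FPR64:$src), (i32 0)) becomes
// an INSERT_SUBREG on dsub and generates no code: the 64-bit value already
// sits in the low half of the 128-bit register.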
def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                     (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (CPYi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity = 15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

include "AArch64InstrAtomics.td"