1//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// AArch64 Instruction definitions. 11// 12//===----------------------------------------------------------------------===// 13 14//===----------------------------------------------------------------------===// 15// ARM Instruction Predicate Definitions. 16// 17def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, 18 AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; 19def HasNEON : Predicate<"Subtarget->hasNEON()">, 20 AssemblerPredicate<"FeatureNEON", "neon">; 21def HasCrypto : Predicate<"Subtarget->hasCrypto()">, 22 AssemblerPredicate<"FeatureCrypto", "crypto">; 23def HasCRC : Predicate<"Subtarget->hasCRC()">, 24 AssemblerPredicate<"FeatureCRC", "crc">; 25def IsLE : Predicate<"Subtarget->isLittleEndian()">; 26def IsBE : Predicate<"!Subtarget->isLittleEndian()">; 27 28//===----------------------------------------------------------------------===// 29// AArch64-specific DAG Nodes. 30// 31 32// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS 33def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, 34 [SDTCisSameAs<0, 2>, 35 SDTCisSameAs<0, 3>, 36 SDTCisInt<0>, SDTCisVT<1, i32>]>; 37 38// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS 39def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, 40 [SDTCisSameAs<0, 1>, 41 SDTCisSameAs<0, 2>, 42 SDTCisInt<0>, 43 SDTCisVT<3, i32>]>; 44 45// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS 46def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, 47 [SDTCisSameAs<0, 2>, 48 SDTCisSameAs<0, 3>, 49 SDTCisInt<0>, 50 SDTCisVT<1, i32>, 51 SDTCisVT<4, i32>]>; 52 53def SDT_AArch64Brcond : SDTypeProfile<0, 3, 54 [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, 55 SDTCisVT<2, i32>]>; 56def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; 57def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, 58 SDTCisVT<2, OtherVT>]>; 59 60 61def SDT_AArch64CSel : SDTypeProfile<1, 4, 62 [SDTCisSameAs<0, 1>, 63 SDTCisSameAs<0, 2>, 64 SDTCisInt<3>, 65 SDTCisVT<4, i32>]>; 66def SDT_AArch64FCmp : SDTypeProfile<0, 2, 67 [SDTCisFP<0>, 68 SDTCisSameAs<0, 1>]>; 69def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; 70def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; 71def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, 72 SDTCisSameAs<0, 1>, 73 SDTCisSameAs<0, 2>]>; 74def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; 75def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; 76def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 77 SDTCisInt<2>, SDTCisInt<3>]>; 78def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 79def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 80 SDTCisSameAs<0,2>, SDTCisInt<3>]>; 81def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; 82 83def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 84def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; 85def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; 86def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 87 SDTCisSameAs<0,2>]>; 88def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 89 SDTCisSameAs<0,2>, 90 SDTCisSameAs<0,3>]>; 91def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; 92def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; 93 94def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; 95 96def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, 97 SDTCisPtrTy<1>]>; 98def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, 99 [SDTCisVT<0, i64>, SDTCisVT<1, i32>, 100 SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, 101 SDTCisSameAs<1, 4>]>; 102 103 104// Node definitions. 105def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; 106def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; 107def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; 108def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", 109 SDCallSeqStart<[ SDTCisVT<0, i32> ]>, 110 [SDNPHasChain, SDNPOutGlue]>; 111def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", 112 SDCallSeqEnd<[ SDTCisVT<0, i32>, 113 SDTCisVT<1, i32> ]>, 114 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 115def AArch64call : SDNode<"AArch64ISD::CALL", 116 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, 117 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, 118 SDNPVariadic]>; 119def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, 120 [SDNPHasChain]>; 121def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, 122 [SDNPHasChain]>; 123def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, 124 [SDNPHasChain]>; 125def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, 126 [SDNPHasChain]>; 127def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, 128 [SDNPHasChain]>; 129 130 131def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; 132def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; 133def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; 134def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; 135def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, 136 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 137def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; 138def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; 139def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, 140 [SDNPCommutative]>; 141def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; 142def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, 143 [SDNPCommutative]>; 144def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; 145def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; 146 147def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; 148 149def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; 150 151def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; 152def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; 153 154def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; 155def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; 156def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; 157def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; 158def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; 159 160def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; 161def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; 162def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; 163def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; 164def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; 165def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; 166 167def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; 168def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; 169def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; 170def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; 171def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; 172def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; 173def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; 174 175def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; 176def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; 177def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; 178def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; 179 180def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; 181def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; 182def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; 183def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; 184def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; 185def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; 186def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; 187def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; 188 189def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; 190def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; 191def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; 192 193def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; 194def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; 195def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; 196def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; 197def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; 198 199def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; 200def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; 201def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; 202 203def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; 204def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; 205def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; 206def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; 207def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; 208def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), 209 (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; 210 211def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; 212def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; 213def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; 214def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; 215def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; 216 217def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; 218def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; 219 220def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; 221 222def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, 223 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 224 225def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, 226 [SDNPHasChain, SDNPSideEffect]>; 227 228def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; 229def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; 230 231def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", 232 SDT_AArch64TLSDescCall, 233 [SDNPInGlue, SDNPOutGlue, SDNPHasChain, 234 SDNPVariadic]>; 235 236def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", 237 SDT_AArch64WrapperLarge>; 238 239 240//===----------------------------------------------------------------------===// 241 242//===----------------------------------------------------------------------===// 243 244// AArch64 Instruction Predicate Definitions. 245// 246def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; 247def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; 248def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; 249def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; 250def ForCodeSize : Predicate<"ForCodeSize">; 251def NotForCodeSize : Predicate<"!ForCodeSize">; 252 253include "AArch64InstrFormats.td" 254 255//===----------------------------------------------------------------------===// 256 257//===----------------------------------------------------------------------===// 258// Miscellaneous instructions. 259//===----------------------------------------------------------------------===// 260 261let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { 262def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), 263 [(AArch64callseq_start timm:$amt)]>; 264def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), 265 [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; 266} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 267 268let isReMaterializable = 1, isCodeGenOnly = 1 in { 269// FIXME: The following pseudo instructions are only needed because remat 270// cannot handle multiple instructions. When that changes, they can be 271// removed, along with the AArch64Wrapper node. 272 273let AddedComplexity = 10 in 274def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), 275 [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, 276 Sched<[WriteLDAdr]>; 277 278// The MOVaddr instruction should match only when the add is not folded 279// into a load or store address. 280def MOVaddr 281 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 282 [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), 283 tglobaladdr:$low))]>, 284 Sched<[WriteAdrAdr]>; 285def MOVaddrJT 286 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 287 [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), 288 tjumptable:$low))]>, 289 Sched<[WriteAdrAdr]>; 290def MOVaddrCP 291 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 292 [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), 293 tconstpool:$low))]>, 294 Sched<[WriteAdrAdr]>; 295def MOVaddrBA 296 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 297 [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), 298 tblockaddress:$low))]>, 299 Sched<[WriteAdrAdr]>; 300def MOVaddrTLS 301 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 302 [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), 303 tglobaltlsaddr:$low))]>, 304 Sched<[WriteAdrAdr]>; 305def MOVaddrEXT 306 : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), 307 [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), 308 texternalsym:$low))]>, 309 Sched<[WriteAdrAdr]>; 310 311} // isReMaterializable, isCodeGenOnly 312 313def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), 314 (LOADgot tglobaltlsaddr:$addr)>; 315 316def : Pat<(AArch64LOADgot texternalsym:$addr), 317 (LOADgot texternalsym:$addr)>; 318 319def : Pat<(AArch64LOADgot tconstpool:$addr), 320 (LOADgot tconstpool:$addr)>; 321 322//===----------------------------------------------------------------------===// 323// System instructions. 324//===----------------------------------------------------------------------===// 325 326def HINT : HintI<"hint">; 327def : InstAlias<"nop", (HINT 0b000)>; 328def : InstAlias<"yield",(HINT 0b001)>; 329def : InstAlias<"wfe", (HINT 0b010)>; 330def : InstAlias<"wfi", (HINT 0b011)>; 331def : InstAlias<"sev", (HINT 0b100)>; 332def : InstAlias<"sevl", (HINT 0b101)>; 333 334 // As far as LLVM is concerned this writes to the system's exclusive monitors. 335let mayLoad = 1, mayStore = 1 in 336def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">; 337 338def DMB : CRmSystemI<barrier_op, 0b101, "dmb">; 339def DSB : CRmSystemI<barrier_op, 0b100, "dsb">; 340def ISB : CRmSystemI<barrier_op, 0b110, "isb">; 341def : InstAlias<"clrex", (CLREX 0xf)>; 342def : InstAlias<"isb", (ISB 0xf)>; 343 344def MRS : MRSI; 345def MSR : MSRI; 346def MSRpstate: MSRpstateI; 347 348// The thread pointer (on Linux, at least, where this has been implemented) is 349// TPIDR_EL0. 350def : Pat<(AArch64threadpointer), (MRS 0xde82)>; 351 352// Generic system instructions 353def SYSxt : SystemXtI<0, "sys">; 354def SYSLxt : SystemLXtI<1, "sysl">; 355 356def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 357 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 358 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 359 360//===----------------------------------------------------------------------===// 361// Move immediate instructions. 362//===----------------------------------------------------------------------===// 363 364defm MOVK : InsertImmediate<0b11, "movk">; 365defm MOVN : MoveImmediate<0b00, "movn">; 366 367let PostEncoderMethod = "fixMOVZ" in 368defm MOVZ : MoveImmediate<0b10, "movz">; 369 370// First group of aliases covers an implicit "lsl #0". 371def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>; 372def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>; 373def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>; 374def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>; 375def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>; 376def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>; 377 378// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 379def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; 380def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; 381def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; 382def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; 383 384def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; 385def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; 386def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; 387def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; 388 389def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>; 390def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>; 391def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>; 392def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>; 393 394def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; 395def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; 396 397def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; 398def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; 399 400def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>; 401def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>; 402 403// Final group of aliases covers true "mov $Rd, $imm" cases. 404multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 405 int width, int shift> { 406 def _asmoperand : AsmOperandClass { 407 let Name = basename # width # "_lsl" # shift # "MovAlias"; 408 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 409 # shift # ">"; 410 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 411 } 412 413 def _movimm : Operand<i32> { 414 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 415 } 416 417 def : InstAlias<"mov $Rd, $imm", 418 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 419} 420 421defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 422defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 423 424defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 425defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 426defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 427defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 428 429defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 430defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 431 432defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 433defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 434defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 435defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 436 437let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 438 isAsCheapAsAMove = 1 in { 439// FIXME: The following pseudo instructions are only needed because remat 440// cannot handle multiple instructions. When that changes, we can select 441// directly to the real instructions and get rid of these pseudos. 442 443def MOVi32imm 444 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 445 [(set GPR32:$dst, imm:$src)]>, 446 Sched<[WriteImm]>; 447def MOVi64imm 448 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 449 [(set GPR64:$dst, imm:$src)]>, 450 Sched<[WriteImm]>; 451} // isReMaterializable, isCodeGenOnly 452 453// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 454// eventual expansion code fewer bits to worry about getting right. Marshalling 455// the types is a little tricky though: 456def i64imm_32bit : ImmLeaf<i64, [{ 457 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 458}]>; 459 460def trunc_imm : SDNodeXForm<imm, [{ 461 return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32); 462}]>; 463 464def : Pat<(i64 i64imm_32bit:$src), 465 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 466 467// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 468// sequences. 469def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 470 tglobaladdr:$g1, tglobaladdr:$g0), 471 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), 472 tglobaladdr:$g2, 32), 473 tglobaladdr:$g1, 16), 474 tglobaladdr:$g0, 0)>; 475 476def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 477 tblockaddress:$g1, tblockaddress:$g0), 478 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), 479 tblockaddress:$g2, 32), 480 tblockaddress:$g1, 16), 481 tblockaddress:$g0, 0)>; 482 483def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 484 tconstpool:$g1, tconstpool:$g0), 485 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), 486 tconstpool:$g2, 32), 487 tconstpool:$g1, 16), 488 tconstpool:$g0, 0)>; 489 490def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 491 tjumptable:$g1, tjumptable:$g0), 492 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48), 493 tjumptable:$g2, 32), 494 tjumptable:$g1, 16), 495 tjumptable:$g0, 0)>; 496 497 498//===----------------------------------------------------------------------===// 499// Arithmetic instructions. 500//===----------------------------------------------------------------------===// 501 502// Add/subtract with carry. 503defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 504defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 505 506def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 507def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 508def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 509def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 510 511// Add/subtract 512defm ADD : AddSub<0, "add", add>; 513defm SUB : AddSub<1, "sub">; 514 515def : InstAlias<"mov $dst, $src", 516 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 517def : InstAlias<"mov $dst, $src", 518 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 519def : InstAlias<"mov $dst, $src", 520 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 521def : InstAlias<"mov $dst, $src", 522 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 523 524defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">; 525defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">; 526 527// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 528def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 529 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 530def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 531 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 532def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 533 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 534def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 535 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 536def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 537 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 538def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 539 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 540def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3), 541 (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>; 542def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3), 543 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>; 544 545// Because of the immediate format for add/sub-imm instructions, the 546// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 547// These patterns capture that transformation. 548let AddedComplexity = 1 in { 549def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 550 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 551def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 552 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 553def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 554 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 555def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 556 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 557} 558 559// Because of the immediate format for add/sub-imm instructions, the 560// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 561// These patterns capture that transformation. 562let AddedComplexity = 1 in { 563def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 564 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 565def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 566 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 567def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 568 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 569def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 570 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 571} 572 573def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 574def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 575def : InstAlias<"neg $dst, $src$shift", 576 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 577def : InstAlias<"neg $dst, $src$shift", 578 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 579 580def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 581def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 582def : InstAlias<"negs $dst, $src$shift", 583 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 584def : InstAlias<"negs $dst, $src$shift", 585 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 586 587 588// Unsigned/Signed divide 589defm UDIV : Div<0, "udiv", udiv>; 590defm SDIV : Div<1, "sdiv", sdiv>; 591let isCodeGenOnly = 1 in { 592defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>; 593defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>; 594} 595 596// Variable shift 597defm ASRV : Shift<0b10, "asr", sra>; 598defm LSLV : Shift<0b00, "lsl", shl>; 599defm LSRV : Shift<0b01, "lsr", srl>; 600defm RORV : Shift<0b11, "ror", rotr>; 601 602def : ShiftAlias<"asrv", ASRVWr, GPR32>; 603def : ShiftAlias<"asrv", ASRVXr, GPR64>; 604def : ShiftAlias<"lslv", LSLVWr, GPR32>; 605def : ShiftAlias<"lslv", LSLVXr, GPR64>; 606def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 607def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 608def : ShiftAlias<"rorv", RORVWr, GPR32>; 609def : ShiftAlias<"rorv", RORVXr, GPR64>; 610 611// Multiply-add 612let AddedComplexity = 7 in { 613defm MADD : MulAccum<0, "madd", add>; 614defm MSUB : MulAccum<1, "msub", sub>; 615 616def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 617 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 618def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 619 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 620 621def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 622 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 623def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 624 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 625} // AddedComplexity = 7 626 627let AddedComplexity = 5 in { 628def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 629def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 630def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 631def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 632 633def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 634 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 635def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 636 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 637 638def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 639 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 640def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 641 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 642} // AddedComplexity = 5 643 644def : MulAccumWAlias<"mul", MADDWrrr>; 645def : MulAccumXAlias<"mul", MADDXrrr>; 646def : MulAccumWAlias<"mneg", MSUBWrrr>; 647def : MulAccumXAlias<"mneg", MSUBXrrr>; 648def : WideMulAccumAlias<"smull", SMADDLrrr>; 649def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 650def : WideMulAccumAlias<"umull", UMADDLrrr>; 651def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 652 653// Multiply-high 654def SMULHrr : MulHi<0b010, "smulh", mulhs>; 655def UMULHrr : MulHi<0b110, "umulh", mulhu>; 656 657// CRC32 658def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 659def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 660def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 661def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 662 663def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 664def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 665def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 666def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 667 668 669//===----------------------------------------------------------------------===// 670// Logical instructions. 671//===----------------------------------------------------------------------===// 672 673// (immediate) 674defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 675defm AND : LogicalImm<0b00, "and", and, "bic">; 676defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 677defm ORR : LogicalImm<0b01, "orr", or, "orn">; 678 679// FIXME: these aliases *are* canonical sometimes (when movz can't be 680// used). Actually, it seems to be working right now, but putting logical_immXX 681// here is a bit dodgy on the AsmParser side too. 682def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 683 logical_imm32:$imm), 0>; 684def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 685 logical_imm64:$imm), 0>; 686 687 688// (register) 689defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 690defm BICS : LogicalRegS<0b11, 1, "bics", 691 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 692defm AND : LogicalReg<0b00, 0, "and", and>; 693defm BIC : LogicalReg<0b00, 1, "bic", 694 BinOpFrag<(and node:$LHS, (not node:$RHS))>>; 695defm EON : LogicalReg<0b10, 1, "eon", 696 BinOpFrag<(xor node:$LHS, (not node:$RHS))>>; 697defm EOR : LogicalReg<0b10, 0, "eor", xor>; 698defm ORN : LogicalReg<0b01, 1, "orn", 699 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 700defm ORR : LogicalReg<0b01, 0, "orr", or>; 701 702def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 703def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 704 705def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 706def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 707 708def : InstAlias<"mvn $Wd, $Wm$sh", 709 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 710def : InstAlias<"mvn $Xd, $Xm$sh", 711 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 712 713def : InstAlias<"tst $src1, $src2", 714 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 715def : InstAlias<"tst $src1, $src2", 716 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 717 718def : InstAlias<"tst $src1, $src2", 719 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 720def : InstAlias<"tst $src1, $src2", 721 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 722 723def : InstAlias<"tst $src1, $src2$sh", 724 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 725def : InstAlias<"tst $src1, $src2$sh", 726 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 727 728 729def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 730def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 731 732 733//===----------------------------------------------------------------------===// 734// One operand data processing instructions. 735//===----------------------------------------------------------------------===// 736 737defm CLS : OneOperandData<0b101, "cls">; 738defm CLZ : OneOperandData<0b100, "clz", ctlz>; 739defm RBIT : OneOperandData<0b000, "rbit">; 740 741def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>; 742def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>; 743 744def REV16Wr : OneWRegData<0b001, "rev16", 745 UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; 746def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; 747 748def : Pat<(cttz GPR32:$Rn), 749 (CLZWr (RBITWr GPR32:$Rn))>; 750def : Pat<(cttz GPR64:$Rn), 751 (CLZXr (RBITXr GPR64:$Rn))>; 752def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)), 753 (i32 1))), 754 (CLSWr GPR32:$Rn)>; 755def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)), 756 (i64 1))), 757 (CLSXr GPR64:$Rn)>; 758 759// Unlike the other one operand instructions, the instructions with the "rev" 760// mnemonic do *not* just different in the size bit, but actually use different 761// opcode bits for the different sizes. 762def REVWr : OneWRegData<0b010, "rev", bswap>; 763def REVXr : OneXRegData<0b011, "rev", bswap>; 764def REV32Xr : OneXRegData<0b010, "rev32", 765 UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; 766 767// The bswap commutes with the rotr so we want a pattern for both possible 768// orders. 769def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; 770def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>; 771 772//===----------------------------------------------------------------------===// 773// Bitfield immediate extraction instruction. 774//===----------------------------------------------------------------------===// 775let neverHasSideEffects = 1 in 776defm EXTR : ExtractImm<"extr">; 777def : InstAlias<"ror $dst, $src, $shift", 778 (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>; 779def : InstAlias<"ror $dst, $src, $shift", 780 (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>; 781 782def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)), 783 (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>; 784def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)), 785 (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>; 786 787//===----------------------------------------------------------------------===// 788// Other bitfield immediate instructions. 789//===----------------------------------------------------------------------===// 790let neverHasSideEffects = 1 in { 791defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 792defm SBFM : BitfieldImm<0b00, "sbfm">; 793defm UBFM : BitfieldImm<0b10, "ubfm">; 794} 795 796def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 797 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 798 return CurDAG->getTargetConstant(enc, MVT::i64); 799}]>; 800 801def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 802 uint64_t enc = 31 - N->getZExtValue(); 803 return CurDAG->getTargetConstant(enc, MVT::i64); 804}]>; 805 806// min(7, 31 - shift_amt) 807def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 808 uint64_t enc = 31 - N->getZExtValue(); 809 enc = enc > 7 ? 7 : enc; 810 return CurDAG->getTargetConstant(enc, MVT::i64); 811}]>; 812 813// min(15, 31 - shift_amt) 814def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 815 uint64_t enc = 31 - N->getZExtValue(); 816 enc = enc > 15 ? 15 : enc; 817 return CurDAG->getTargetConstant(enc, MVT::i64); 818}]>; 819 820def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 821 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 822 return CurDAG->getTargetConstant(enc, MVT::i64); 823}]>; 824 825def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 826 uint64_t enc = 63 - N->getZExtValue(); 827 return CurDAG->getTargetConstant(enc, MVT::i64); 828}]>; 829 830// min(7, 63 - shift_amt) 831def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 832 uint64_t enc = 63 - N->getZExtValue(); 833 enc = enc > 7 ? 7 : enc; 834 return CurDAG->getTargetConstant(enc, MVT::i64); 835}]>; 836 837// min(15, 63 - shift_amt) 838def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 839 uint64_t enc = 63 - N->getZExtValue(); 840 enc = enc > 15 ? 15 : enc; 841 return CurDAG->getTargetConstant(enc, MVT::i64); 842}]>; 843 844// min(31, 63 - shift_amt) 845def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 846 uint64_t enc = 63 - N->getZExtValue(); 847 enc = enc > 31 ? 31 : enc; 848 return CurDAG->getTargetConstant(enc, MVT::i64); 849}]>; 850 851def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 852 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 853 (i64 (i32shift_b imm0_31:$imm)))>; 854def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 855 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 856 (i64 (i64shift_b imm0_63:$imm)))>; 857 858let AddedComplexity = 10 in { 859def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 860 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 861def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 862 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 863} 864 865def : InstAlias<"asr $dst, $src, $shift", 866 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 867def : InstAlias<"asr $dst, $src, $shift", 868 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 869def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 870def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 871def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 872def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 873def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 874 875def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 876 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 877def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 878 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 879 880def : InstAlias<"lsr $dst, $src, $shift", 881 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 882def : InstAlias<"lsr $dst, $src, $shift", 883 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 884def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 885def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 886def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 887def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 888def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 889 890//===----------------------------------------------------------------------===// 891// Conditionally set flags instructions. 892//===----------------------------------------------------------------------===// 893defm CCMN : CondSetFlagsImm<0, "ccmn">; 894defm CCMP : CondSetFlagsImm<1, "ccmp">; 895 896defm CCMN : CondSetFlagsReg<0, "ccmn">; 897defm CCMP : CondSetFlagsReg<1, "ccmp">; 898 899//===----------------------------------------------------------------------===// 900// Conditional select instructions. 901//===----------------------------------------------------------------------===// 902defm CSEL : CondSelect<0, 0b00, "csel">; 903 904def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; 905defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; 906defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; 907defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; 908 909def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 910 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 911def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 912 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 913def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 914 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 915def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 916 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 917def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 918 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 919def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 920 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 921 922def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), 923 (CSINCWr WZR, WZR, (i32 imm:$cc))>; 924def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), 925 (CSINCXr XZR, XZR, (i32 imm:$cc))>; 926def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), 927 (CSINVWr WZR, WZR, (i32 imm:$cc))>; 928def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), 929 (CSINVXr XZR, XZR, (i32 imm:$cc))>; 930 931// The inverse of the condition code from the alias instruction is what is used 932// in the aliased instruction. The parser all ready inverts the condition code 933// for these aliases. 934def : InstAlias<"cset $dst, $cc", 935 (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; 936def : InstAlias<"cset $dst, $cc", 937 (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; 938 939def : InstAlias<"csetm $dst, $cc", 940 (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>; 941def : InstAlias<"csetm $dst, $cc", 942 (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>; 943 944def : InstAlias<"cinc $dst, $src, $cc", 945 (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 946def : InstAlias<"cinc $dst, $src, $cc", 947 (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 948 949def : InstAlias<"cinv $dst, $src, $cc", 950 (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 951def : InstAlias<"cinv $dst, $src, $cc", 952 (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 953 954def : InstAlias<"cneg $dst, $src, $cc", 955 (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>; 956def : InstAlias<"cneg $dst, $src, $cc", 957 (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>; 958 959//===----------------------------------------------------------------------===// 960// PC-relative instructions. 961//===----------------------------------------------------------------------===// 962let isReMaterializable = 1 in { 963let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in { 964def ADR : ADRI<0, "adr", adrlabel, []>; 965} // neverHasSideEffects = 1 966 967def ADRP : ADRI<1, "adrp", adrplabel, 968 [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; 969} // isReMaterializable = 1 970 971// page address of a constant pool entry, block address 972def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; 973def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; 974 975//===----------------------------------------------------------------------===// 976// Unconditional branch (register) instructions. 977//===----------------------------------------------------------------------===// 978 979let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 980def RET : BranchReg<0b0010, "ret", []>; 981def DRPS : SpecialReturn<0b0101, "drps">; 982def ERET : SpecialReturn<0b0100, "eret">; 983} // isReturn = 1, isTerminator = 1, isBarrier = 1 984 985// Default to the LR register. 986def : InstAlias<"ret", (RET LR)>; 987 988let isCall = 1, Defs = [LR], Uses = [SP] in { 989def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; 990} // isCall 991 992let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 993def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; 994} // isBranch, isTerminator, isBarrier, isIndirectBranch 995 996// Create a separate pseudo-instruction for codegen to use so that we don't 997// flag lr as used in every function. It'll be restored before the RET by the 998// epilogue if it's legitimately used. 999def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { 1000 let isTerminator = 1; 1001 let isBarrier = 1; 1002 let isReturn = 1; 1003} 1004 1005// This is a directive-like pseudo-instruction. The purpose is to insert an 1006// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction 1007// (which in the usual case is a BLR). 1008let hasSideEffects = 1 in 1009def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { 1010 let AsmString = ".tlsdesccall $sym"; 1011} 1012 1013// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It 1014// gets expanded to two MCInsts during lowering. 1015let isCall = 1, Defs = [LR] in 1016def TLSDESC_BLR 1017 : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), 1018 [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; 1019 1020def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), 1021 (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; 1022//===----------------------------------------------------------------------===// 1023// Conditional branch (immediate) instruction. 1024//===----------------------------------------------------------------------===// 1025def Bcc : BranchCond; 1026 1027//===----------------------------------------------------------------------===// 1028// Compare-and-branch instructions. 1029//===----------------------------------------------------------------------===// 1030defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; 1031defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; 1032 1033//===----------------------------------------------------------------------===// 1034// Test-bit-and-branch instructions. 1035//===----------------------------------------------------------------------===// 1036defm TBZ : TestBranch<0, "tbz", AArch64tbz>; 1037defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; 1038 1039//===----------------------------------------------------------------------===// 1040// Unconditional branch (immediate) instructions. 1041//===----------------------------------------------------------------------===// 1042let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 1043def B : BranchImm<0, "b", [(br bb:$addr)]>; 1044} // isBranch, isTerminator, isBarrier 1045 1046let isCall = 1, Defs = [LR], Uses = [SP] in { 1047def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; 1048} // isCall 1049def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; 1050 1051//===----------------------------------------------------------------------===// 1052// Exception generation instructions. 1053//===----------------------------------------------------------------------===// 1054def BRK : ExceptionGeneration<0b001, 0b00, "brk">; 1055def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; 1056def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; 1057def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; 1058def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; 1059def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; 1060def SMC : ExceptionGeneration<0b000, 0b11, "smc">; 1061def SVC : ExceptionGeneration<0b000, 0b01, "svc">; 1062 1063// DCPSn defaults to an immediate operand of zero if unspecified. 1064def : InstAlias<"dcps1", (DCPS1 0)>; 1065def : InstAlias<"dcps2", (DCPS2 0)>; 1066def : InstAlias<"dcps3", (DCPS3 0)>; 1067 1068//===----------------------------------------------------------------------===// 1069// Load instructions. 1070//===----------------------------------------------------------------------===// 1071 1072// Pair (indexed, offset) 1073defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">; 1074defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">; 1075defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">; 1076defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">; 1077defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">; 1078 1079defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">; 1080 1081// Pair (pre-indexed) 1082def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">; 1083def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">; 1084def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">; 1085def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">; 1086def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">; 1087 1088def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; 1089 1090// Pair (post-indexed) 1091def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; 1092def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; 1093def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; 1094def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; 1095def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; 1096 1097def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; 1098 1099 1100// Pair (no allocate) 1101defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">; 1102defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">; 1103defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">; 1104defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">; 1105defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">; 1106 1107//--- 1108// (register offset) 1109//--- 1110 1111// Integer 1112defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; 1113defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; 1114defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; 1115defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; 1116 1117// Floating-point 1118defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>; 1119defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>; 1120defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>; 1121defm LDRD : Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>; 1122defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>; 1123 1124// Load sign-extended half-word 1125defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; 1126defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; 1127 1128// Load sign-extended byte 1129defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; 1130defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; 1131 1132// Load sign-extended word 1133defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; 1134 1135// Pre-fetch. 1136defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; 1137 1138// For regular load, we do not have any alignment requirement. 1139// Thus, it is safe to directly map the vector loads with interesting 1140// addressing modes. 1141// FIXME: We could do the same for bitconvert to floating point vectors. 1142multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop, 1143 ValueType ScalTy, ValueType VecTy, 1144 Instruction LOADW, Instruction LOADX, 1145 SubRegIndex sub> { 1146 def : Pat<(VecTy (scalar_to_vector (ScalTy 1147 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), 1148 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 1149 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), 1150 sub)>; 1151 1152 def : Pat<(VecTy (scalar_to_vector (ScalTy 1153 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), 1154 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 1155 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), 1156 sub)>; 1157} 1158 1159let AddedComplexity = 10 in { 1160defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>; 1161defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>; 1162 1163defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>; 1164defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>; 1165 1166defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>; 1167defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>; 1168 1169defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>; 1170defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>; 1171 1172defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>; 1173 1174defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>; 1175 1176 1177def : Pat <(v1i64 (scalar_to_vector (i64 1178 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, 1179 ro_Wextend64:$extend))))), 1180 (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; 1181 1182def : Pat <(v1i64 (scalar_to_vector (i64 1183 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, 1184 ro_Xextend64:$extend))))), 1185 (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; 1186} 1187 1188// Match all load 64 bits width whose type is compatible with FPR64 1189multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, 1190 Instruction LOADW, Instruction LOADX> { 1191 1192 def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1193 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1194 1195 def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1196 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1197} 1198 1199let AddedComplexity = 10 in { 1200let Predicates = [IsLE] in { 1201 // We must do vector loads with LD1 in big-endian. 1202 defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; 1203 defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; 1204 defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; 1205 defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; 1206} 1207 1208defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; 1209defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; 1210 1211// Match all load 128 bits width whose type is compatible with FPR128 1212let Predicates = [IsLE] in { 1213 // We must do vector loads with LD1 in big-endian. 1214 defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>; 1215 defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>; 1216 defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>; 1217 defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>; 1218 defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>; 1219 defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>; 1220} 1221} // AddedComplexity = 10 1222 1223// zextload -> i64 1224multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop, 1225 Instruction INSTW, Instruction INSTX> { 1226 def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1227 (SUBREG_TO_REG (i64 0), 1228 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), 1229 sub_32)>; 1230 1231 def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1232 (SUBREG_TO_REG (i64 0), 1233 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), 1234 sub_32)>; 1235} 1236 1237let AddedComplexity = 10 in { 1238 defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>; 1239 defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>; 1240 defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>; 1241 1242 // zextloadi1 -> zextloadi8 1243 defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 1244 1245 // extload -> zextload 1246 defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 1247 defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 1248 defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; 1249 1250 // extloadi1 -> zextloadi8 1251 defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>; 1252} 1253 1254 1255// zextload -> i64 1256multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop, 1257 Instruction INSTW, Instruction INSTX> { 1258 def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 1259 (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1260 1261 def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 1262 (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1263 1264} 1265 1266let AddedComplexity = 10 in { 1267 // extload -> zextload 1268 defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>; 1269 defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>; 1270 defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>; 1271 1272 // zextloadi1 -> zextloadi8 1273 defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>; 1274} 1275 1276//--- 1277// (unsigned immediate) 1278//--- 1279defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr", 1280 [(set GPR64:$Rt, 1281 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 1282defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr", 1283 [(set GPR32:$Rt, 1284 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 1285defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr", 1286 [(set FPR8:$Rt, 1287 (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>; 1288defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr", 1289 [(set (f16 FPR16:$Rt), 1290 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>; 1291defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr", 1292 [(set (f32 FPR32:$Rt), 1293 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>; 1294defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr", 1295 [(set (f64 FPR64:$Rt), 1296 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>; 1297defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr", 1298 [(set (f128 FPR128:$Rt), 1299 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; 1300 1301// For regular load, we do not have any alignment requirement. 1302// Thus, it is safe to directly map the vector loads with interesting 1303// addressing modes. 1304// FIXME: We could do the same for bitconvert to floating point vectors. 1305def : Pat <(v8i8 (scalar_to_vector (i32 1306 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 1307 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 1308 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 1309def : Pat <(v16i8 (scalar_to_vector (i32 1310 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 1311 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 1312 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 1313def : Pat <(v4i16 (scalar_to_vector (i32 1314 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 1315 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 1316 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 1317def : Pat <(v8i16 (scalar_to_vector (i32 1318 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 1319 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 1320 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 1321def : Pat <(v2i32 (scalar_to_vector (i32 1322 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 1323 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 1324 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 1325def : Pat <(v4i32 (scalar_to_vector (i32 1326 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 1327 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 1328 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 1329def : Pat <(v1i64 (scalar_to_vector (i64 1330 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 1331 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1332def : Pat <(v2i64 (scalar_to_vector (i64 1333 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 1334 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 1335 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; 1336 1337// Match all load 64 bits width whose type is compatible with FPR64 1338let Predicates = [IsLE] in { 1339 // We must use LD1 to perform vector loads in big-endian. 1340 def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1341 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1342 def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1343 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1344 def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1345 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1346 def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1347 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1348} 1349def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1350 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1351def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 1352 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 1353 1354// Match all load 128 bits width whose type is compatible with FPR128 1355let Predicates = [IsLE] in { 1356 // We must use LD1 to perform vector loads in big-endian. 1357 def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1358 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1359 def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1360 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1361 def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1362 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1363 def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1364 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1365 def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1366 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1367 def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1368 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1369} 1370def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 1371 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 1372 1373defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", 1374 [(set GPR32:$Rt, 1375 (zextloadi16 (am_indexed16 GPR64sp:$Rn, 1376 uimm12s2:$offset)))]>; 1377defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", 1378 [(set GPR32:$Rt, 1379 (zextloadi8 (am_indexed8 GPR64sp:$Rn, 1380 uimm12s1:$offset)))]>; 1381// zextload -> i64 1382def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1383 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1384def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1385 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 1386 1387// zextloadi1 -> zextloadi8 1388def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1389 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1390def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1391 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1392 1393// extload -> zextload 1394def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1395 (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; 1396def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1397 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1398def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1399 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 1400def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 1401 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 1402def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 1403 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 1404def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1405 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1406def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 1407 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 1408 1409// load sign-extended half-word 1410defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", 1411 [(set GPR32:$Rt, 1412 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 1413 uimm12s2:$offset)))]>; 1414defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", 1415 [(set GPR64:$Rt, 1416 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 1417 uimm12s2:$offset)))]>; 1418 1419// load sign-extended byte 1420defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", 1421 [(set GPR32:$Rt, 1422 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 1423 uimm12s1:$offset)))]>; 1424defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", 1425 [(set GPR64:$Rt, 1426 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 1427 uimm12s1:$offset)))]>; 1428 1429// load sign-extended word 1430defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", 1431 [(set GPR64:$Rt, 1432 (sextloadi32 (am_indexed32 GPR64sp:$Rn, 1433 uimm12s4:$offset)))]>; 1434 1435// load zero-extended word 1436def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 1437 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 1438 1439// Pre-fetch. 1440def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", 1441 [(AArch64Prefetch imm:$Rt, 1442 (am_indexed64 GPR64sp:$Rn, 1443 uimm12s8:$offset))]>; 1444 1445def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; 1446 1447//--- 1448// (literal) 1449def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">; 1450def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">; 1451def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">; 1452def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">; 1453def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">; 1454 1455// load sign-extended word 1456def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">; 1457 1458// prefetch 1459def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; 1460// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; 1461 1462//--- 1463// (unscaled immediate) 1464defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur", 1465 [(set GPR64:$Rt, 1466 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 1467defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur", 1468 [(set GPR32:$Rt, 1469 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1470defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur", 1471 [(set FPR8:$Rt, 1472 (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1473defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur", 1474 [(set FPR16:$Rt, 1475 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1476defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur", 1477 [(set (f32 FPR32:$Rt), 1478 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1479defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur", 1480 [(set (f64 FPR64:$Rt), 1481 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 1482defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur", 1483 [(set (f128 FPR128:$Rt), 1484 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>; 1485 1486defm LDURHH 1487 : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", 1488 [(set GPR32:$Rt, 1489 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1490defm LDURBB 1491 : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", 1492 [(set GPR32:$Rt, 1493 (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1494 1495// Match all load 64 bits width whose type is compatible with FPR64 1496let Predicates = [IsLE] in { 1497 def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1498 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1499 def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1500 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1501 def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1502 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1503 def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1504 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1505} 1506def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1507 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1508def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 1509 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 1510 1511// Match all load 128 bits width whose type is compatible with FPR128 1512let Predicates = [IsLE] in { 1513 def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1514 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1515 def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1516 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1517 def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1518 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1519 def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1520 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1521 def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1522 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1523 def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 1524 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 1525} 1526 1527// anyext -> zext 1528def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1529 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 1530def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1531 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1532def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1533 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1534def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 1535 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1536def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1537 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1538def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1539 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1540def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1541 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1542// unscaled zext 1543def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1544 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 1545def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1546 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1547def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1548 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 1549def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 1550 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1551def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1552 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1553def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1554 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1555def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1556 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1557 1558 1559//--- 1560// LDR mnemonics fall back to LDUR for negative or unaligned offsets. 1561 1562// Define new assembler match classes as we want to only match these when 1563// the don't otherwise match the scaled addressing mode for LDR/STR. Don't 1564// associate a DiagnosticType either, as we want the diagnostic for the 1565// canonical form (the scaled operand) to take precedence. 1566class SImm9OffsetOperand<int Width> : AsmOperandClass { 1567 let Name = "SImm9OffsetFB" # Width; 1568 let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; 1569 let RenderMethod = "addImmOperands"; 1570} 1571 1572def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; 1573def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; 1574def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; 1575def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; 1576def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; 1577 1578def simm9_offset_fb8 : Operand<i64> { 1579 let ParserMatchClass = SImm9OffsetFB8Operand; 1580} 1581def simm9_offset_fb16 : Operand<i64> { 1582 let ParserMatchClass = SImm9OffsetFB16Operand; 1583} 1584def simm9_offset_fb32 : Operand<i64> { 1585 let ParserMatchClass = SImm9OffsetFB32Operand; 1586} 1587def simm9_offset_fb64 : Operand<i64> { 1588 let ParserMatchClass = SImm9OffsetFB64Operand; 1589} 1590def simm9_offset_fb128 : Operand<i64> { 1591 let ParserMatchClass = SImm9OffsetFB128Operand; 1592} 1593 1594def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1595 (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 1596def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1597 (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1598def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1599 (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1600def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1601 (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1602def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1603 (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1604def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1605 (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 1606def : InstAlias<"ldr $Rt, [$Rn, $offset]", 1607 (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 1608 1609// zextload -> i64 1610def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 1611 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1612def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 1613 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 1614 1615// load sign-extended half-word 1616defm LDURSHW 1617 : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", 1618 [(set GPR32:$Rt, 1619 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1620defm LDURSHX 1621 : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", 1622 [(set GPR64:$Rt, 1623 (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 1624 1625// load sign-extended byte 1626defm LDURSBW 1627 : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", 1628 [(set GPR32:$Rt, 1629 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1630defm LDURSBX 1631 : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", 1632 [(set GPR64:$Rt, 1633 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 1634 1635// load sign-extended word 1636defm LDURSW 1637 : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", 1638 [(set GPR64:$Rt, 1639 (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 1640 1641// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 1642def : InstAlias<"ldrb $Rt, [$Rn, $offset]", 1643 (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1644def : InstAlias<"ldrh $Rt, [$Rn, $offset]", 1645 (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1646def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 1647 (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1648def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 1649 (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 1650def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 1651 (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1652def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 1653 (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 1654def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", 1655 (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 1656 1657// Pre-fetch. 1658defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", 1659 [(AArch64Prefetch imm:$Rt, 1660 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 1661 1662//--- 1663// (unscaled immediate, unprivileged) 1664defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; 1665defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; 1666 1667defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; 1668defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; 1669 1670// load sign-extended half-word 1671defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; 1672defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; 1673 1674// load sign-extended byte 1675defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; 1676defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; 1677 1678// load sign-extended word 1679defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; 1680 1681//--- 1682// (immediate pre-indexed) 1683def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; 1684def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; 1685def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; 1686def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; 1687def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; 1688def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; 1689def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; 1690 1691// load sign-extended half-word 1692def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; 1693def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; 1694 1695// load sign-extended byte 1696def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; 1697def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; 1698 1699// load zero-extended byte 1700def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; 1701def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; 1702 1703// load sign-extended word 1704def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; 1705 1706//--- 1707// (immediate post-indexed) 1708def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; 1709def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">; 1710def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">; 1711def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">; 1712def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">; 1713def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">; 1714def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">; 1715 1716// load sign-extended half-word 1717def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">; 1718def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">; 1719 1720// load sign-extended byte 1721def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">; 1722def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">; 1723 1724// load zero-extended byte 1725def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">; 1726def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; 1727 1728// load sign-extended word 1729def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">; 1730 1731//===----------------------------------------------------------------------===// 1732// Store instructions. 1733//===----------------------------------------------------------------------===// 1734 1735// Pair (indexed, offset) 1736// FIXME: Use dedicated range-checked addressing mode operand here. 1737defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">; 1738defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">; 1739defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">; 1740defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">; 1741defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">; 1742 1743// Pair (pre-indexed) 1744def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">; 1745def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">; 1746def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">; 1747def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">; 1748def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">; 1749 1750// Pair (pre-indexed) 1751def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">; 1752def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">; 1753def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">; 1754def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">; 1755def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">; 1756 1757// Pair (no allocate) 1758defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">; 1759defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">; 1760defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">; 1761defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">; 1762defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">; 1763 1764//--- 1765// (Register offset) 1766 1767// Integer 1768defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>; 1769defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>; 1770defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>; 1771defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>; 1772 1773 1774// Floating-point 1775defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>; 1776defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>; 1777defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>; 1778defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>; 1779defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>; 1780 1781multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop, 1782 Instruction STRW, Instruction STRX> { 1783 1784 def : Pat<(storeop GPR64:$Rt, 1785 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 1786 (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), 1787 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1788 1789 def : Pat<(storeop GPR64:$Rt, 1790 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 1791 (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), 1792 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1793} 1794 1795let AddedComplexity = 10 in { 1796 // truncstore i64 1797 defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>; 1798 defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>; 1799 defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>; 1800} 1801 1802multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR, 1803 Instruction STRW, Instruction STRX> { 1804 def : Pat<(store (VecTy FPR:$Rt), 1805 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 1806 (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 1807 1808 def : Pat<(store (VecTy FPR:$Rt), 1809 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 1810 (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 1811} 1812 1813let AddedComplexity = 10 in { 1814// Match all store 64 bits width whose type is compatible with FPR64 1815let Predicates = [IsLE] in { 1816 // We must use ST1 to store vectors in big-endian. 1817 defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>; 1818 defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>; 1819 defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>; 1820 defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>; 1821} 1822 1823defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>; 1824defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>; 1825 1826// Match all store 128 bits width whose type is compatible with FPR128 1827let Predicates = [IsLE] in { 1828 // We must use ST1 to store vectors in big-endian. 1829 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; 1830 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; 1831 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; 1832 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; 1833 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; 1834 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; 1835} 1836} // AddedComplexity = 10 1837 1838//--- 1839// (unsigned immediate) 1840defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str", 1841 [(store GPR64:$Rt, 1842 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 1843defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str", 1844 [(store GPR32:$Rt, 1845 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 1846defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str", 1847 [(store FPR8:$Rt, 1848 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; 1849defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str", 1850 [(store (f16 FPR16:$Rt), 1851 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; 1852defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str", 1853 [(store (f32 FPR32:$Rt), 1854 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 1855defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str", 1856 [(store (f64 FPR64:$Rt), 1857 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 1858defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>; 1859 1860defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh", 1861 [(truncstorei16 GPR32:$Rt, 1862 (am_indexed16 GPR64sp:$Rn, 1863 uimm12s2:$offset))]>; 1864defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb", 1865 [(truncstorei8 GPR32:$Rt, 1866 (am_indexed8 GPR64sp:$Rn, 1867 uimm12s1:$offset))]>; 1868 1869// Match all store 64 bits width whose type is compatible with FPR64 1870let AddedComplexity = 10 in { 1871let Predicates = [IsLE] in { 1872 // We must use ST1 to store vectors in big-endian. 1873 def : Pat<(store (v2f32 FPR64:$Rt), 1874 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1875 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1876 def : Pat<(store (v8i8 FPR64:$Rt), 1877 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1878 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1879 def : Pat<(store (v4i16 FPR64:$Rt), 1880 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1881 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1882 def : Pat<(store (v2i32 FPR64:$Rt), 1883 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1884 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1885} 1886def : Pat<(store (v1f64 FPR64:$Rt), 1887 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1888 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1889def : Pat<(store (v1i64 FPR64:$Rt), 1890 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 1891 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 1892 1893// Match all store 128 bits width whose type is compatible with FPR128 1894let Predicates = [IsLE] in { 1895 // We must use ST1 to store vectors in big-endian. 1896 def : Pat<(store (v4f32 FPR128:$Rt), 1897 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1898 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1899 def : Pat<(store (v2f64 FPR128:$Rt), 1900 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1901 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1902 def : Pat<(store (v16i8 FPR128:$Rt), 1903 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1904 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1905 def : Pat<(store (v8i16 FPR128:$Rt), 1906 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1907 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1908 def : Pat<(store (v4i32 FPR128:$Rt), 1909 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1910 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1911 def : Pat<(store (v2i64 FPR128:$Rt), 1912 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1913 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1914} 1915def : Pat<(store (f128 FPR128:$Rt), 1916 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 1917 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 1918 1919// truncstore i64 1920def : Pat<(truncstorei32 GPR64:$Rt, 1921 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), 1922 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; 1923def : Pat<(truncstorei16 GPR64:$Rt, 1924 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 1925 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; 1926def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), 1927 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; 1928 1929} // AddedComplexity = 10 1930 1931//--- 1932// (unscaled immediate) 1933defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur", 1934 [(store GPR64:$Rt, 1935 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 1936defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur", 1937 [(store GPR32:$Rt, 1938 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 1939defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur", 1940 [(store FPR8:$Rt, 1941 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 1942defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur", 1943 [(store (f16 FPR16:$Rt), 1944 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 1945defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur", 1946 [(store (f32 FPR32:$Rt), 1947 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 1948defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur", 1949 [(store (f64 FPR64:$Rt), 1950 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 1951defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur", 1952 [(store (f128 FPR128:$Rt), 1953 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; 1954defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh", 1955 [(truncstorei16 GPR32:$Rt, 1956 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 1957defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb", 1958 [(truncstorei8 GPR32:$Rt, 1959 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 1960 1961// Match all store 64 bits width whose type is compatible with FPR64 1962let Predicates = [IsLE] in { 1963 // We must use ST1 to store vectors in big-endian. 1964 def : Pat<(store (v2f32 FPR64:$Rt), 1965 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1966 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1967 def : Pat<(store (v8i8 FPR64:$Rt), 1968 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1969 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1970 def : Pat<(store (v4i16 FPR64:$Rt), 1971 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1972 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1973 def : Pat<(store (v2i32 FPR64:$Rt), 1974 (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1975 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1976} 1977def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1978 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1979def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)), 1980 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1981 1982// Match all store 128 bits width whose type is compatible with FPR128 1983let Predicates = [IsLE] in { 1984 // We must use ST1 to store vectors in big-endian. 1985 def : Pat<(store (v4f32 FPR128:$Rt), 1986 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 1987 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1988 def : Pat<(store (v2f64 FPR128:$Rt), 1989 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 1990 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1991 def : Pat<(store (v16i8 FPR128:$Rt), 1992 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 1993 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1994 def : Pat<(store (v8i16 FPR128:$Rt), 1995 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 1996 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 1997 def : Pat<(store (v4i32 FPR128:$Rt), 1998 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 1999 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2000 def : Pat<(store (v2i64 FPR128:$Rt), 2001 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2002 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2003 def : Pat<(store (v2f64 FPR128:$Rt), 2004 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)), 2005 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>; 2006} 2007 2008// unscaled i64 truncating stores 2009def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)), 2010 (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2011def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 2012 (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2013def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)), 2014 (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>; 2015 2016//--- 2017// STR mnemonics fall back to STUR for negative or unaligned offsets. 2018def : InstAlias<"str $Rt, [$Rn, $offset]", 2019 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 2020def : InstAlias<"str $Rt, [$Rn, $offset]", 2021 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 2022def : InstAlias<"str $Rt, [$Rn, $offset]", 2023 (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 2024def : InstAlias<"str $Rt, [$Rn, $offset]", 2025 (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 2026def : InstAlias<"str $Rt, [$Rn, $offset]", 2027 (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 2028def : InstAlias<"str $Rt, [$Rn, $offset]", 2029 (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 2030def : InstAlias<"str $Rt, [$Rn, $offset]", 2031 (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 2032 2033def : InstAlias<"strb $Rt, [$Rn, $offset]", 2034 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 2035def : InstAlias<"strh $Rt, [$Rn, $offset]", 2036 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 2037 2038//--- 2039// (unscaled immediate, unprivileged) 2040defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 2041defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 2042 2043defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 2044defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 2045 2046//--- 2047// (immediate pre-indexed) 2048def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>; 2049def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>; 2050def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str", pre_store, untyped>; 2051def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>; 2052def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>; 2053def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>; 2054def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>; 2055 2056def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8, i32>; 2057def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>; 2058 2059// truncstore i64 2060def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2061 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2062 simm9:$off)>; 2063def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2064 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2065 simm9:$off)>; 2066def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2067 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2068 simm9:$off)>; 2069 2070def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2071 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2072def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2073 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2074def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2075 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2076def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2077 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2078def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2079 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2080def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2081 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2082 2083def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2084 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2085def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2086 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2087def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2088 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2089def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2090 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2091def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2092 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2093def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2094 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2095 2096//--- 2097// (immediate post-indexed) 2098def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>; 2099def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>; 2100def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str", post_store, untyped>; 2101def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>; 2102def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>; 2103def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>; 2104def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>; 2105 2106def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>; 2107def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>; 2108 2109// truncstore i64 2110def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2111 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2112 simm9:$off)>; 2113def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2114 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2115 simm9:$off)>; 2116def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 2117 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 2118 simm9:$off)>; 2119 2120def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2121 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2122def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2123 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2124def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2125 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2126def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2127 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2128def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2129 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2130def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 2131 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 2132 2133def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2134 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2135def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2136 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2137def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2138 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2139def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2140 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2141def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2142 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2143def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 2144 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 2145 2146//===----------------------------------------------------------------------===// 2147// Load/store exclusive instructions. 2148//===----------------------------------------------------------------------===// 2149 2150def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 2151def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 2152def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 2153def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 2154 2155def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 2156def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 2157def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 2158def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 2159 2160def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 2161def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 2162def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 2163def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 2164 2165def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 2166def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 2167def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 2168def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 2169 2170def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 2171def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 2172def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 2173def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 2174 2175def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 2176def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 2177def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 2178def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 2179 2180def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 2181def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 2182 2183def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 2184def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 2185 2186def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 2187def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 2188 2189def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 2190def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 2191 2192//===----------------------------------------------------------------------===// 2193// Scaled floating point to integer conversion instructions. 2194//===----------------------------------------------------------------------===// 2195 2196defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 2197defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 2198defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 2199defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 2200defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 2201defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 2202defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 2203defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 2204defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; 2205defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; 2206defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; 2207defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; 2208let isCodeGenOnly = 1 in { 2209defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; 2210defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; 2211defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; 2212defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; 2213} 2214 2215//===----------------------------------------------------------------------===// 2216// Scaled integer to floating point conversion instructions. 2217//===----------------------------------------------------------------------===// 2218 2219defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; 2220defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; 2221 2222//===----------------------------------------------------------------------===// 2223// Unscaled integer to floating point conversion instruction. 2224//===----------------------------------------------------------------------===// 2225 2226defm FMOV : UnscaledConversion<"fmov">; 2227 2228def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; 2229def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; 2230 2231//===----------------------------------------------------------------------===// 2232// Floating point conversion instruction. 2233//===----------------------------------------------------------------------===// 2234 2235defm FCVT : FPConversion<"fcvt">; 2236 2237def : Pat<(f32_to_f16 FPR32:$Rn), 2238 (i32 (COPY_TO_REGCLASS 2239 (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)), 2240 GPR32))>; 2241 2242def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), 2243 [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; 2244 2245// When converting from f16 coming directly from a load, make sure we 2246// load into the FPR16 registers rather than going through the GPRs. 2247// f16->f32 2248def : Pat<(f32 (f16_to_f32 (i32 2249 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, 2250 ro_Wextend16:$extend))))), 2251 (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; 2252def : Pat<(f32 (f16_to_f32 (i32 2253 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, 2254 ro_Xextend16:$extend))))), 2255 (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; 2256def : Pat <(f32 (f16_to_f32 (i32 2257 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 2258 (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; 2259def : Pat <(f32 (f16_to_f32 (i32 2260 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 2261 (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; 2262 2263// f16->f64 2264def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 2265 (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, 2266 ro_Wextend16:$extend))))))), 2267 (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; 2268def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 2269 (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, 2270 ro_Xextend16:$extend))))))), 2271 (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; 2272def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 2273 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))), 2274 (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; 2275def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 2276 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), 2277 (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; 2278 2279// When converting to f16 going directly to a store, make sure we use the 2280// appropriate direct conversion instructions and store via the FPR16 2281// registers rather than going through the GPRs. 2282let AddedComplexity = 10 in { 2283// f32->f16 2284def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), 2285 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, 2286 ro_Wextend16:$extend)), 2287 (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm, 2288 ro_Wextend16:$extend)>; 2289def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), 2290 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, 2291 ro_Xextend16:$extend)), 2292 (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm, 2293 ro_Xextend16:$extend)>; 2294def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), 2295 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 2296 (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; 2297def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), 2298 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 2299 (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>; 2300// f64->f16 2301def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), 2302 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, 2303 ro_Wextend16:$extend)), 2304 (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm, 2305 ro_Wextend16:$extend)>; 2306def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), 2307 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, 2308 ro_Xextend16:$extend)), 2309 (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm, 2310 ro_Xextend16:$extend)>; 2311def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), 2312 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 2313 (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; 2314def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), 2315 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), 2316 (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>; 2317} 2318 2319 2320//===----------------------------------------------------------------------===// 2321// Floating point single operand instructions. 2322//===----------------------------------------------------------------------===// 2323 2324defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; 2325defm FMOV : SingleOperandFPData<0b0000, "fmov">; 2326defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; 2327defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; 2328defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; 2329defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; 2330defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; 2331defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; 2332 2333def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))), 2334 (FRINTNDr FPR64:$Rn)>; 2335 2336// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior 2337// in the C spec. Setting hasSideEffects ensures it is not DCE'd. 2338// <rdar://problem/13715968> 2339// TODO: We should really model the FPSR flags correctly. This is really ugly. 2340let hasSideEffects = 1 in { 2341defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; 2342} 2343 2344defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; 2345 2346let SchedRW = [WriteFDiv] in { 2347defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>; 2348} 2349 2350//===----------------------------------------------------------------------===// 2351// Floating point two operand instructions. 2352//===----------------------------------------------------------------------===// 2353 2354defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; 2355let SchedRW = [WriteFDiv] in { 2356defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; 2357} 2358defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>; 2359defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>; 2360defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>; 2361defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>; 2362let SchedRW = [WriteFMul] in { 2363defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; 2364defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; 2365} 2366defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; 2367 2368def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2369 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; 2370def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2371 (FMINDrr FPR64:$Rn, FPR64:$Rm)>; 2372def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2373 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; 2374def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2375 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; 2376 2377//===----------------------------------------------------------------------===// 2378// Floating point three operand instructions. 2379//===----------------------------------------------------------------------===// 2380 2381defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>; 2382defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", 2383 TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; 2384defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", 2385 TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >; 2386defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", 2387 TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; 2388 2389// The following def pats catch the case where the LHS of an FMA is negated. 2390// The TriOpFrag above catches the case where the middle operand is negated. 2391 2392// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike 2393// the NEON variant. 2394def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)), 2395 (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 2396 2397def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)), 2398 (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 2399 2400// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and 2401// "(-a) + b*(-c)". 2402def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))), 2403 (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 2404 2405def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))), 2406 (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 2407 2408def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))), 2409 (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; 2410 2411def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))), 2412 (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; 2413 2414//===----------------------------------------------------------------------===// 2415// Floating point comparison instructions. 2416//===----------------------------------------------------------------------===// 2417 2418defm FCMPE : FPComparison<1, "fcmpe">; 2419defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; 2420 2421//===----------------------------------------------------------------------===// 2422// Floating point conditional comparison instructions. 2423//===----------------------------------------------------------------------===// 2424 2425defm FCCMPE : FPCondComparison<1, "fccmpe">; 2426defm FCCMP : FPCondComparison<0, "fccmp">; 2427 2428//===----------------------------------------------------------------------===// 2429// Floating point conditional select instruction. 2430//===----------------------------------------------------------------------===// 2431 2432defm FCSEL : FPCondSelect<"fcsel">; 2433 2434// CSEL instructions providing f128 types need to be handled by a 2435// pseudo-instruction since the eventual code will need to introduce basic 2436// blocks and control flow. 2437def F128CSEL : Pseudo<(outs FPR128:$Rd), 2438 (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), 2439 [(set (f128 FPR128:$Rd), 2440 (AArch64csel FPR128:$Rn, FPR128:$Rm, 2441 (i32 imm:$cond), NZCV))]> { 2442 let Uses = [NZCV]; 2443 let usesCustomInserter = 1; 2444} 2445 2446 2447//===----------------------------------------------------------------------===// 2448// Floating point immediate move. 2449//===----------------------------------------------------------------------===// 2450 2451let isReMaterializable = 1 in { 2452defm FMOV : FPMoveImmediate<"fmov">; 2453} 2454 2455//===----------------------------------------------------------------------===// 2456// Advanced SIMD two vector instructions. 2457//===----------------------------------------------------------------------===// 2458 2459defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; 2460defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 2461defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 2462defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 2463defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 2464defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; 2465defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 2466defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 2467defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 2468defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>; 2469 2470defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 2471defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 2472defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 2473defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 2474defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 2475defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 2476defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 2477defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 2478def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 2479 (FCVTLv4i16 V64:$Rn)>; 2480def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn), 2481 (i64 4)))), 2482 (FCVTLv8i16 V128:$Rn)>; 2483def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; 2484def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn), 2485 (i64 2))))), 2486 (FCVTLv4i32 V128:$Rn)>; 2487 2488defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 2489defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 2490defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 2491defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 2492defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 2493def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 2494 (FCVTNv4i16 V128:$Rn)>; 2495def : Pat<(concat_vectors V64:$Rd, 2496 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 2497 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 2498def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; 2499def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))), 2500 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 2501defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 2502defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 2503defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 2504 int_aarch64_neon_fcvtxn>; 2505defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>; 2506defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>; 2507let isCodeGenOnly = 1 in { 2508defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", 2509 int_aarch64_neon_fcvtzs>; 2510defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", 2511 int_aarch64_neon_fcvtzu>; 2512} 2513defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>; 2514defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; 2515defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>; 2516defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>; 2517defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>; 2518defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>; 2519defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>; 2520defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>; 2521defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>; 2522defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; 2523defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>; 2524defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", 2525 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 2526defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; 2527// Aliases for MVN -> NOT. 2528def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", 2529 (NOTv8i8 V64:$Vd, V64:$Vn)>; 2530def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", 2531 (NOTv16i8 V128:$Vd, V128:$Vn)>; 2532 2533def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>; 2534def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>; 2535def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>; 2536def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>; 2537def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>; 2538def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>; 2539def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>; 2540 2541def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2542def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2543def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2544def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2545def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2546def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2547def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2548def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2549 2550def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2551def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2552def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 2553def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2554def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 2555 2556defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>; 2557defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; 2558defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; 2559defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; 2560defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", 2561 BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >; 2562defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>; 2563defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>; 2564defm SHLL : SIMDVectorLShiftLongBySizeBHS; 2565defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 2566defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 2567defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; 2568defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; 2569defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; 2570defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", 2571 BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >; 2572defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", 2573 int_aarch64_neon_uaddlp>; 2574defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>; 2575defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; 2576defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; 2577defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; 2578defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; 2579defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; 2580 2581def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; 2582def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; 2583 2584// Patterns for vector long shift (by element width). These need to match all 2585// three of zext, sext and anyext so it's easier to pull the patterns out of the 2586// definition. 2587multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { 2588 def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), 2589 (SHLLv8i8 V64:$Rn)>; 2590 def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)), 2591 (SHLLv16i8 V128:$Rn)>; 2592 def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), 2593 (SHLLv4i16 V64:$Rn)>; 2594 def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)), 2595 (SHLLv8i16 V128:$Rn)>; 2596 def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), 2597 (SHLLv2i32 V64:$Rn)>; 2598 def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)), 2599 (SHLLv4i32 V128:$Rn)>; 2600} 2601 2602defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; 2603defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; 2604defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; 2605 2606//===----------------------------------------------------------------------===// 2607// Advanced SIMD three vector instructions. 2608//===----------------------------------------------------------------------===// 2609 2610defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; 2611defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>; 2612defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; 2613defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; 2614defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; 2615defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; 2616defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; 2617defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; 2618defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; 2619defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; 2620defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; 2621defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; 2622defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; 2623defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; 2624defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; 2625defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; 2626defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; 2627defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; 2628defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>; 2629defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; 2630defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>; 2631defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; 2632defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>; 2633defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; 2634defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>; 2635 2636// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the 2637// instruction expects the addend first, while the fma intrinsic puts it last. 2638defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", 2639 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; 2640defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", 2641 TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 2642 2643// The following def pats catch the case where the LHS of an FMA is negated. 2644// The TriOpFrag above catches the case where the middle operand is negated. 2645def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), 2646 (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; 2647 2648def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), 2649 (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; 2650 2651def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), 2652 (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; 2653 2654defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; 2655defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; 2656defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; 2657defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; 2658defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; 2659defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", 2660 TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; 2661defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", 2662 TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; 2663defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; 2664defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; 2665defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", 2666 TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >; 2667defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; 2668defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; 2669defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; 2670defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; 2671defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>; 2672defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; 2673defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>; 2674defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; 2675defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; 2676defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; 2677defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; 2678defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; 2679defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; 2680defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>; 2681defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; 2682defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; 2683defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; 2684defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", 2685 TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >; 2686defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; 2687defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; 2688defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; 2689defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; 2690defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>; 2691defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; 2692defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>; 2693defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; 2694defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; 2695defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; 2696defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; 2697defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>; 2698defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>; 2699defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>; 2700 2701defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; 2702defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", 2703 BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; 2704defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">; 2705defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>; 2706defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl", 2707 TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>; 2708defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>; 2709defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", 2710 BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; 2711defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; 2712 2713def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), 2714 (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 2715def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm), 2716 (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 2717def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm), 2718 (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 2719def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm), 2720 (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; 2721 2722def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm), 2723 (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 2724def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm), 2725 (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 2726def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm), 2727 (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 2728def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm), 2729 (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>; 2730 2731def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}", 2732 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>; 2733def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}", 2734 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 2735def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}", 2736 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 2737def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}", 2738 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>; 2739 2740def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}", 2741 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>; 2742def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}", 2743 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 2744def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}", 2745 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 2746def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}", 2747 (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>; 2748 2749def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" # 2750 "|cmls.8b\t$dst, $src1, $src2}", 2751 (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 2752def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" # 2753 "|cmls.16b\t$dst, $src1, $src2}", 2754 (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 2755def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # 2756 "|cmls.4h\t$dst, $src1, $src2}", 2757 (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 2758def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # 2759 "|cmls.8h\t$dst, $src1, $src2}", 2760 (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 2761def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # 2762 "|cmls.2s\t$dst, $src1, $src2}", 2763 (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 2764def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # 2765 "|cmls.4s\t$dst, $src1, $src2}", 2766 (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 2767def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # 2768 "|cmls.2d\t$dst, $src1, $src2}", 2769 (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 2770 2771def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # 2772 "|cmlo.8b\t$dst, $src1, $src2}", 2773 (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 2774def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # 2775 "|cmlo.16b\t$dst, $src1, $src2}", 2776 (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 2777def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # 2778 "|cmlo.4h\t$dst, $src1, $src2}", 2779 (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 2780def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # 2781 "|cmlo.8h\t$dst, $src1, $src2}", 2782 (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 2783def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # 2784 "|cmlo.2s\t$dst, $src1, $src2}", 2785 (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 2786def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # 2787 "|cmlo.4s\t$dst, $src1, $src2}", 2788 (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 2789def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # 2790 "|cmlo.2d\t$dst, $src1, $src2}", 2791 (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 2792 2793def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # 2794 "|cmle.8b\t$dst, $src1, $src2}", 2795 (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 2796def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # 2797 "|cmle.16b\t$dst, $src1, $src2}", 2798 (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 2799def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # 2800 "|cmle.4h\t$dst, $src1, $src2}", 2801 (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 2802def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # 2803 "|cmle.8h\t$dst, $src1, $src2}", 2804 (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 2805def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # 2806 "|cmle.2s\t$dst, $src1, $src2}", 2807 (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 2808def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # 2809 "|cmle.4s\t$dst, $src1, $src2}", 2810 (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 2811def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # 2812 "|cmle.2d\t$dst, $src1, $src2}", 2813 (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 2814 2815def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # 2816 "|cmlt.8b\t$dst, $src1, $src2}", 2817 (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 2818def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # 2819 "|cmlt.16b\t$dst, $src1, $src2}", 2820 (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 2821def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # 2822 "|cmlt.4h\t$dst, $src1, $src2}", 2823 (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 2824def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # 2825 "|cmlt.8h\t$dst, $src1, $src2}", 2826 (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 2827def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # 2828 "|cmlt.2s\t$dst, $src1, $src2}", 2829 (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 2830def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # 2831 "|cmlt.4s\t$dst, $src1, $src2}", 2832 (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 2833def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # 2834 "|cmlt.2d\t$dst, $src1, $src2}", 2835 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 2836 2837def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # 2838 "|fcmle.2s\t$dst, $src1, $src2}", 2839 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 2840def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # 2841 "|fcmle.4s\t$dst, $src1, $src2}", 2842 (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 2843def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # 2844 "|fcmle.2d\t$dst, $src1, $src2}", 2845 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 2846 2847def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # 2848 "|fcmlt.2s\t$dst, $src1, $src2}", 2849 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 2850def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # 2851 "|fcmlt.4s\t$dst, $src1, $src2}", 2852 (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 2853def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # 2854 "|fcmlt.2d\t$dst, $src1, $src2}", 2855 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 2856 2857def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # 2858 "|facle.2s\t$dst, $src1, $src2}", 2859 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 2860def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # 2861 "|facle.4s\t$dst, $src1, $src2}", 2862 (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 2863def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 2864 "|facle.2d\t$dst, $src1, $src2}", 2865 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 2866 2867def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # 2868 "|faclt.2s\t$dst, $src1, $src2}", 2869 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 2870def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # 2871 "|faclt.4s\t$dst, $src1, $src2}", 2872 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 2873def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # 2874 "|faclt.2d\t$dst, $src1, $src2}", 2875 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 2876 2877//===----------------------------------------------------------------------===// 2878// Advanced SIMD three scalar instructions. 2879//===----------------------------------------------------------------------===// 2880 2881defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 2882defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 2883defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 2884defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 2885defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 2886defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 2887defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 2888defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; 2889def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 2890 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 2891defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", 2892 int_aarch64_neon_facge>; 2893defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", 2894 int_aarch64_neon_facgt>; 2895defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; 2896defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; 2897defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; 2898defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; 2899defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; 2900defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; 2901defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 2902defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 2903defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 2904defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 2905defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 2906defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 2907defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; 2908defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 2909defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 2910defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 2911defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 2912defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 2913defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 2914defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 2915defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 2916 2917def : InstAlias<"cmls $dst, $src1, $src2", 2918 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2919def : InstAlias<"cmle $dst, $src1, $src2", 2920 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2921def : InstAlias<"cmlo $dst, $src1, $src2", 2922 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2923def : InstAlias<"cmlt $dst, $src1, $src2", 2924 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2925def : InstAlias<"fcmle $dst, $src1, $src2", 2926 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2927def : InstAlias<"fcmle $dst, $src1, $src2", 2928 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2929def : InstAlias<"fcmlt $dst, $src1, $src2", 2930 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2931def : InstAlias<"fcmlt $dst, $src1, $src2", 2932 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2933def : InstAlias<"facle $dst, $src1, $src2", 2934 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2935def : InstAlias<"facle $dst, $src1, $src2", 2936 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2937def : InstAlias<"faclt $dst, $src1, $src2", 2938 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 2939def : InstAlias<"faclt $dst, $src1, $src2", 2940 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 2941 2942//===----------------------------------------------------------------------===// 2943// Advanced SIMD three scalar instructions (mixed operands). 2944//===----------------------------------------------------------------------===// 2945defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", 2946 int_aarch64_neon_sqdmulls_scalar>; 2947defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; 2948defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; 2949 2950def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), 2951 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 2952 (i32 FPR32:$Rm))))), 2953 (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 2954def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), 2955 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 2956 (i32 FPR32:$Rm))))), 2957 (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 2958 2959//===----------------------------------------------------------------------===// 2960// Advanced SIMD two scalar instructions. 2961//===----------------------------------------------------------------------===// 2962 2963defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>; 2964defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; 2965defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; 2966defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; 2967defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; 2968defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; 2969defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 2970defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 2971defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 2972defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 2973defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 2974defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; 2975defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; 2976defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; 2977defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; 2978defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; 2979defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; 2980defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; 2981defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; 2982def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; 2983defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; 2984defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; 2985defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; 2986defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; 2987defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; 2988defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", 2989 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 2990defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; 2991defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 2992defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 2993defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; 2994defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; 2995defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", 2996 int_aarch64_neon_suqadd>; 2997defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; 2998defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; 2999defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", 3000 int_aarch64_neon_usqadd>; 3001 3002def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>; 3003 3004def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), 3005 (FCVTASv1i64 FPR64:$Rn)>; 3006def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), 3007 (FCVTAUv1i64 FPR64:$Rn)>; 3008def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), 3009 (FCVTMSv1i64 FPR64:$Rn)>; 3010def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), 3011 (FCVTMUv1i64 FPR64:$Rn)>; 3012def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), 3013 (FCVTNSv1i64 FPR64:$Rn)>; 3014def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), 3015 (FCVTNUv1i64 FPR64:$Rn)>; 3016def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), 3017 (FCVTPSv1i64 FPR64:$Rn)>; 3018def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), 3019 (FCVTPUv1i64 FPR64:$Rn)>; 3020 3021def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), 3022 (FRECPEv1i32 FPR32:$Rn)>; 3023def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), 3024 (FRECPEv1i64 FPR64:$Rn)>; 3025def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), 3026 (FRECPEv1i64 FPR64:$Rn)>; 3027 3028def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), 3029 (FRECPXv1i32 FPR32:$Rn)>; 3030def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), 3031 (FRECPXv1i64 FPR64:$Rn)>; 3032 3033def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), 3034 (FRSQRTEv1i32 FPR32:$Rn)>; 3035def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), 3036 (FRSQRTEv1i64 FPR64:$Rn)>; 3037def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), 3038 (FRSQRTEv1i64 FPR64:$Rn)>; 3039 3040// If an integer is about to be converted to a floating point value, 3041// just load it on the floating point unit. 3042// Here are the patterns for 8 and 16-bits to float. 3043// 8-bits -> float. 3044multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy, 3045 SDPatternOperator loadop, Instruction UCVTF, 3046 ROAddrMode ro, Instruction LDRW, Instruction LDRX, 3047 SubRegIndex sub> { 3048 def : Pat<(DstTy (uint_to_fp (SrcTy 3049 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, 3050 ro.Wext:$extend))))), 3051 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), 3052 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), 3053 sub))>; 3054 3055 def : Pat<(DstTy (uint_to_fp (SrcTy 3056 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, 3057 ro.Wext:$extend))))), 3058 (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)), 3059 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), 3060 sub))>; 3061} 3062 3063defm : UIntToFPROLoadPat<f32, i32, zextloadi8, 3064 UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>; 3065def : Pat <(f32 (uint_to_fp (i32 3066 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 3067 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 3068 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; 3069def : Pat <(f32 (uint_to_fp (i32 3070 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), 3071 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 3072 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; 3073// 16-bits -> float. 3074defm : UIntToFPROLoadPat<f32, i32, zextloadi16, 3075 UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>; 3076def : Pat <(f32 (uint_to_fp (i32 3077 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 3078 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 3079 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; 3080def : Pat <(f32 (uint_to_fp (i32 3081 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 3082 (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), 3083 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; 3084// 32-bits are handled in target specific dag combine: 3085// performIntToFpCombine. 3086// 64-bits integer to 32-bits floating point, not possible with 3087// UCVTF on floating point registers (both source and destination 3088// must have the same size). 3089 3090// Here are the patterns for 8, 16, 32, and 64-bits to double. 3091// 8-bits -> double. 3092defm : UIntToFPROLoadPat<f64, i32, zextloadi8, 3093 UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>; 3094def : Pat <(f64 (uint_to_fp (i32 3095 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 3096 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3097 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; 3098def : Pat <(f64 (uint_to_fp (i32 3099 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), 3100 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3101 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; 3102// 16-bits -> double. 3103defm : UIntToFPROLoadPat<f64, i32, zextloadi16, 3104 UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>; 3105def : Pat <(f64 (uint_to_fp (i32 3106 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 3107 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3108 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; 3109def : Pat <(f64 (uint_to_fp (i32 3110 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 3111 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3112 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; 3113// 32-bits -> double. 3114defm : UIntToFPROLoadPat<f64, i32, load, 3115 UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>; 3116def : Pat <(f64 (uint_to_fp (i32 3117 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 3118 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3119 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; 3120def : Pat <(f64 (uint_to_fp (i32 3121 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), 3122 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 3123 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; 3124// 64-bits -> double are handled in target specific dag combine: 3125// performIntToFpCombine. 3126 3127//===----------------------------------------------------------------------===// 3128// Advanced SIMD three different-sized vector instructions. 3129//===----------------------------------------------------------------------===// 3130 3131defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; 3132defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; 3133defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; 3134defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; 3135defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>; 3136defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", 3137 int_aarch64_neon_sabd>; 3138defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", 3139 int_aarch64_neon_sabd>; 3140defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", 3141 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; 3142defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", 3143 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; 3144defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", 3145 TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 3146defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", 3147 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 3148defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>; 3149defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", 3150 int_aarch64_neon_sqadd>; 3151defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", 3152 int_aarch64_neon_sqsub>; 3153defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", 3154 int_aarch64_neon_sqdmull>; 3155defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", 3156 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; 3157defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", 3158 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; 3159defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", 3160 int_aarch64_neon_uabd>; 3161defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 3162 int_aarch64_neon_uabd>; 3163defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", 3164 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; 3165defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", 3166 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; 3167defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", 3168 TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 3169defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", 3170 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 3171defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>; 3172defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", 3173 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; 3174defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", 3175 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; 3176 3177// Patterns for 64-bit pmull 3178def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), 3179 (PMULLv1i64 V64:$Rn, V64:$Rm)>; 3180def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), 3181 (vector_extract (v2i64 V128:$Rm), (i64 1))), 3182 (PMULLv2i64 V128:$Rn, V128:$Rm)>; 3183 3184// CodeGen patterns for addhn and subhn instructions, which can actually be 3185// written in LLVM IR without too much difficulty. 3186 3187// ADDHN 3188def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), 3189 (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 3190def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3191 (i32 16))))), 3192 (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 3193def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3194 (i32 32))))), 3195 (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 3196def : Pat<(concat_vectors (v8i8 V64:$Rd), 3197 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3198 (i32 8))))), 3199 (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3200 V128:$Rn, V128:$Rm)>; 3201def : Pat<(concat_vectors (v4i16 V64:$Rd), 3202 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3203 (i32 16))))), 3204 (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3205 V128:$Rn, V128:$Rm)>; 3206def : Pat<(concat_vectors (v2i32 V64:$Rd), 3207 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 3208 (i32 32))))), 3209 (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3210 V128:$Rn, V128:$Rm)>; 3211 3212// SUBHN 3213def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), 3214 (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 3215def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3216 (i32 16))))), 3217 (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 3218def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3219 (i32 32))))), 3220 (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 3221def : Pat<(concat_vectors (v8i8 V64:$Rd), 3222 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3223 (i32 8))))), 3224 (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3225 V128:$Rn, V128:$Rm)>; 3226def : Pat<(concat_vectors (v4i16 V64:$Rd), 3227 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3228 (i32 16))))), 3229 (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3230 V128:$Rn, V128:$Rm)>; 3231def : Pat<(concat_vectors (v2i32 V64:$Rd), 3232 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 3233 (i32 32))))), 3234 (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 3235 V128:$Rn, V128:$Rm)>; 3236 3237//---------------------------------------------------------------------------- 3238// AdvSIMD bitwise extract from vector instruction. 3239//---------------------------------------------------------------------------- 3240 3241defm EXT : SIMDBitwiseExtract<"ext">; 3242 3243def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3244 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3245def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3246 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3247def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3248 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3249def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), 3250 (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; 3251def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3252 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3253def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3254 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3255def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3256 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3257def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), 3258 (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; 3259 3260// We use EXT to handle extract_subvector to copy the upper 64-bits of a 3261// 128-bit vector. 3262def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), 3263 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3264def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), 3265 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3266def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), 3267 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3268def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), 3269 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3270def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), 3271 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3272def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), 3273 (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; 3274 3275 3276//---------------------------------------------------------------------------- 3277// AdvSIMD zip vector 3278//---------------------------------------------------------------------------- 3279 3280defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>; 3281defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>; 3282defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>; 3283defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>; 3284defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; 3285defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; 3286 3287//---------------------------------------------------------------------------- 3288// AdvSIMD TBL/TBX instructions 3289//---------------------------------------------------------------------------- 3290 3291defm TBL : SIMDTableLookup< 0, "tbl">; 3292defm TBX : SIMDTableLookupTied<1, "tbx">; 3293 3294def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 3295 (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; 3296def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 3297 (TBLv16i8One V128:$Ri, V128:$Rn)>; 3298 3299def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), 3300 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 3301 (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; 3302def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), 3303 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 3304 (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; 3305 3306 3307//---------------------------------------------------------------------------- 3308// AdvSIMD scalar CPY instruction 3309//---------------------------------------------------------------------------- 3310 3311defm CPY : SIMDScalarCPY<"cpy">; 3312 3313//---------------------------------------------------------------------------- 3314// AdvSIMD scalar pairwise instructions 3315//---------------------------------------------------------------------------- 3316 3317defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; 3318defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; 3319defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; 3320defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; 3321defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; 3322defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; 3323def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))), 3324 (ADDPv2i64p V128:$Rn)>; 3325def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))), 3326 (ADDPv2i64p V128:$Rn)>; 3327def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), 3328 (FADDPv2i32p V64:$Rn)>; 3329def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), 3330 (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; 3331def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), 3332 (FADDPv2i64p V128:$Rn)>; 3333def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), 3334 (FMAXNMPv2i32p V64:$Rn)>; 3335def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), 3336 (FMAXNMPv2i64p V128:$Rn)>; 3337def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), 3338 (FMAXPv2i32p V64:$Rn)>; 3339def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), 3340 (FMAXPv2i64p V128:$Rn)>; 3341def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), 3342 (FMINNMPv2i32p V64:$Rn)>; 3343def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), 3344 (FMINNMPv2i64p V128:$Rn)>; 3345def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), 3346 (FMINPv2i32p V64:$Rn)>; 3347def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), 3348 (FMINPv2i64p V128:$Rn)>; 3349 3350//---------------------------------------------------------------------------- 3351// AdvSIMD INS/DUP instructions 3352//---------------------------------------------------------------------------- 3353 3354def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; 3355def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; 3356def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; 3357def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; 3358def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; 3359def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; 3360def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; 3361 3362def DUPv2i64lane : SIMDDup64FromElement; 3363def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; 3364def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; 3365def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; 3366def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; 3367def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; 3368def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; 3369 3370def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), 3371 (v2f32 (DUPv2i32lane 3372 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 3373 (i64 0)))>; 3374def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), 3375 (v4f32 (DUPv4i32lane 3376 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 3377 (i64 0)))>; 3378def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), 3379 (v2f64 (DUPv2i64lane 3380 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), 3381 (i64 0)))>; 3382 3383def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 3384 (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; 3385def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 3386 (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; 3387def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), 3388 (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; 3389 3390// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane 3391// instruction even if the types don't match: we just have to remap the lane 3392// carefully. N.b. this trick only applies to truncations. 3393def VecIndex_x2 : SDNodeXForm<imm, [{ 3394 return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64); 3395}]>; 3396def VecIndex_x4 : SDNodeXForm<imm, [{ 3397 return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64); 3398}]>; 3399def VecIndex_x8 : SDNodeXForm<imm, [{ 3400 return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64); 3401}]>; 3402 3403multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, 3404 ValueType Src128VT, ValueType ScalVT, 3405 Instruction DUP, SDNodeXForm IdxXFORM> { 3406 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), 3407 imm:$idx)))), 3408 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 3409 3410 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), 3411 imm:$idx)))), 3412 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 3413} 3414 3415defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>; 3416defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>; 3417defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>; 3418 3419defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>; 3420defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>; 3421defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>; 3422 3423multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, 3424 SDNodeXForm IdxXFORM> { 3425 def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), 3426 imm:$idx))))), 3427 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 3428 3429 def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), 3430 imm:$idx))))), 3431 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 3432} 3433 3434defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>; 3435defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>; 3436defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>; 3437 3438defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>; 3439defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>; 3440defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>; 3441 3442// SMOV and UMOV definitions, with some extra patterns for convenience 3443defm SMOV : SMov; 3444defm UMOV : UMov; 3445 3446def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 3447 (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; 3448def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 3449 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 3450def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3451 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 3452def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3453 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 3454def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 3455 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 3456def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), 3457 (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; 3458 3459// Extracting i8 or i16 elements will have the zero-extend transformed to 3460// an 'and' mask by type legalization since neither i8 nor i16 are legal types 3461// for AArch64. Match these patterns here since UMOV already zeroes out the high 3462// bits of the destination register. 3463def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), 3464 (i32 0xff)), 3465 (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; 3466def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), 3467 (i32 0xffff)), 3468 (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; 3469 3470defm INS : SIMDIns; 3471 3472def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), 3473 (SUBREG_TO_REG (i32 0), 3474 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3475def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), 3476 (SUBREG_TO_REG (i32 0), 3477 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3478 3479def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), 3480 (SUBREG_TO_REG (i32 0), 3481 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3482def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), 3483 (SUBREG_TO_REG (i32 0), 3484 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 3485 3486def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), 3487 (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 3488 (i32 FPR32:$Rn), ssub))>; 3489def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), 3490 (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 3491 (i32 FPR32:$Rn), ssub))>; 3492def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), 3493 (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 3494 (i64 FPR64:$Rn), dsub))>; 3495 3496def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), 3497 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 3498def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), 3499 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 3500def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), 3501 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; 3502 3503def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), 3504 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 3505 (EXTRACT_SUBREG 3506 (INSvi32lane 3507 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), 3508 VectorIndexS:$imm, 3509 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 3510 (i64 0)), 3511 dsub)>; 3512def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), 3513 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 3514 (INSvi32lane 3515 V128:$Rn, VectorIndexS:$imm, 3516 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 3517 (i64 0))>; 3518def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), 3519 (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), 3520 (INSvi64lane 3521 V128:$Rn, VectorIndexD:$imm, 3522 (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), 3523 (i64 0))>; 3524 3525// Copy an element at a constant index in one vector into a constant indexed 3526// element of another. 3527// FIXME refactor to a shared class/dev parameterized on vector type, vector 3528// index type and INS extension 3529def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane 3530 (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), 3531 VectorIndexB:$idx2)), 3532 (v16i8 (INSvi8lane 3533 V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) 3534 )>; 3535def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane 3536 (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), 3537 VectorIndexH:$idx2)), 3538 (v8i16 (INSvi16lane 3539 V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) 3540 )>; 3541def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane 3542 (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), 3543 VectorIndexS:$idx2)), 3544 (v4i32 (INSvi32lane 3545 V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) 3546 )>; 3547def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane 3548 (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), 3549 VectorIndexD:$idx2)), 3550 (v2i64 (INSvi64lane 3551 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) 3552 )>; 3553 3554multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, 3555 ValueType VTScal, Instruction INS> { 3556 def : Pat<(VT128 (vector_insert V128:$src, 3557 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 3558 imm:$Immd)), 3559 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; 3560 3561 def : Pat<(VT128 (vector_insert V128:$src, 3562 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 3563 imm:$Immd)), 3564 (INS V128:$src, imm:$Immd, 3565 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; 3566 3567 def : Pat<(VT64 (vector_insert V64:$src, 3568 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 3569 imm:$Immd)), 3570 (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), 3571 imm:$Immd, V128:$Rn, imm:$Immn), 3572 dsub)>; 3573 3574 def : Pat<(VT64 (vector_insert V64:$src, 3575 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 3576 imm:$Immd)), 3577 (EXTRACT_SUBREG 3578 (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, 3579 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), 3580 dsub)>; 3581} 3582 3583defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; 3584defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; 3585defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>; 3586defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>; 3587defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>; 3588defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>; 3589 3590 3591// Floating point vector extractions are codegen'd as either a sequence of 3592// subregister extractions, possibly fed by an INS if the lane number is 3593// anything other than zero. 3594def : Pat<(vector_extract (v2f64 V128:$Rn), 0), 3595 (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; 3596def : Pat<(vector_extract (v4f32 V128:$Rn), 0), 3597 (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; 3598def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), 3599 (f64 (EXTRACT_SUBREG 3600 (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0, 3601 V128:$Rn, VectorIndexD:$idx), 3602 dsub))>; 3603def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), 3604 (f32 (EXTRACT_SUBREG 3605 (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0, 3606 V128:$Rn, VectorIndexS:$idx), 3607 ssub))>; 3608 3609// All concat_vectors operations are canonicalised to act on i64 vectors for 3610// AArch64. In the general case we need an instruction, which had just as well be 3611// INS. 3612class ConcatPat<ValueType DstTy, ValueType SrcTy> 3613 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), 3614 (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, 3615 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; 3616 3617def : ConcatPat<v2i64, v1i64>; 3618def : ConcatPat<v2f64, v1f64>; 3619def : ConcatPat<v4i32, v2i32>; 3620def : ConcatPat<v4f32, v2f32>; 3621def : ConcatPat<v8i16, v4i16>; 3622def : ConcatPat<v16i8, v8i8>; 3623 3624// If the high lanes are undef, though, we can just ignore them: 3625class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> 3626 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), 3627 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; 3628 3629def : ConcatUndefPat<v2i64, v1i64>; 3630def : ConcatUndefPat<v2f64, v1f64>; 3631def : ConcatUndefPat<v4i32, v2i32>; 3632def : ConcatUndefPat<v4f32, v2f32>; 3633def : ConcatUndefPat<v8i16, v4i16>; 3634def : ConcatUndefPat<v16i8, v8i8>; 3635 3636//---------------------------------------------------------------------------- 3637// AdvSIMD across lanes instructions 3638//---------------------------------------------------------------------------- 3639 3640defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; 3641defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; 3642defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; 3643defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; 3644defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; 3645defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; 3646defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; 3647defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; 3648defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; 3649defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; 3650defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; 3651 3652multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> { 3653// If there is a sign extension after this intrinsic, consume it as smov already 3654// performed it 3655 def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)), 3656 (i32 (SMOVvi8to32 3657 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3658 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3659 (i64 0)))>; 3660 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3661 (i32 (SMOVvi8to32 3662 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3663 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3664 (i64 0)))>; 3665// If there is a sign extension after this intrinsic, consume it as smov already 3666// performed it 3667def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)), 3668 (i32 (SMOVvi8to32 3669 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3670 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3671 (i64 0)))>; 3672def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3673 (i32 (SMOVvi8to32 3674 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3675 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3676 (i64 0)))>; 3677// If there is a sign extension after this intrinsic, consume it as smov already 3678// performed it 3679def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)), 3680 (i32 (SMOVvi16to32 3681 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3682 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3683 (i64 0)))>; 3684def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3685 (i32 (SMOVvi16to32 3686 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3687 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3688 (i64 0)))>; 3689// If there is a sign extension after this intrinsic, consume it as smov already 3690// performed it 3691def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), 3692 (i32 (SMOVvi16to32 3693 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3694 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3695 (i64 0)))>; 3696def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3697 (i32 (SMOVvi16to32 3698 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3699 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3700 (i64 0)))>; 3701 3702def : Pat<(i32 (intOp (v4i32 V128:$Rn))), 3703 (i32 (EXTRACT_SUBREG 3704 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3705 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), 3706 ssub))>; 3707} 3708 3709multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> { 3710// If there is a masking operation keeping only what has been actually 3711// generated, consume it. 3712 def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), 3713 (i32 (EXTRACT_SUBREG 3714 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3715 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3716 ssub))>; 3717 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3718 (i32 (EXTRACT_SUBREG 3719 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3720 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 3721 ssub))>; 3722// If there is a masking operation keeping only what has been actually 3723// generated, consume it. 3724def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), 3725 (i32 (EXTRACT_SUBREG 3726 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3727 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3728 ssub))>; 3729def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3730 (i32 (EXTRACT_SUBREG 3731 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3732 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 3733 ssub))>; 3734 3735// If there is a masking operation keeping only what has been actually 3736// generated, consume it. 3737def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), 3738 (i32 (EXTRACT_SUBREG 3739 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3740 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3741 ssub))>; 3742def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3743 (i32 (EXTRACT_SUBREG 3744 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3745 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 3746 ssub))>; 3747// If there is a masking operation keeping only what has been actually 3748// generated, consume it. 3749def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), 3750 (i32 (EXTRACT_SUBREG 3751 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3752 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3753 ssub))>; 3754def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3755 (i32 (EXTRACT_SUBREG 3756 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3757 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 3758 ssub))>; 3759 3760def : Pat<(i32 (intOp (v4i32 V128:$Rn))), 3761 (i32 (EXTRACT_SUBREG 3762 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3763 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), 3764 ssub))>; 3765 3766} 3767 3768multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { 3769 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3770 (i32 (SMOVvi16to32 3771 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3772 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 3773 (i64 0)))>; 3774def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3775 (i32 (SMOVvi16to32 3776 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3777 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 3778 (i64 0)))>; 3779 3780def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3781 (i32 (EXTRACT_SUBREG 3782 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3783 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 3784 ssub))>; 3785def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3786 (i32 (EXTRACT_SUBREG 3787 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3788 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 3789 ssub))>; 3790 3791def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 3792 (i64 (EXTRACT_SUBREG 3793 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3794 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 3795 dsub))>; 3796} 3797 3798multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, 3799 Intrinsic intOp> { 3800 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 3801 (i32 (EXTRACT_SUBREG 3802 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3803 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 3804 ssub))>; 3805def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 3806 (i32 (EXTRACT_SUBREG 3807 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3808 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 3809 ssub))>; 3810 3811def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 3812 (i32 (EXTRACT_SUBREG 3813 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3814 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 3815 ssub))>; 3816def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 3817 (i32 (EXTRACT_SUBREG 3818 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3819 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 3820 ssub))>; 3821 3822def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 3823 (i64 (EXTRACT_SUBREG 3824 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3825 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 3826 dsub))>; 3827} 3828 3829defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>; 3830// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 3831def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))), 3832 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3833 3834defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>; 3835// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 3836def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))), 3837 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3838 3839defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>; 3840def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))), 3841 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3842 3843defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>; 3844def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))), 3845 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3846 3847defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>; 3848def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))), 3849 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3850 3851defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>; 3852def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))), 3853 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; 3854 3855defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; 3856defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; 3857 3858// The vaddlv_s32 intrinsic gets mapped to SADDLP. 3859def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 3860 (i64 (EXTRACT_SUBREG 3861 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3862 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 3863 dsub))>; 3864// The vaddlv_u32 intrinsic gets mapped to UADDLP. 3865def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 3866 (i64 (EXTRACT_SUBREG 3867 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 3868 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 3869 dsub))>; 3870 3871//------------------------------------------------------------------------------ 3872// AdvSIMD modified immediate instructions 3873//------------------------------------------------------------------------------ 3874 3875// AdvSIMD BIC 3876defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 3877// AdvSIMD ORR 3878defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 3879 3880def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 3881def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 3882def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 3883def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 3884 3885def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3886def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3887def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3888def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3889 3890def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 3891def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 3892def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 3893def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 3894 3895def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3896def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3897def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3898def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3899 3900// AdvSIMD FMOV 3901def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, 3902 "fmov", ".2d", 3903 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3904def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, 3905 "fmov", ".2s", 3906 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3907def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, 3908 "fmov", ".4s", 3909 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 3910 3911// AdvSIMD MOVI 3912 3913// EDIT byte mask: scalar 3914let isReMaterializable = 1, isAsCheapAsAMove = 1 in 3915def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 3916 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 3917// The movi_edit node has the immediate value already encoded, so we use 3918// a plain imm0_255 here. 3919def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 3920 (MOVID imm0_255:$shift)>; 3921 3922def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; 3923def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; 3924def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; 3925def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; 3926 3927def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; 3928def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; 3929def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; 3930def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; 3931 3932// EDIT byte mask: 2d 3933 3934// The movi_edit node has the immediate value already encoded, so we use 3935// a plain imm0_255 in the pattern 3936let isReMaterializable = 1, isAsCheapAsAMove = 1 in 3937def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, 3938 simdimmtype10, 3939 "movi", ".2d", 3940 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 3941 3942 3943// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. 3944// Complexity is added to break a tie with a plain MOVI. 3945let AddedComplexity = 1 in { 3946def : Pat<(f32 fpimm0), 3947 (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, 3948 Requires<[HasZCZ]>; 3949def : Pat<(f64 fpimm0), 3950 (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, 3951 Requires<[HasZCZ]>; 3952} 3953 3954def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3955def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3956def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3957def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 3958 3959def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3960def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3961def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3962def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 3963 3964def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; 3965def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; 3966 3967// EDIT per word & halfword: 2s, 4h, 4s, & 8h 3968defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; 3969 3970def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3971def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3972def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3973def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3974 3975def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 3976def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 3977def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 3978def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 3979 3980def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3981 (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; 3982def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3983 (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; 3984def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3985 (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; 3986def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 3987 (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; 3988 3989// EDIT per word: 2s & 4s with MSL shifter 3990def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", 3991 [(set (v2i32 V64:$Rd), 3992 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3993def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", 3994 [(set (v4i32 V128:$Rd), 3995 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 3996 3997// Per byte: 8b & 16b 3998def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, 3999 "movi", ".8b", 4000 [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; 4001def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, 4002 "movi", ".16b", 4003 [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; 4004 4005// AdvSIMD MVNI 4006 4007// EDIT per word & halfword: 2s, 4h, 4s, & 8h 4008defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; 4009 4010def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 4011def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 4012def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 4013def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 4014 4015def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 4016def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 4017def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 4018def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 4019 4020def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4021 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 4022def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4023 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 4024def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4025 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 4026def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 4027 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 4028 4029// EDIT per word: 2s & 4s with MSL shifter 4030def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 4031 [(set (v2i32 V64:$Rd), 4032 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 4033def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 4034 [(set (v4i32 V128:$Rd), 4035 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 4036 4037//---------------------------------------------------------------------------- 4038// AdvSIMD indexed element 4039//---------------------------------------------------------------------------- 4040 4041let neverHasSideEffects = 1 in { 4042 defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; 4043 defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; 4044} 4045 4046// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 4047// instruction expects the addend first, while the intrinsic expects it last. 4048 4049// On the other hand, there are quite a few valid combinatorial options due to 4050// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 4051defm : SIMDFPIndexedSDTiedPatterns<"FMLA", 4052 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; 4053defm : SIMDFPIndexedSDTiedPatterns<"FMLA", 4054 TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; 4055 4056defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 4057 TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 4058defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 4059 TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 4060defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 4061 TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 4062defm : SIMDFPIndexedSDTiedPatterns<"FMLS", 4063 TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 4064 4065multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 4066 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 4067 // and DUP scalar. 4068 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4069 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 4070 VectorIndexS:$idx))), 4071 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 4072 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4073 (v2f32 (AArch64duplane32 4074 (v4f32 (insert_subvector undef, 4075 (v2f32 (fneg V64:$Rm)), 4076 (i32 0))), 4077 VectorIndexS:$idx)))), 4078 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4079 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 4080 VectorIndexS:$idx)>; 4081 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 4082 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 4083 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 4084 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 4085 4086 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 4087 // and DUP scalar. 4088 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4089 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 4090 VectorIndexS:$idx))), 4091 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, 4092 VectorIndexS:$idx)>; 4093 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4094 (v4f32 (AArch64duplane32 4095 (v4f32 (insert_subvector undef, 4096 (v2f32 (fneg V64:$Rm)), 4097 (i32 0))), 4098 VectorIndexS:$idx)))), 4099 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 4100 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 4101 VectorIndexS:$idx)>; 4102 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 4103 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 4104 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 4105 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 4106 4107 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar 4108 // (DUPLANE from 64-bit would be trivial). 4109 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 4110 (AArch64duplane64 (v2f64 (fneg V128:$Rm)), 4111 VectorIndexD:$idx))), 4112 (FMLSv2i64_indexed 4113 V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; 4114 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 4115 (AArch64dup (f64 (fneg FPR64Op:$Rm))))), 4116 (FMLSv2i64_indexed V128:$Rd, V128:$Rn, 4117 (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; 4118 4119 // 2 variants for 32-bit scalar version: extract from .2s or from .4s 4120 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 4121 (vector_extract (v4f32 (fneg V128:$Rm)), 4122 VectorIndexS:$idx))), 4123 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 4124 V128:$Rm, VectorIndexS:$idx)>; 4125 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 4126 (vector_extract (v2f32 (fneg V64:$Rm)), 4127 VectorIndexS:$idx))), 4128 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 4129 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; 4130 4131 // 1 variant for 64-bit scalar version: extract from .1d or from .2d 4132 def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), 4133 (vector_extract (v2f64 (fneg V128:$Rm)), 4134 VectorIndexS:$idx))), 4135 (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, 4136 V128:$Rm, VectorIndexS:$idx)>; 4137} 4138 4139defm : FMLSIndexedAfterNegPatterns< 4140 TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; 4141defm : FMLSIndexedAfterNegPatterns< 4142 TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; 4143 4144defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; 4145defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; 4146 4147def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 4148 (FMULv2i32_indexed V64:$Rn, 4149 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 4150 (i64 0))>; 4151def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 4152 (FMULv4i32_indexed V128:$Rn, 4153 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 4154 (i64 0))>; 4155def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), 4156 (FMULv2i64_indexed V128:$Rn, 4157 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), 4158 (i64 0))>; 4159 4160defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; 4161defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 4162defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", 4163 TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; 4164defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", 4165 TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; 4166defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; 4167defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 4168 TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4169defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 4170 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; 4171defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", 4172 int_aarch64_neon_smull>; 4173defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 4174 int_aarch64_neon_sqadd>; 4175defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 4176 int_aarch64_neon_sqsub>; 4177defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 4178defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 4179 TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4180defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 4181 TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; 4182defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", 4183 int_aarch64_neon_umull>; 4184 4185// A scalar sqdmull with the second operand being a vector lane can be 4186// handled directly with the indexed instruction encoding. 4187def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 4188 (vector_extract (v4i32 V128:$Vm), 4189 VectorIndexS:$idx)), 4190 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 4191 4192//---------------------------------------------------------------------------- 4193// AdvSIMD scalar shift instructions 4194//---------------------------------------------------------------------------- 4195defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; 4196defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; 4197defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; 4198defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; 4199// Codegen patterns for the above. We don't put these directly on the 4200// instructions because TableGen's type inference can't handle the truth. 4201// Having the same base pattern for fp <--> int totally freaks it out. 4202def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), 4203 (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; 4204def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), 4205 (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; 4206def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), 4207 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 4208def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), 4209 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 4210def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), 4211 vecshiftR64:$imm)), 4212 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 4213def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), 4214 vecshiftR64:$imm)), 4215 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 4216def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), 4217 (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 4218def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), 4219 (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 4220def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 4221 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 4222def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 4223 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 4224def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), 4225 vecshiftR64:$imm)), 4226 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 4227def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), 4228 vecshiftR64:$imm)), 4229 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 4230 4231defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; 4232defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; 4233defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", 4234 int_aarch64_neon_sqrshrn>; 4235defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", 4236 int_aarch64_neon_sqrshrun>; 4237defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 4238defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 4239defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", 4240 int_aarch64_neon_sqshrn>; 4241defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", 4242 int_aarch64_neon_sqshrun>; 4243defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; 4244defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; 4245defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", 4246 TriOpFrag<(add node:$LHS, 4247 (AArch64srshri node:$MHS, node:$RHS))>>; 4248defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; 4249defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", 4250 TriOpFrag<(add node:$LHS, 4251 (AArch64vashr node:$MHS, node:$RHS))>>; 4252defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", 4253 int_aarch64_neon_uqrshrn>; 4254defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 4255defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", 4256 int_aarch64_neon_uqshrn>; 4257defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; 4258defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", 4259 TriOpFrag<(add node:$LHS, 4260 (AArch64urshri node:$MHS, node:$RHS))>>; 4261defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; 4262defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", 4263 TriOpFrag<(add node:$LHS, 4264 (AArch64vlshr node:$MHS, node:$RHS))>>; 4265 4266//---------------------------------------------------------------------------- 4267// AdvSIMD vector shift instructions 4268//---------------------------------------------------------------------------- 4269defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; 4270defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; 4271defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", 4272 int_aarch64_neon_vcvtfxs2fp>; 4273defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", 4274 int_aarch64_neon_rshrn>; 4275defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; 4276defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", 4277 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; 4278defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; 4279def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 4280 (i32 vecshiftL64:$imm))), 4281 (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; 4282defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", 4283 int_aarch64_neon_sqrshrn>; 4284defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", 4285 int_aarch64_neon_sqrshrun>; 4286defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 4287defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 4288defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", 4289 int_aarch64_neon_sqshrn>; 4290defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", 4291 int_aarch64_neon_sqshrun>; 4292defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; 4293def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 4294 (i32 vecshiftR64:$imm))), 4295 (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; 4296defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; 4297defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", 4298 TriOpFrag<(add node:$LHS, 4299 (AArch64srshri node:$MHS, node:$RHS))> >; 4300defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", 4301 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; 4302 4303defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; 4304defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", 4305 TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; 4306defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", 4307 int_aarch64_neon_vcvtfxu2fp>; 4308defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", 4309 int_aarch64_neon_uqrshrn>; 4310defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 4311defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", 4312 int_aarch64_neon_uqshrn>; 4313defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; 4314defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", 4315 TriOpFrag<(add node:$LHS, 4316 (AArch64urshri node:$MHS, node:$RHS))> >; 4317defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", 4318 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; 4319defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; 4320defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", 4321 TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; 4322 4323// SHRN patterns for when a logical right shift was used instead of arithmetic 4324// (the immediate guarantees no sign bits actually end up in the result so it 4325// doesn't matter). 4326def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))), 4327 (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>; 4328def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))), 4329 (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>; 4330def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))), 4331 (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>; 4332 4333def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd), 4334 (trunc (AArch64vlshr (v8i16 V128:$Rn), 4335 vecshiftR16Narrow:$imm)))), 4336 (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 4337 V128:$Rn, vecshiftR16Narrow:$imm)>; 4338def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd), 4339 (trunc (AArch64vlshr (v4i32 V128:$Rn), 4340 vecshiftR32Narrow:$imm)))), 4341 (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 4342 V128:$Rn, vecshiftR32Narrow:$imm)>; 4343def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd), 4344 (trunc (AArch64vlshr (v2i64 V128:$Rn), 4345 vecshiftR64Narrow:$imm)))), 4346 (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 4347 V128:$Rn, vecshiftR32Narrow:$imm)>; 4348 4349// Vector sign and zero extensions are implemented with SSHLL and USSHLL. 4350// Anyexts are implemented as zexts. 4351def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>; 4352def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 4353def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>; 4354def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>; 4355def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 4356def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>; 4357def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>; 4358def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 4359def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>; 4360// Also match an extend from the upper half of a 128 bit source register. 4361def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 4362 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 4363def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 4364 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 4365def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 4366 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 4367def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 4368 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 4369def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 4370 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 4371def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 4372 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 4373def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 4374 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 4375def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 4376 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 4377def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 4378 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 4379 4380// Vector shift sxtl aliases 4381def : InstAlias<"sxtl.8h $dst, $src1", 4382 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 4383def : InstAlias<"sxtl $dst.8h, $src1.8b", 4384 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 4385def : InstAlias<"sxtl.4s $dst, $src1", 4386 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 4387def : InstAlias<"sxtl $dst.4s, $src1.4h", 4388 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 4389def : InstAlias<"sxtl.2d $dst, $src1", 4390 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 4391def : InstAlias<"sxtl $dst.2d, $src1.2s", 4392 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 4393 4394// Vector shift sxtl2 aliases 4395def : InstAlias<"sxtl2.8h $dst, $src1", 4396 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 4397def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 4398 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 4399def : InstAlias<"sxtl2.4s $dst, $src1", 4400 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 4401def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 4402 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 4403def : InstAlias<"sxtl2.2d $dst, $src1", 4404 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 4405def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 4406 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 4407 4408// Vector shift uxtl aliases 4409def : InstAlias<"uxtl.8h $dst, $src1", 4410 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 4411def : InstAlias<"uxtl $dst.8h, $src1.8b", 4412 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 4413def : InstAlias<"uxtl.4s $dst, $src1", 4414 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 4415def : InstAlias<"uxtl $dst.4s, $src1.4h", 4416 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 4417def : InstAlias<"uxtl.2d $dst, $src1", 4418 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 4419def : InstAlias<"uxtl $dst.2d, $src1.2s", 4420 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 4421 4422// Vector shift uxtl2 aliases 4423def : InstAlias<"uxtl2.8h $dst, $src1", 4424 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 4425def : InstAlias<"uxtl2 $dst.8h, $src1.16b", 4426 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 4427def : InstAlias<"uxtl2.4s $dst, $src1", 4428 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 4429def : InstAlias<"uxtl2 $dst.4s, $src1.8h", 4430 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 4431def : InstAlias<"uxtl2.2d $dst, $src1", 4432 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 4433def : InstAlias<"uxtl2 $dst.2d, $src1.4s", 4434 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 4435 4436// If an integer is about to be converted to a floating point value, 4437// just load it on the floating point unit. 4438// These patterns are more complex because floating point loads do not 4439// support sign extension. 4440// The sign extension has to be explicitly added and is only supported for 4441// one step: byte-to-half, half-to-word, word-to-doubleword. 4442// SCVTF GPR -> FPR is 9 cycles. 4443// SCVTF FPR -> FPR is 4 cyclces. 4444// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles. 4445// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR 4446// and still being faster. 4447// However, this is not good for code size. 4448// 8-bits -> float. 2 sizes step-up. 4449class SExtLoadi8CVTf32Pat<dag addrmode, dag INST> 4450 : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))), 4451 (SCVTFv1i32 (f32 (EXTRACT_SUBREG 4452 (SSHLLv4i16_shift 4453 (f64 4454 (EXTRACT_SUBREG 4455 (SSHLLv8i8_shift 4456 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 4457 INST, 4458 bsub), 4459 0), 4460 dsub)), 4461 0), 4462 ssub)))>, Requires<[NotForCodeSize]>; 4463 4464def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext), 4465 (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>; 4466def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext), 4467 (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>; 4468def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), 4469 (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>; 4470def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset), 4471 (LDURBi GPR64sp:$Rn, simm9:$offset)>; 4472 4473// 16-bits -> float. 1 size step-up. 4474class SExtLoadi16CVTf32Pat<dag addrmode, dag INST> 4475 : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))), 4476 (SCVTFv1i32 (f32 (EXTRACT_SUBREG 4477 (SSHLLv4i16_shift 4478 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 4479 INST, 4480 hsub), 4481 0), 4482 ssub)))>, Requires<[NotForCodeSize]>; 4483 4484def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 4485 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 4486def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 4487 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 4488def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 4489 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 4490def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 4491 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 4492 4493// 32-bits to 32-bits are handled in target specific dag combine: 4494// performIntToFpCombine. 4495// 64-bits integer to 32-bits floating point, not possible with 4496// SCVTF on floating point registers (both source and destination 4497// must have the same size). 4498 4499// Here are the patterns for 8, 16, 32, and 64-bits to double. 4500// 8-bits -> double. 3 size step-up: give up. 4501// 16-bits -> double. 2 size step. 4502class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> 4503 : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), 4504 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 4505 (SSHLLv2i32_shift 4506 (f64 4507 (EXTRACT_SUBREG 4508 (SSHLLv4i16_shift 4509 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 4510 INST, 4511 hsub), 4512 0), 4513 dsub)), 4514 0), 4515 dsub)))>, Requires<[NotForCodeSize]>; 4516 4517def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 4518 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 4519def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 4520 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 4521def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 4522 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 4523def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 4524 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 4525// 32-bits -> double. 1 size step-up. 4526class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 4527 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 4528 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 4529 (SSHLLv2i32_shift 4530 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 4531 INST, 4532 ssub), 4533 0), 4534 dsub)))>, Requires<[NotForCodeSize]>; 4535 4536def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 4537 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 4538def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 4539 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 4540def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 4541 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 4542def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 4543 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 4544 4545// 64-bits -> double are handled in target specific dag combine: 4546// performIntToFpCombine. 4547 4548 4549//---------------------------------------------------------------------------- 4550// AdvSIMD Load-Store Structure 4551//---------------------------------------------------------------------------- 4552defm LD1 : SIMDLd1Multiple<"ld1">; 4553defm LD2 : SIMDLd2Multiple<"ld2">; 4554defm LD3 : SIMDLd3Multiple<"ld3">; 4555defm LD4 : SIMDLd4Multiple<"ld4">; 4556 4557defm ST1 : SIMDSt1Multiple<"st1">; 4558defm ST2 : SIMDSt2Multiple<"st2">; 4559defm ST3 : SIMDSt3Multiple<"st3">; 4560defm ST4 : SIMDSt4Multiple<"st4">; 4561 4562class Ld1Pat<ValueType ty, Instruction INST> 4563 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 4564 4565def : Ld1Pat<v16i8, LD1Onev16b>; 4566def : Ld1Pat<v8i16, LD1Onev8h>; 4567def : Ld1Pat<v4i32, LD1Onev4s>; 4568def : Ld1Pat<v2i64, LD1Onev2d>; 4569def : Ld1Pat<v8i8, LD1Onev8b>; 4570def : Ld1Pat<v4i16, LD1Onev4h>; 4571def : Ld1Pat<v2i32, LD1Onev2s>; 4572def : Ld1Pat<v1i64, LD1Onev1d>; 4573 4574class St1Pat<ValueType ty, Instruction INST> 4575 : Pat<(store ty:$Vt, GPR64sp:$Rn), 4576 (INST ty:$Vt, GPR64sp:$Rn)>; 4577 4578def : St1Pat<v16i8, ST1Onev16b>; 4579def : St1Pat<v8i16, ST1Onev8h>; 4580def : St1Pat<v4i32, ST1Onev4s>; 4581def : St1Pat<v2i64, ST1Onev2d>; 4582def : St1Pat<v8i8, ST1Onev8b>; 4583def : St1Pat<v4i16, ST1Onev4h>; 4584def : St1Pat<v2i32, ST1Onev2s>; 4585def : St1Pat<v1i64, ST1Onev1d>; 4586 4587//--- 4588// Single-element 4589//--- 4590 4591defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 4592defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 4593defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 4594defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 4595let mayLoad = 1, neverHasSideEffects = 1 in { 4596defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 4597defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 4598defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 4599defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 4600defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 4601defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 4602defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 4603defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 4604defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 4605defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 4606defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 4607defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 4608defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 4609defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 4610defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 4611defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 4612} 4613 4614def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 4615 (LD1Rv8b GPR64sp:$Rn)>; 4616def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 4617 (LD1Rv16b GPR64sp:$Rn)>; 4618def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 4619 (LD1Rv4h GPR64sp:$Rn)>; 4620def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 4621 (LD1Rv8h GPR64sp:$Rn)>; 4622def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 4623 (LD1Rv2s GPR64sp:$Rn)>; 4624def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 4625 (LD1Rv4s GPR64sp:$Rn)>; 4626def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 4627 (LD1Rv2d GPR64sp:$Rn)>; 4628def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 4629 (LD1Rv1d GPR64sp:$Rn)>; 4630// Grab the floating point version too 4631def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 4632 (LD1Rv2s GPR64sp:$Rn)>; 4633def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 4634 (LD1Rv4s GPR64sp:$Rn)>; 4635def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 4636 (LD1Rv2d GPR64sp:$Rn)>; 4637def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 4638 (LD1Rv1d GPR64sp:$Rn)>; 4639 4640class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 4641 ValueType VTy, ValueType STy, Instruction LD1> 4642 : Pat<(vector_insert (VTy VecListOne128:$Rd), 4643 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 4644 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 4645 4646def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 4647def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 4648def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 4649def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 4650def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 4651def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 4652 4653class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 4654 ValueType VTy, ValueType STy, Instruction LD1> 4655 : Pat<(vector_insert (VTy VecListOne64:$Rd), 4656 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 4657 (EXTRACT_SUBREG 4658 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 4659 VecIndex:$idx, GPR64sp:$Rn), 4660 dsub)>; 4661 4662def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 4663def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 4664def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 4665def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 4666 4667 4668defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 4669defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 4670defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 4671defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 4672 4673// Stores 4674defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 4675defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 4676defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 4677defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 4678 4679let AddedComplexity = 15 in 4680class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 4681 ValueType VTy, ValueType STy, Instruction ST1> 4682 : Pat<(scalar_store 4683 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 4684 GPR64sp:$Rn), 4685 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 4686 4687def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 4688def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 4689def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 4690def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 4691def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 4692def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 4693 4694let AddedComplexity = 15 in 4695class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 4696 ValueType VTy, ValueType STy, Instruction ST1> 4697 : Pat<(scalar_store 4698 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 4699 GPR64sp:$Rn), 4700 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 4701 VecIndex:$idx, GPR64sp:$Rn)>; 4702 4703def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 4704def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 4705def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 4706def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 4707 4708multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 4709 ValueType VTy, ValueType STy, Instruction ST1, 4710 int offset> { 4711 def : Pat<(scalar_store 4712 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 4713 GPR64sp:$Rn, offset), 4714 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 4715 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 4716 4717 def : Pat<(scalar_store 4718 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 4719 GPR64sp:$Rn, GPR64:$Rm), 4720 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 4721 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 4722} 4723 4724defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 4725defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 4726 2>; 4727defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 4728defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 4729defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 4730defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 4731 4732multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 4733 ValueType VTy, ValueType STy, Instruction ST1, 4734 int offset> { 4735 def : Pat<(scalar_store 4736 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 4737 GPR64sp:$Rn, offset), 4738 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 4739 4740 def : Pat<(scalar_store 4741 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 4742 GPR64sp:$Rn, GPR64:$Rm), 4743 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 4744} 4745 4746defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 4747 1>; 4748defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 4749 2>; 4750defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 4751defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 4752defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 4753defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 4754 4755let mayStore = 1, neverHasSideEffects = 1 in { 4756defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 4757defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 4758defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 4759defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 4760defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 4761defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 4762defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 4763defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 4764defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 4765defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 4766defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 4767defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 4768} 4769 4770defm ST1 : SIMDLdSt1SingleAliases<"st1">; 4771defm ST2 : SIMDLdSt2SingleAliases<"st2">; 4772defm ST3 : SIMDLdSt3SingleAliases<"st3">; 4773defm ST4 : SIMDLdSt4SingleAliases<"st4">; 4774 4775//---------------------------------------------------------------------------- 4776// Crypto extensions 4777//---------------------------------------------------------------------------- 4778 4779def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; 4780def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; 4781def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; 4782def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; 4783 4784def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; 4785def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; 4786def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; 4787def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; 4788def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; 4789def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; 4790def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; 4791 4792def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; 4793def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; 4794def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; 4795 4796//---------------------------------------------------------------------------- 4797// Compiler-pseudos 4798//---------------------------------------------------------------------------- 4799// FIXME: Like for X86, these should go in their own separate .td file. 4800 4801// Any instruction that defines a 32-bit result leaves the high half of the 4802// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may 4803// be copying from a truncate. But any other 32-bit operation will zero-extend 4804// up to 64 bits. 4805// FIXME: X86 also checks for CMOV here. Do we need something similar? 4806def def32 : PatLeaf<(i32 GPR32:$src), [{ 4807 return N->getOpcode() != ISD::TRUNCATE && 4808 N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && 4809 N->getOpcode() != ISD::CopyFromReg; 4810}]>; 4811 4812// In the case of a 32-bit def that is known to implicitly zero-extend, 4813// we can use a SUBREG_TO_REG. 4814def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; 4815 4816// For an anyext, we don't care what the high bits are, so we can perform an 4817// INSERT_SUBREF into an IMPLICIT_DEF. 4818def : Pat<(i64 (anyext GPR32:$src)), 4819 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; 4820 4821// When we need to explicitly zero-extend, we use an unsigned bitfield move 4822// instruction (UBFM) on the enclosing super-reg. 4823def : Pat<(i64 (zext GPR32:$src)), 4824 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 4825 4826// To sign extend, we use a signed bitfield move instruction (SBFM) on the 4827// containing super-reg. 4828def : Pat<(i64 (sext GPR32:$src)), 4829 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 4830def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 4831def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 4832def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 4833def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 4834def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 4835def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 4836def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 4837 4838def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 4839 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 4840 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 4841def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 4842 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 4843 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 4844 4845def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 4846 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 4847 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 4848def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 4849 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 4850 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 4851 4852def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 4853 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 4854 (i64 (i64shift_a imm0_63:$imm)), 4855 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 4856 4857// sra patterns have an AddedComplexity of 10, so make sure we have a higher 4858// AddedComplexity for the following patterns since we want to match sext + sra 4859// patterns before we attempt to match a single sra node. 4860let AddedComplexity = 20 in { 4861// We support all sext + sra combinations which preserve at least one bit of the 4862// original value which is to be sign extended. E.g. we support shifts up to 4863// bitwidth-1 bits. 4864def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 4865 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 4866def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 4867 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 4868 4869def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 4870 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 4871def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 4872 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 4873 4874def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 4875 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 4876 (i64 imm0_31:$imm), 31)>; 4877} // AddedComplexity = 20 4878 4879// To truncate, we can simply extract from a subregister. 4880def : Pat<(i32 (trunc GPR64sp:$src)), 4881 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 4882 4883// __builtin_trap() uses the BRK instruction on AArch64. 4884def : Pat<(trap), (BRK 1)>; 4885 4886// Conversions within AdvSIMD types in the same register size are free. 4887// But because we need a consistent lane ordering, in big endian many 4888// conversions require one or more REV instructions. 4889// 4890// Consider a simple memory load followed by a bitconvert then a store. 4891// v0 = load v2i32 4892// v1 = BITCAST v2i32 v0 to v4i16 4893// store v4i16 v2 4894// 4895// In big endian mode every memory access has an implicit byte swap. LDR and 4896// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that 4897// is, they treat the vector as a sequence of elements to be byte-swapped. 4898// The two pairs of instructions are fundamentally incompatible. We've decided 4899// to use LD1/ST1 only to simplify compiler implementation. 4900// 4901// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes 4902// the original code sequence: 4903// v0 = load v2i32 4904// v1 = REV v2i32 (implicit) 4905// v2 = BITCAST v2i32 v1 to v4i16 4906// v3 = REV v4i16 v2 (implicit) 4907// store v4i16 v3 4908// 4909// But this is now broken - the value stored is different to the value loaded 4910// due to lane reordering. To fix this, on every BITCAST we must perform two 4911// other REVs: 4912// v0 = load v2i32 4913// v1 = REV v2i32 (implicit) 4914// v2 = REV v2i32 4915// v3 = BITCAST v2i32 v2 to v4i16 4916// v4 = REV v4i16 4917// v5 = REV v4i16 v4 (implicit) 4918// store v4i16 v5 4919// 4920// This means an extra two instructions, but actually in most cases the two REV 4921// instructions can be combined into one. For example: 4922// (REV64_2s (REV64_4h X)) === (REV32_4h X) 4923// 4924// There is also no 128-bit REV instruction. This must be synthesized with an 4925// EXT instruction. 4926// 4927// Most bitconverts require some sort of conversion. The only exceptions are: 4928// a) Identity conversions - vNfX <-> vNiX 4929// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 4930// 4931 4932let Predicates = [IsLE] in { 4933def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4934def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4935def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4936def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4937 4938def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 4939 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4940def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 4941 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4942def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 4943 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4944def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 4945 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4946def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 4947 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4948} 4949let Predicates = [IsBE] in { 4950def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 4951 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 4952def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 4953 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 4954def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 4955 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 4956def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 4957 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 4958 4959def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 4960 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 4961def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 4962 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 4963def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 4964 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 4965def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 4966 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 4967} 4968def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4969def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4970def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 4971 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4972def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 4973 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4974def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 4975 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4976def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 4977 4978def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 4979 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 4980def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 4981 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 4982def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 4983 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 4984def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 4985 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 4986def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 4987 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 4988 4989let Predicates = [IsLE] in { 4990def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 4991def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 4992def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 4993def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 4994} 4995let Predicates = [IsBE] in { 4996def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 4997 (v1i64 (REV64v2i32 FPR64:$src))>; 4998def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 4999 (v1i64 (REV64v4i16 FPR64:$src))>; 5000def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 5001 (v1i64 (REV64v8i8 FPR64:$src))>; 5002def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 5003 (v1i64 (REV64v2i32 FPR64:$src))>; 5004} 5005def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 5006def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 5007 5008let Predicates = [IsLE] in { 5009def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 5010def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 5011def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; 5012def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 5013def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 5014} 5015let Predicates = [IsBE] in { 5016def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 5017 (v2i32 (REV64v2i32 FPR64:$src))>; 5018def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 5019 (v2i32 (REV32v4i16 FPR64:$src))>; 5020def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 5021 (v2i32 (REV32v8i8 FPR64:$src))>; 5022def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 5023 (v2i32 (REV64v2i32 FPR64:$src))>; 5024def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 5025 (v2i32 (REV64v2i32 FPR64:$src))>; 5026} 5027def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 5028 5029let Predicates = [IsLE] in { 5030def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; 5031def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; 5032def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; 5033def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; 5034def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; 5035def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; 5036} 5037let Predicates = [IsBE] in { 5038def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), 5039 (v4i16 (REV64v4i16 FPR64:$src))>; 5040def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), 5041 (v4i16 (REV32v4i16 FPR64:$src))>; 5042def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), 5043 (v4i16 (REV16v8i8 FPR64:$src))>; 5044def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), 5045 (v4i16 (REV64v4i16 FPR64:$src))>; 5046def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), 5047 (v4i16 (REV32v4i16 FPR64:$src))>; 5048def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), 5049 (v4i16 (REV64v4i16 FPR64:$src))>; 5050} 5051 5052let Predicates = [IsLE] in { 5053def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; 5054def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; 5055def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; 5056def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; 5057def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; 5058def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; 5059} 5060let Predicates = [IsBE] in { 5061def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), 5062 (v8i8 (REV64v8i8 FPR64:$src))>; 5063def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), 5064 (v8i8 (REV32v8i8 FPR64:$src))>; 5065def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), 5066 (v8i8 (REV16v8i8 FPR64:$src))>; 5067def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), 5068 (v8i8 (REV64v8i8 FPR64:$src))>; 5069def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), 5070 (v8i8 (REV32v8i8 FPR64:$src))>; 5071def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), 5072 (v8i8 (REV64v8i8 FPR64:$src))>; 5073} 5074 5075let Predicates = [IsLE] in { 5076def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; 5077def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; 5078def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; 5079def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; 5080} 5081let Predicates = [IsBE] in { 5082def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), 5083 (f64 (REV64v2i32 FPR64:$src))>; 5084def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), 5085 (f64 (REV64v4i16 FPR64:$src))>; 5086def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), 5087 (f64 (REV64v2i32 FPR64:$src))>; 5088def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), 5089 (f64 (REV64v8i8 FPR64:$src))>; 5090} 5091def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; 5092def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; 5093 5094let Predicates = [IsLE] in { 5095def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; 5096def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; 5097def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; 5098def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; 5099} 5100let Predicates = [IsBE] in { 5101def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), 5102 (v1f64 (REV64v2i32 FPR64:$src))>; 5103def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), 5104 (v1f64 (REV64v4i16 FPR64:$src))>; 5105def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), 5106 (v1f64 (REV64v8i8 FPR64:$src))>; 5107def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), 5108 (v1f64 (REV64v2i32 FPR64:$src))>; 5109} 5110def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; 5111def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; 5112 5113let Predicates = [IsLE] in { 5114def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; 5115def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; 5116def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; 5117def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; 5118def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; 5119} 5120let Predicates = [IsBE] in { 5121def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), 5122 (v2f32 (REV64v2i32 FPR64:$src))>; 5123def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), 5124 (v2f32 (REV32v4i16 FPR64:$src))>; 5125def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), 5126 (v2f32 (REV32v8i8 FPR64:$src))>; 5127def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), 5128 (v2f32 (REV64v2i32 FPR64:$src))>; 5129def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), 5130 (v2f32 (REV64v2i32 FPR64:$src))>; 5131} 5132def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; 5133 5134let Predicates = [IsLE] in { 5135def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; 5136def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; 5137def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; 5138def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; 5139def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; 5140def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; 5141} 5142let Predicates = [IsBE] in { 5143def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), 5144 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 5145def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), 5146 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 5147 (REV64v4i32 FPR128:$src), (i32 8)))>; 5148def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), 5149 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 5150 (REV64v8i16 FPR128:$src), (i32 8)))>; 5151def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), 5152 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 5153def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), 5154 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 5155 (REV64v4i32 FPR128:$src), (i32 8)))>; 5156def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), 5157 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), 5158 (REV64v16i8 FPR128:$src), (i32 8)))>; 5159} 5160 5161let Predicates = [IsLE] in { 5162def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 5163def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; 5164def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; 5165def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; 5166def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; 5167} 5168let Predicates = [IsBE] in { 5169def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), 5170 (v2f64 (EXTv16i8 FPR128:$src, 5171 FPR128:$src, (i32 8)))>; 5172def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), 5173 (v2f64 (REV64v4i32 FPR128:$src))>; 5174def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), 5175 (v2f64 (REV64v8i16 FPR128:$src))>; 5176def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), 5177 (v2f64 (REV64v16i8 FPR128:$src))>; 5178def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), 5179 (v2f64 (REV64v4i32 FPR128:$src))>; 5180} 5181def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; 5182 5183let Predicates = [IsLE] in { 5184def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; 5185def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; 5186def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; 5187def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; 5188def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 5189} 5190let Predicates = [IsBE] in { 5191def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), 5192 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), 5193 (REV64v4i32 FPR128:$src), (i32 8)))>; 5194def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), 5195 (v4f32 (REV32v8i16 FPR128:$src))>; 5196def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), 5197 (v4f32 (REV32v16i8 FPR128:$src))>; 5198def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), 5199 (v4f32 (REV64v4i32 FPR128:$src))>; 5200def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), 5201 (v4f32 (REV64v4i32 FPR128:$src))>; 5202} 5203def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; 5204 5205let Predicates = [IsLE] in { 5206def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; 5207def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; 5208def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; 5209def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; 5210def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; 5211} 5212let Predicates = [IsBE] in { 5213def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), 5214 (v2i64 (EXTv16i8 FPR128:$src, 5215 FPR128:$src, (i32 8)))>; 5216def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), 5217 (v2i64 (REV64v4i32 FPR128:$src))>; 5218def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), 5219 (v2i64 (REV64v8i16 FPR128:$src))>; 5220def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), 5221 (v2i64 (REV64v16i8 FPR128:$src))>; 5222def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), 5223 (v2i64 (REV64v4i32 FPR128:$src))>; 5224} 5225def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; 5226 5227let Predicates = [IsLE] in { 5228def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; 5229def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; 5230def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; 5231def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; 5232def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; 5233} 5234let Predicates = [IsBE] in { 5235def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), 5236 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), 5237 (REV64v4i32 FPR128:$src), 5238 (i32 8)))>; 5239def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), 5240 (v4i32 (REV64v4i32 FPR128:$src))>; 5241def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), 5242 (v4i32 (REV32v8i16 FPR128:$src))>; 5243def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), 5244 (v4i32 (REV32v16i8 FPR128:$src))>; 5245def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), 5246 (v4i32 (REV64v4i32 FPR128:$src))>; 5247} 5248def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; 5249 5250let Predicates = [IsLE] in { 5251def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; 5252def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; 5253def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; 5254def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; 5255def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; 5256def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; 5257} 5258let Predicates = [IsBE] in { 5259def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), 5260 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), 5261 (REV64v8i16 FPR128:$src), 5262 (i32 8)))>; 5263def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), 5264 (v8i16 (REV64v8i16 FPR128:$src))>; 5265def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), 5266 (v8i16 (REV32v8i16 FPR128:$src))>; 5267def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), 5268 (v8i16 (REV16v16i8 FPR128:$src))>; 5269def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), 5270 (v8i16 (REV64v8i16 FPR128:$src))>; 5271def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), 5272 (v8i16 (REV32v8i16 FPR128:$src))>; 5273} 5274 5275let Predicates = [IsLE] in { 5276def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; 5277def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; 5278def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; 5279def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; 5280def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; 5281def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; 5282} 5283let Predicates = [IsBE] in { 5284def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), 5285 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), 5286 (REV64v16i8 FPR128:$src), 5287 (i32 8)))>; 5288def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), 5289 (v16i8 (REV64v16i8 FPR128:$src))>; 5290def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), 5291 (v16i8 (REV32v16i8 FPR128:$src))>; 5292def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), 5293 (v16i8 (REV16v16i8 FPR128:$src))>; 5294def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), 5295 (v16i8 (REV64v16i8 FPR128:$src))>; 5296def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), 5297 (v16i8 (REV32v16i8 FPR128:$src))>; 5298} 5299 5300def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), 5301 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5302def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), 5303 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5304def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), 5305 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5306def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), 5307 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 5308 5309// A 64-bit subvector insert to the first 128-bit vector position 5310// is a subregister copy that needs no instruction. 5311def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), 5312 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5313def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), 5314 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5315def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), 5316 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5317def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), 5318 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5319def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), 5320 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5321def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), 5322 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 5323 5324// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 5325// or v2f32. 5326def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), 5327 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), 5328 (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; 5329def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), 5330 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), 5331 (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; 5332 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, 5333 // so we match on v4f32 here, not v2f32. This will also catch adding 5334 // the low two lanes of a true v4f32 vector. 5335def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), 5336 (vector_extract (v4f32 FPR128:$Rn), (i64 1))), 5337 (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; 5338 5339// Scalar 64-bit shifts in FPR64 registers. 5340def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 5341 (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 5342def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 5343 (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 5344def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 5345 (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 5346def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), 5347 (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; 5348 5349// Tail call return handling. These are all compiler pseudo-instructions, 5350// so no encoding information or anything like that. 5351let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { 5352 def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>; 5353 def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>; 5354} 5355 5356def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), 5357 (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>; 5358def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), 5359 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 5360def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), 5361 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 5362 5363include "AArch64InstrAtomics.td" 5364