//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON "modified immediate" operand (printing only; no asm-parser
// match class of its own).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Splat-immediate operands, one per element size.  Each pairs an
// AsmOperandClass (for the assembly parser) with the Operand used in
// instruction definitions.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
// "Not" variants: matched when the bitwise inverse of the immediate is a
// valid splat (no PrintMethod; these exist for parsing/matching only).
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

// Operand classes for VMOV-immediate where a From-sized immediate is
// replicated across a To-sized vector element.  The class name, predicate,
// and render method are all synthesized from the two element sizes.
class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

// As above, but for the inverted (VMVN-style) replicated immediate.
class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.F32.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Vector lane-index operands.  The ImmLeaf predicate bounds the index by the
// number of elements of that size that fit in a 64-bit D register
// (8 x i8, 4 x i16, 2 x i32, 1 x i64).
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
// NOTE(review): backed by the DPair register class here, while the all-lanes
// spaced-pair operand below uses DPairSpc — confirm this asymmetry is
// intentional.
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                       "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                        "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector-list operands.  Unlike the list operands above, these
// are plain Operand<i32> wrappers whose MI representation carries the base
// D register plus the lane index as separate operands.

// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// PatFrags that select loads/stores based on their known alignment, used to
// choose between NEON vector load/store forms and other lowerings.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compare: result is an integer vector; both sources share a type.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
// Vector compare against zero: single source operand, unconstrained types.
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;

def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                     SDTCisVT<2, i32>]>;
def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                      SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts.
def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrowing shifts.
def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert.
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Get-lane: extract an element of an integer vector into an i32, with
// unsigned (zero-extending) and signed (sign-extending) variants.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector move immediate: the i32 operand is an encoded NEON modified
// immediate (see ARM_AM::decodeNEONModImm).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with an encoded-immediate second operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: all four vector types must match.
def NEONvbsl : SDNode<"ARMISD::VBSL",
                      SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>,
                                           SDTCisSameAs<0, 3>]>>;

def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                               SDTCisVT<2, i32>]>>;

// Vector extract: concatenate-and-shift of two same-typed vectors by an
// immediate byte offset.
def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

// One-operand shuffles (element reversal within 64/32/16-bit groups).
def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (zip/unzip/transpose); all operands and results share
// one vector type.
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

// Long multiply: result element type differs from the (matching) sources.
def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                       SDTCisSameAs<1, 2>]>;
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

// Table lookups over v8i8 operands.
def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                       SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


// Matches a VMOVIMM whose decoded modified-immediate is an all-zeros
// 32-bit-element splat.
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Matches a VMOVIMM whose decoded modified-immediate is an all-ones
// 8-bit-element splat (i.e. every bit of the vector set).
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
// Q-register (2 x D) load pseudos: plain, post-increment writeback, and the
// fixed/register writeback variants.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

// QQ-register (4 x D) load pseudos.
class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


// QQQQ-register (8 x D) load pseudos.  The $src input is tied to $dst so
// only part of the full register tuple is redefined by the load.
class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;  // Rm == PC (0b1111) encodes "no writeback".
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8  : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def VLD1q8  : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): the writeback variants below use itinerary IIC_VLD1x2u for a
// 3-register load — confirm whether IIC_VLD1x3u was intended.
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
790class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode> 791 : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), 792 (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, 793 "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { 794 let Rm = 0b1111; 795 let Inst{5-4} = Rn{5-4}; 796 let DecoderMethod = "DecodeVLDST1Instruction"; 797} 798multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> { 799 def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), 800 (ins AddrMode:$Rn), IIC_VLD1x2u, 801 "vld1", Dt, "$Vd, $Rn!", 802 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 803 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 804 let Inst{5-4} = Rn{5-4}; 805 let DecoderMethod = "DecodeVLDST1Instruction"; 806 } 807 def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), 808 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, 809 "vld1", Dt, "$Vd, $Rn, $Rm", 810 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 811 let Inst{5-4} = Rn{5-4}; 812 let DecoderMethod = "DecodeVLDST1Instruction"; 813 } 814} 815 816def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; 817def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; 818def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; 819def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; 820 821defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; 822defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; 823defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; 824defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; 825 826def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 827def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 828def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 829def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 830def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 
831def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 832 833def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 834def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 835def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 836def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 837def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 838def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 839def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 840def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>; 841 842// VLD2 : Vector Load (multiple 2-element structures) 843class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, 844 InstrItinClass itin, Operand AddrMode> 845 : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), 846 (ins AddrMode:$Rn), itin, 847 "vld2", Dt, "$Vd, $Rn", "", []> { 848 let Rm = 0b1111; 849 let Inst{5-4} = Rn{5-4}; 850 let DecoderMethod = "DecodeVLDST2Instruction"; 851} 852 853def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, 854 addrmode6align64or128>, Sched<[WriteVLD2]>; 855def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, 856 addrmode6align64or128>, Sched<[WriteVLD2]>; 857def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, 858 addrmode6align64or128>, Sched<[WriteVLD2]>; 859 860def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, 861 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 862def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, 863 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 864def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, 865 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 866 867def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 868def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 
869def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>; 870 871// ...with address register writeback: 872multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, 873 RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { 874 def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 875 (ins AddrMode:$Rn), itin, 876 "vld2", Dt, "$Vd, $Rn!", 877 "$Rn.addr = $wb", []> { 878 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 879 let Inst{5-4} = Rn{5-4}; 880 let DecoderMethod = "DecodeVLDST2Instruction"; 881 } 882 def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), 883 (ins AddrMode:$Rn, rGPR:$Rm), itin, 884 "vld2", Dt, "$Vd, $Rn, $Rm", 885 "$Rn.addr = $wb", []> { 886 let Inst{5-4} = Rn{5-4}; 887 let DecoderMethod = "DecodeVLDST2Instruction"; 888 } 889} 890 891defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, 892 addrmode6align64or128>, Sched<[WriteVLD2]>; 893defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, 894 addrmode6align64or128>, Sched<[WriteVLD2]>; 895defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, 896 addrmode6align64or128>, Sched<[WriteVLD2]>; 897 898defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, 899 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 900defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, 901 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 902defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, 903 addrmode6align64or128or256>, Sched<[WriteVLD4]>; 904 905def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 906def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 907def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 908def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 909def VLD2q16PseudoWB_register : 
VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 910def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>; 911 912// ...with double-spaced registers 913def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, 914 addrmode6align64or128>, Sched<[WriteVLD2]>; 915def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, 916 addrmode6align64or128>, Sched<[WriteVLD2]>; 917def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, 918 addrmode6align64or128>, Sched<[WriteVLD2]>; 919defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, 920 addrmode6align64or128>, Sched<[WriteVLD2]>; 921defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, 922 addrmode6align64or128>, Sched<[WriteVLD2]>; 923defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, 924 addrmode6align64or128>, Sched<[WriteVLD2]>; 925 926// VLD3 : Vector Load (multiple 3-element structures) 927class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> 928 : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 929 (ins addrmode6:$Rn), IIC_VLD3, 930 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { 931 let Rm = 0b1111; 932 let Inst{4} = Rn{4}; 933 let DecoderMethod = "DecodeVLDST3Instruction"; 934} 935 936def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; 937def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; 938def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; 939 940def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 941def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 942def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 943 944// ...with address register writeback: 945class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 946 : NLdSt<0, 0b10, op11_8, op7_4, 947 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 948 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, 949 "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, 
$Rn$Rm", 950 "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { 951 let Inst{4} = Rn{4}; 952 let DecoderMethod = "DecodeVLDST3Instruction"; 953} 954 955def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; 956def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; 957def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; 958 959def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 960def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 961def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 962 963// ...with double-spaced registers: 964def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; 965def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; 966def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; 967def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; 968def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; 969def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; 970 971def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 972def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 973def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 974 975// ...alternate versions to be allocated odd register numbers: 976def VLD3q8oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 977def VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 978def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>; 979 980def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 981def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 982def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>; 983 984// VLD4 : Vector Load (multiple 4-element structures) 985class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 986 : NLdSt<0, 0b10, op11_8, op7_4, 987 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 988 (ins addrmode6:$Rn), IIC_VLD4, 989 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, 990 Sched<[WriteVLD4]> { 991 let Rm = 0b1111; 992 let Inst{5-4} = 
Rn{5-4}; 993 let DecoderMethod = "DecodeVLDST4Instruction"; 994} 995 996def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 997def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 998def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 999 1000def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1001def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1002def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1003 1004// ...with address register writeback: 1005class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1006 : NLdSt<0, 0b10, op11_8, op7_4, 1007 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1008 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 1009 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 1010 "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { 1011 let Inst{5-4} = Rn{5-4}; 1012 let DecoderMethod = "DecodeVLDST4Instruction"; 1013} 1014 1015def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 1016def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 1017def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 1018 1019def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1020def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1021def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1022 1023// ...with double-spaced registers: 1024def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 1025def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 1026def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 1027def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 1028def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; 1029def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 1030 1031def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1032def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1033def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1034 1035// ...alternate versions to be allocated odd register numbers: 1036def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>, 
Sched<[WriteVLD4]>; 1037def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1038def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>; 1039 1040def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1041def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1042def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>; 1043 1044} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1045 1046// Classes for VLD*LN pseudo-instructions with multi-register operands. 1047// These are expanded to real instructions after register allocation. 1048class VLDQLNPseudo<InstrItinClass itin> 1049 : PseudoNLdSt<(outs QPR:$dst), 1050 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1051 itin, "$src = $dst">; 1052class VLDQLNWBPseudo<InstrItinClass itin> 1053 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 1054 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1055 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1056class VLDQQLNPseudo<InstrItinClass itin> 1057 : PseudoNLdSt<(outs QQPR:$dst), 1058 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1059 itin, "$src = $dst">; 1060class VLDQQLNWBPseudo<InstrItinClass itin> 1061 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 1062 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1063 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1064class VLDQQQQLNPseudo<InstrItinClass itin> 1065 : PseudoNLdSt<(outs QQQQPR:$dst), 1066 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1067 itin, "$src = $dst">; 1068class VLDQQQQLNWBPseudo<InstrItinClass itin> 1069 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 1070 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1071 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 1072 1073// VLD1LN : Vector Load (single element to one lane) 1074class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1075 PatFrag LoadOp> 1076 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1077 
(ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 1078 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1079 "$src = $Vd", 1080 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1081 (i32 (LoadOp addrmode6:$Rn)), 1082 imm:$lane))]> { 1083 let Rm = 0b1111; 1084 let DecoderMethod = "DecodeVLD1LN"; 1085} 1086class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1087 PatFrag LoadOp> 1088 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1089 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1090 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1091 "$src = $Vd", 1092 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1093 (i32 (LoadOp addrmode6oneL32:$Rn)), 1094 imm:$lane))]>, Sched<[WriteVLD1]> { 1095 let Rm = 0b1111; 1096 let DecoderMethod = "DecodeVLD1LN"; 1097} 1098class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>, 1099 Sched<[WriteVLD1]> { 1100 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1101 (i32 (LoadOp addrmode6:$addr)), 1102 imm:$lane))]; 1103} 1104 1105def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1106 let Inst{7-5} = lane{2-0}; 1107} 1108def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1109 let Inst{7-6} = lane{1-0}; 1110 let Inst{5-4} = Rn{5-4}; 1111} 1112def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1113 let Inst{7} = lane{0}; 1114 let Inst{5-4} = Rn{5-4}; 1115} 1116 1117def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1118def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1119def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1120 1121def : Pat<(vector_insert (v2f32 DPR:$src), 1122 (f32 (load addrmode6:$addr)), imm:$lane), 1123 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1124def : Pat<(vector_insert (v4f32 QPR:$src), 1125 (f32 (load addrmode6:$addr)), imm:$lane), 1126 (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1127 1128// A 64-bit subvector insert to the first 128-bit vector position 1129// is a subregister copy that 
needs no instruction. 1130def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), 1131 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1132def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), 1133 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1134def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), 1135 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1136def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), 1137 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1138def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), 1139 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1140def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), 1141 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; 1142 1143 1144let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1145 1146// ...with address register writeback: 1147class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1148 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1149 (ins addrmode6:$Rn, am6offset:$Rm, 1150 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1151 "\\{$Vd[$lane]\\}, $Rn$Rm", 1152 "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1153 let DecoderMethod = "DecodeVLD1LN"; 1154} 1155 1156def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1157 let Inst{7-5} = lane{2-0}; 1158} 1159def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1160 let Inst{7-6} = lane{1-0}; 1161 let Inst{4} = Rn{4}; 1162} 1163def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1164 let Inst{7} = lane{0}; 1165 let Inst{5} = Rn{4}; 1166 let Inst{4} = Rn{4}; 1167} 1168 1169def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1170def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1171def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>; 1172 1173// VLD2LN : Vector Load (single 2-element structure to one lane) 
1174class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1175 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1176 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1177 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1178 "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { 1179 let Rm = 0b1111; 1180 let Inst{4} = Rn{4}; 1181 let DecoderMethod = "DecodeVLD2LN"; 1182} 1183 1184def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1185 let Inst{7-5} = lane{2-0}; 1186} 1187def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1188 let Inst{7-6} = lane{1-0}; 1189} 1190def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1191 let Inst{7} = lane{0}; 1192} 1193 1194def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1195def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1196def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1197 1198// ...with double-spaced registers: 1199def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { 1200 let Inst{7-6} = lane{1-0}; 1201} 1202def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { 1203 let Inst{7} = lane{0}; 1204} 1205 1206def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1207def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>; 1208 1209// ...with address register writeback: 1210class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1211 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 1212 (ins addrmode6:$Rn, am6offset:$Rm, 1213 DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, 1214 "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", 1215 "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { 1216 let Inst{4} = Rn{4}; 1217 let DecoderMethod = "DecodeVLD2LN"; 1218} 1219 1220def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { 1221 let Inst{7-5} = lane{2-0}; 1222} 1223def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { 1224 let Inst{7-6} = lane{1-0}; 1225} 1226def VLD2LNd32_UPD : VLD2LNWB<0b1001, 
{?,0,0,?}, "32"> { 1227 let Inst{7} = lane{0}; 1228} 1229 1230def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1231def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1232def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1233 1234def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { 1235 let Inst{7-6} = lane{1-0}; 1236} 1237def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { 1238 let Inst{7} = lane{0}; 1239} 1240 1241def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1242def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>; 1243 1244// VLD3LN : Vector Load (single 3-element structure to one lane) 1245class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1246 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), 1247 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, 1248 nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, 1249 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", 1250 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { 1251 let Rm = 0b1111; 1252 let DecoderMethod = "DecodeVLD3LN"; 1253} 1254 1255def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { 1256 let Inst{7-5} = lane{2-0}; 1257} 1258def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { 1259 let Inst{7-6} = lane{1-0}; 1260} 1261def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { 1262 let Inst{7} = lane{0}; 1263} 1264 1265def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1266def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1267def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1268 1269// ...with double-spaced registers: 1270def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { 1271 let Inst{7-6} = lane{1-0}; 1272} 1273def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { 1274 let Inst{7} = lane{0}; 1275} 1276 1277def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1278def 
VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>; 1279 1280// ...with address register writeback: 1281class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1282 : NLdStLn<1, 0b10, op11_8, op7_4, 1283 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), 1284 (ins addrmode6:$Rn, am6offset:$Rm, 1285 DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), 1286 IIC_VLD3lnu, "vld3", Dt, 1287 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", 1288 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", 1289 []>, Sched<[WriteVLD2]> { 1290 let DecoderMethod = "DecodeVLD3LN"; 1291} 1292 1293def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { 1294 let Inst{7-5} = lane{2-0}; 1295} 1296def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { 1297 let Inst{7-6} = lane{1-0}; 1298} 1299def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { 1300 let Inst{7} = lane{0}; 1301} 1302 1303def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1304def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1305def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1306 1307def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { 1308 let Inst{7-6} = lane{1-0}; 1309} 1310def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { 1311 let Inst{7} = lane{0}; 1312} 1313 1314def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1315def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>; 1316 1317// VLD4LN : Vector Load (single 4-element structure to one lane) 1318class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1319 : NLdStLn<1, 0b10, op11_8, op7_4, 1320 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 1321 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, 1322 nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, 1323 "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", 1324 "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, 1325 
Sched<[WriteVLD2]> { 1326 let Rm = 0b1111; 1327 let Inst{4} = Rn{4}; 1328 let DecoderMethod = "DecodeVLD4LN"; 1329} 1330 1331def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { 1332 let Inst{7-5} = lane{2-0}; 1333} 1334def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { 1335 let Inst{7-6} = lane{1-0}; 1336} 1337def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { 1338 let Inst{7} = lane{0}; 1339 let Inst{5} = Rn{5}; 1340} 1341 1342def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1343def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1344def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1345 1346// ...with double-spaced registers: 1347def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { 1348 let Inst{7-6} = lane{1-0}; 1349} 1350def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { 1351 let Inst{7} = lane{0}; 1352 let Inst{5} = Rn{5}; 1353} 1354 1355def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1356def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>; 1357 1358// ...with address register writeback: 1359class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1360 : NLdStLn<1, 0b10, op11_8, op7_4, 1361 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 1362 (ins addrmode6:$Rn, am6offset:$Rm, 1363 DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 1364 IIC_VLD4lnu, "vld4", Dt, 1365"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", 1366"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", 1367 []> { 1368 let Inst{4} = Rn{4}; 1369 let DecoderMethod = "DecodeVLD4LN" ; 1370} 1371 1372def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { 1373 let Inst{7-5} = lane{2-0}; 1374} 1375def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { 1376 let Inst{7-6} = lane{1-0}; 1377} 1378def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { 1379 let Inst{7} = lane{0}; 1380 let Inst{5} = Rn{5}; 1381} 1382 1383def VLD4LNd8Pseudo_UPD : 
VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1384def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1385def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1386 1387def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { 1388 let Inst{7-6} = lane{1-0}; 1389} 1390def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { 1391 let Inst{7} = lane{0}; 1392 let Inst{5} = Rn{5}; 1393} 1394 1395def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1396def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>; 1397 1398} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 1399 1400// VLD1DUP : Vector Load (single element to all lanes) 1401class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1402 Operand AddrMode> 1403 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), 1404 (ins AddrMode:$Rn), 1405 IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", 1406 [(set VecListOneDAllLanes:$Vd, 1407 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, 1408 Sched<[WriteVLD2]> { 1409 let Rm = 0b1111; 1410 let Inst{4} = Rn{4}; 1411 let DecoderMethod = "DecodeVLD1DupInstruction"; 1412} 1413def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, 1414 addrmode6dupalignNone>; 1415def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, 1416 addrmode6dupalign16>; 1417def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, 1418 addrmode6dupalign32>; 1419 1420def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1421 (VLD1DUPd32 addrmode6:$addr)>; 1422 1423class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, 1424 Operand AddrMode> 1425 : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), 1426 (ins AddrMode:$Rn), IIC_VLD1dup, 1427 "vld1", Dt, "$Vd, $Rn", "", 1428 [(set VecListDPairAllLanes:$Vd, 1429 (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { 1430 let Rm = 0b1111; 1431 let Inst{4} = Rn{4}; 1432 let DecoderMethod = 
"DecodeVLD1DupInstruction"; 1433} 1434 1435def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, 1436 addrmode6dupalignNone>; 1437def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, 1438 addrmode6dupalign16>; 1439def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, 1440 addrmode6dupalign32>; 1441 1442def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), 1443 (VLD1DUPq32 addrmode6:$addr)>; 1444 1445let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { 1446// ...with address register writeback: 1447multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1448 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1449 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1450 (ins AddrMode:$Rn), IIC_VLD1dupu, 1451 "vld1", Dt, "$Vd, $Rn!", 1452 "$Rn.addr = $wb", []> { 1453 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1454 let Inst{4} = Rn{4}; 1455 let DecoderMethod = "DecodeVLD1DupInstruction"; 1456 } 1457 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1458 (outs VecListOneDAllLanes:$Vd, GPR:$wb), 1459 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1460 "vld1", Dt, "$Vd, $Rn, $Rm", 1461 "$Rn.addr = $wb", []> { 1462 let Inst{4} = Rn{4}; 1463 let DecoderMethod = "DecodeVLD1DupInstruction"; 1464 } 1465} 1466multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { 1467 def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, 1468 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1469 (ins AddrMode:$Rn), IIC_VLD1dupu, 1470 "vld1", Dt, "$Vd, $Rn!", 1471 "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { 1472 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1473 let Inst{4} = Rn{4}; 1474 let DecoderMethod = "DecodeVLD1DupInstruction"; 1475 } 1476 def _register : NLdSt<1, 0b10, 0b1100, op7_4, 1477 (outs VecListDPairAllLanes:$Vd, GPR:$wb), 1478 (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, 1479 "vld1", Dt, "$Vd, $Rn, $Rm", 1480 "$Rn.addr = $wb", []> { 1481 let Inst{4} = Rn{4}; 1482 let DecoderMethod = "DecodeVLD1DupInstruction"; 1483 } 1484} 1485 1486defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; 1487defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; 1488defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; 1489 1490defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; 1491defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; 1492defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; 1493 1494// VLD2DUP : Vector Load (single 2-element structure to all lanes) 1495class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> 1496 : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), 1497 (ins AddrMode:$Rn), IIC_VLD2dup, 1498 "vld2", Dt, "$Vd, $Rn", "", []> { 1499 let Rm = 0b1111; 1500 let Inst{4} = Rn{4}; 1501 let DecoderMethod = "DecodeVLD2DupInstruction"; 1502} 1503 1504def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, 1505 addrmode6dupalign16>; 1506def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, 1507 addrmode6dupalign32>; 1508def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, 1509 addrmode6dupalign64>; 1510 1511// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or 1512// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". 
// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
                           addrmode6dupalign16>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                           addrmode6dupalign32>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                           addrmode6dupalign64>;

def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
// _fixed increments Rn by the transfer size ("$Rn!"); _register adds $Rm.
// NOTE(review): these writeback variants are Sched<[WriteVLD1]> while the
// non-writeback pseudos above use WriteVLD2 — confirm this is intentional.
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
                     Operand AddrMode> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
                                addrmode6dupalign64>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
                                addrmode6dupalign16>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
                                addrmode6dupalign32>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
                                addrmode6dupalign64>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
    Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  // VLD3 (all lanes) has no alignment option; bit 4 is hard-wired to 0.
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
// 32-bit forms route an extra address bit into Inst{6} (size field).
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;

def VST1q8  : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with address register writeback:
// NOTE(review): the writeback store variants in this section use load
// itineraries (IIC_VLD1u, IIC_VLD1x2u, ...); this pattern is consistent
// throughout the file — confirm it is intentional before changing.
multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;

// ...with 3 registers
// NOTE(review): VST1D3 assigns only Inst{4} = Rn{4}, while VST1D3WB below
// assigns Inst{5-4} = Rn{5-4} — confirm the asymmetry is intended.
class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins AddrMode:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VST1d8TPseudo  : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d16TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d32TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;

def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins AddrMode:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VST1d8QPseudo  : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d16QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d32QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;

def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
                   addrmode6align64or128>, Sched<[WriteVST2]>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
                   addrmode6align64or128or256>, Sched<[WriteVST4]>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;

// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
                   RegisterOperand VdTy, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}
multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
                     "vst2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1u,
                        "vst2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VST2d8wb  : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
                         addrmode6align64or128>;
defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
                         addrmode6align64or128>;
defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
                         addrmode6align64or128>;

defm VST2q8wb  : VST2QWB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;

def VST2q8PseudoWB_fixed  : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;
def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>,
                               Sched<[WriteVST4]>;

// ...with double-spaced registers
def VST2b8  : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
                   addrmode6align64or128>;
defm VST2b8wb  : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
                         addrmode6align64or128>;
defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
                         addrmode6align64or128>;

// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;

def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...with double-spaced registers:
def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;

def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// ...alternate versions to be allocated odd register numbers:
def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;

def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;

// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
          "", []>, Sched<[WriteVST4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;

def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...with double-spaced registers:
def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;

def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

// ...alternate versions to be allocated odd register numbers:
def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;

def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;

} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1

// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
class VSTQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "">;
class VSTQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb">;

// VST1LN : Vector Store (single element from one lane)
// The selection pattern stores ExtractOp of lane $lane of a Ty-typed
// D register via StoreOp, so these forms are usable for codegen.
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
    Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                          addrmode6:$addr)];
}

def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
                      NEONvgetlaneu, addrmode6> {
  // Lane index occupies progressively fewer bits as the element widens.
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
                       NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}

def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
                       addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;

// Float lane stores reuse the 32-bit integer lane-store instructions.
def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins AdrMode:$Rn, am6offset:$Rm,
             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
            "\\{$Vd[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb",
            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
                                    AdrMode:$Rn, am6offset:$Rm))]>,
    Sched<[WriteVST1]> {
  let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
                                        addrmode6:$addr, am6offset:$offset))];
}

def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
                            NEONvgetlaneu, addrmode6> {
  let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
                             NEONvgetlaneu, addrmode6> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
                             extractelt, addrmode6oneL32> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;

let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {

// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
            "", []>, Sched<[WriteVST1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4} = Rn{4};
}
def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{4} = Rn{4};
}

def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;

// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
            "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
            "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVST2LN";
}

def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;

// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2297 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, 2298 Sched<[WriteVST2]> { 2299 let Rm = 0b1111; 2300 let DecoderMethod = "DecodeVST3LN"; 2301} 2302 2303def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { 2304 let Inst{7-5} = lane{2-0}; 2305} 2306def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { 2307 let Inst{7-6} = lane{1-0}; 2308} 2309def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2310 let Inst{7} = lane{0}; 2311} 2312 2313def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2314def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2315def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>; 2316 2317// ...with double-spaced registers: 2318def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2319 let Inst{7-6} = lane{1-0}; 2320} 2321def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2322 let Inst{7} = lane{0}; 2323} 2324 2325def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2326def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2327 2328// ...with address register writeback: 2329class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2330 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2331 (ins addrmode6:$Rn, am6offset:$Rm, 2332 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2333 IIC_VST3lnu, "vst3", Dt, 2334 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2335 "$Rn.addr = $wb", []> { 2336 let DecoderMethod = "DecodeVST3LN"; 2337} 2338 2339def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2340 let Inst{7-5} = lane{2-0}; 2341} 2342def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2343 let Inst{7-6} = lane{1-0}; 2344} 2345def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2346 let Inst{7} = lane{0}; 2347} 2348 2349def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2350def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2351def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2352 2353def VST3LNq16_UPD : 
VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2354 let Inst{7-6} = lane{1-0}; 2355} 2356def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2357 let Inst{7} = lane{0}; 2358} 2359 2360def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2361def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>; 2362 2363// VST4LN : Vector Store (single 4-element structure from one lane) 2364class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2365 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2366 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2367 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2368 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2369 "", []>, Sched<[WriteVST2]> { 2370 let Rm = 0b1111; 2371 let Inst{4} = Rn{4}; 2372 let DecoderMethod = "DecodeVST4LN"; 2373} 2374 2375def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2376 let Inst{7-5} = lane{2-0}; 2377} 2378def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2379 let Inst{7-6} = lane{1-0}; 2380} 2381def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2382 let Inst{7} = lane{0}; 2383 let Inst{5} = Rn{5}; 2384} 2385 2386def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2387def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2388def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2389 2390// ...with double-spaced registers: 2391def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2392 let Inst{7-6} = lane{1-0}; 2393} 2394def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2395 let Inst{7} = lane{0}; 2396 let Inst{5} = Rn{5}; 2397} 2398 2399def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2400def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>; 2401 2402// ...with address register writeback: 2403class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2404 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2405 (ins addrmode6:$Rn, am6offset:$Rm, 2406 DPR:$Vd, DPR:$src2, 
DPR:$src3, DPR:$src4, nohash_imm:$lane), 2407 IIC_VST4lnu, "vst4", Dt, 2408 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2409 "$Rn.addr = $wb", []> { 2410 let Inst{4} = Rn{4}; 2411 let DecoderMethod = "DecodeVST4LN"; 2412} 2413 2414def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2415 let Inst{7-5} = lane{2-0}; 2416} 2417def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2418 let Inst{7-6} = lane{1-0}; 2419} 2420def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { 2421 let Inst{7} = lane{0}; 2422 let Inst{5} = Rn{5}; 2423} 2424 2425def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2426def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2427def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2428 2429def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2430 let Inst{7-6} = lane{1-0}; 2431} 2432def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2433 let Inst{7} = lane{0}; 2434 let Inst{5} = Rn{5}; 2435} 2436 2437def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2438def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>; 2439 2440} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 2441 2442// Use vld1/vst1 for unaligned f64 load / store 2443def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2444 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; 2445def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2446 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2447def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2448 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; 2449def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2450 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2451def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2452 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; 2453def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 
2454 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; 2455 2456// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2457// load / store if it's legal. 2458def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), 2459 (VLD1q64 addrmode6:$addr)>; 2460def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2461 (VST1q64 addrmode6:$addr, QPR:$value)>; 2462def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), 2463 (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; 2464def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2465 (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2466def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), 2467 (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; 2468def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2469 (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2470def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)), 2471 (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>; 2472def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 2473 (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; 2474 2475//===----------------------------------------------------------------------===// 2476// NEON pattern fragments 2477//===----------------------------------------------------------------------===// 2478 2479// Extract D sub-registers of Q registers. 
2480def DSubReg_i8_reg : SDNodeXForm<imm, [{ 2481 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2482 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, SDLoc(N), 2483 MVT::i32); 2484}]>; 2485def DSubReg_i16_reg : SDNodeXForm<imm, [{ 2486 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2487 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, SDLoc(N), 2488 MVT::i32); 2489}]>; 2490def DSubReg_i32_reg : SDNodeXForm<imm, [{ 2491 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2492 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, SDLoc(N), 2493 MVT::i32); 2494}]>; 2495def DSubReg_f64_reg : SDNodeXForm<imm, [{ 2496 assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); 2497 return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), SDLoc(N), 2498 MVT::i32); 2499}]>; 2500 2501// Extract S sub-registers of Q/D registers. 2502def SSubReg_f32_reg : SDNodeXForm<imm, [{ 2503 assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering"); 2504 return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), SDLoc(N), 2505 MVT::i32); 2506}]>; 2507 2508// Translate lane numbers from Q registers to D subregs. 2509def SubReg_i8_lane : SDNodeXForm<imm, [{ 2510 return CurDAG->getTargetConstant(N->getZExtValue() & 7, SDLoc(N), MVT::i32); 2511}]>; 2512def SubReg_i16_lane : SDNodeXForm<imm, [{ 2513 return CurDAG->getTargetConstant(N->getZExtValue() & 3, SDLoc(N), MVT::i32); 2514}]>; 2515def SubReg_i32_lane : SDNodeXForm<imm, [{ 2516 return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i32); 2517}]>; 2518 2519//===----------------------------------------------------------------------===// 2520// Instruction Classes 2521//===----------------------------------------------------------------------===// 2522 2523// Basic 2-register operations: double- and quad-register. 
2524class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2525 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2526 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2527 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2528 (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "", 2529 [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>; 2530class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2531 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, 2532 string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> 2533 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2534 (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "", 2535 [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>; 2536 2537// Basic 2-register intrinsics, both double- and quad-register. 2538class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2539 bits<2> op17_16, bits<5> op11_7, bit op4, 2540 InstrItinClass itin, string OpcodeStr, string Dt, 2541 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2542 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd), 2543 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2544 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2545class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2546 bits<2> op17_16, bits<5> op11_7, bit op4, 2547 InstrItinClass itin, string OpcodeStr, string Dt, 2548 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2549 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd), 2550 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2551 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2552 2553// Same as above, but not predicated. 
2554class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2555 InstrItinClass itin, string OpcodeStr, string Dt, 2556 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2557 : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), 2558 itin, OpcodeStr, Dt, 2559 [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; 2560 2561class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, 2562 InstrItinClass itin, string OpcodeStr, string Dt, 2563 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2564 : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), 2565 itin, OpcodeStr, Dt, 2566 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2567 2568// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). 2569class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2570 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2571 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2572 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), 2573 itin, OpcodeStr, Dt, 2574 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; 2575 2576// Same as N2VQIntXnp but with Vd as a src register. 2577class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, 2578 bit op7, InstrItinClass itin, string OpcodeStr, string Dt, 2579 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> 2580 : N2Vnp<op19_18, op17_16, op10_8, op7, op6, 2581 (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), 2582 itin, OpcodeStr, Dt, 2583 [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { 2584 let Constraints = "$src = $Vd"; 2585} 2586 2587// Narrow 2-register operations. 
2588class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2589 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2590 InstrItinClass itin, string OpcodeStr, string Dt, 2591 ValueType TyD, ValueType TyQ, SDNode OpNode> 2592 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2593 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2594 [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>; 2595 2596// Narrow 2-register intrinsics. 2597class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2598 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2599 InstrItinClass itin, string OpcodeStr, string Dt, 2600 ValueType TyD, ValueType TyQ, SDPatternOperator IntOp> 2601 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd), 2602 (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2603 [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>; 2604 2605// Long 2-register operations (currently only used for VMOVL). 2606class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2607 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2608 InstrItinClass itin, string OpcodeStr, string Dt, 2609 ValueType TyQ, ValueType TyD, SDNode OpNode> 2610 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2611 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2612 [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>; 2613 2614// Long 2-register intrinsics. 2615class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, 2616 bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, 2617 InstrItinClass itin, string OpcodeStr, string Dt, 2618 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp> 2619 : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd), 2620 (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", 2621 [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>; 2622 2623// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
// Both operands are read and written in place (no pattern; selected manually).
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register operation with a scalar (by-lane) second operand, 32-bit lanes.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// ...16-bit lanes (scalar restricted to DPR_8 by the encoding).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$Vn),
                              (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$Vn),
                                 (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                      imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Non-predicated variant (N3Vnp base).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane (scalar) variants, 32-bit lanes.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}

// ...16-bit lanes.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty,
                  SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (Ty DPR:$Vd),
                    (Ty (IntOp (Ty DPR:$Vn),
                               (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// "Shifted"-operand form: $Vm and $Vn are swapped in the assembly string and
// the pattern relative to N3VDInt.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
          f, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  let Constraints = "$src = $Vd";
}

class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
              [(set (ResTy QPR:$Vd),
                    (ResTy (IntOp (ResTy QPR:$Vn),
                                  (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                       imm:$lane)))))]> {
  let isCommutable = 0;
}
// Q-register counterpart of N3VDIntSh ($Vm/$Vn swapped).
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
// $Vd = OpNode($src1, MulOp($Vn, $Vm)), with the accumulator tied to $Vd.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp,
                SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                            (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp,
                  SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                           imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDPatternOperator MulOp,
                    SDPatternOperator ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
              (outs DPR:$Vd),
              (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (Ty DPR:$Vd),
                    (Ty (ShOp (Ty DPR:$src1),
                              (Ty (MulOp DPR:$Vn,
                                         (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                           imm:$lane)))))))]>;

class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                            (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                                    imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
              (outs QPR:$Vd),
              (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
              NVMulSLFrm, itin,
              OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
              [(set (ResTy QPR:$Vd),
                    (ResTy (ShOp (ResTy QPR:$src1),
                                 (ResTy (MulOp QPR:$Vn,
                                               (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                                    imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2933class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2934 InstrItinClass itin, string OpcodeStr, string Dt, 2935 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2936 : N3V<op24, op23, op21_20, op11_8, 0, op4, 2937 (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin, 2938 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2939 [(set DPR:$Vd, (Ty (OpNode DPR:$src1, 2940 (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>; 2941class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, 2942 InstrItinClass itin, string OpcodeStr, string Dt, 2943 ValueType Ty, SDPatternOperator IntOp, SDNode OpNode> 2944 : N3V<op24, op23, op21_20, op11_8, 1, op4, 2945 (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin, 2946 OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd", 2947 [(set QPR:$Vd, (Ty (OpNode QPR:$src1, 2948 (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>; 2949 2950// Neon 3-argument intrinsics, both double- and quad-register. 2951// The destination register is also used as the first source operand register. 
// Double-register 3-argument intrinsic: the intrinsic itself receives the
// tied accumulator $src1 as its first argument.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
// Quad-register variant of N3VDInt3.
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// D-register sources, Q-register (widened) destination; the TyQ accumulator
// $src1 is combined by OpNode with the TyD x TyD -> TyQ product from MulOp.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
// Long multiply-add/sub with a 32-bit scalar operand taken from a lane of a
// DPR_VFP2 register (by-lane form).
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
// 16-bit-element counterpart of N3VLMulOpSL: the scalar comes from a lane of
// a DPR_8 register selected by VectorIndex16.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// IntOp produces a TyD result from the D-register sources, ExtOp widens it to
// TyQ, and OpNode folds it into the tied TyQ accumulator $src1.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// Long 3-argument intrinsic: Q-register destination/accumulator, D-register
// sources; the intrinsic receives the tied TyQ accumulator as its first arg.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane form of N3VLInt3 for 32-bit elements (DPR_VFP2 + VectorIndex32).
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// By-lane form of N3VLInt3 for 16-bit elements (DPR_8 + VectorIndex16).
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Narrowing 3-register intrinsic: two Q-register sources, D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Two D-register sources, widened Q-register result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// By-lane long operation, 32-bit elements (DPR_VFP2 + VectorIndex32).
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// By-lane long operation, 16-bit elements (DPR_8 + VectorIndex16).
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Long operation where both D-register operands are widened by ExtOp before
// OpNode combines them into the Q-register result.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// IntOp produces a TyD result which ExtOp widens to the TyQ destination.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// The intrinsic itself performs the widening: TyD x TyD -> TyQ.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
// Unpredicated long 3-register intrinsic (N3Vnp encoding).
// NOTE(review): the Commutable parameter is accepted but never used here
// (isCommutable is not set, unlike the predicated classes above) — confirm
// whether that is intentional before relying on commutation.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// By-lane long intrinsic, 32-bit elements (DPR_VFP2 + VectorIndex32).
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// By-lane long intrinsic, 16-bit elements (DPR_8 + VectorIndex16).
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// Q-register first operand, D-register second operand widened by ExtOp.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
// Quad-register variant of N2VDPLInt.
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
// Pairwise long accumulate, double-register: the tied $src1 accumulator is
// passed to the intrinsic along with $Vm (e.g. VPADAL).
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
// Quad-register variant of N2VDPLInt2.
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Double-register shift by immediate $SIMM.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register shift by immediate $SIMM.
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Long shift by immediate: D-register source, widened Q-register result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;

// Narrow shift by immediate.
// Q-register source, narrowed D-register result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy,
             SDPatternOperator OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 ImmTy:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift-right-and-accumulate, double-register (e.g. VSRA): ShOp($Vm, imm) is
// added to the tied accumulator $src1.
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
// Quad-register variant of N2VDShAdd.
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
// Shift-and-insert, double-register (VSLI/VSRI style): ShOp receives the tied
// destination $src1, the source $Vm and the immediate.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
// Quad-register variant of N2VDShIns.
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
// Fixed-point convert with fractional-bits immediate, double-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
// Quad-register variant of N2VCvtD.
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons.
// source operand element sizes of 8, 16 and 32 bits:
// Integer defs use bits<2> size fields 0b00/0b01/0b10; the f32/f16 defs reuse
// the matching integer size and set Inst{10} (the F bit) explicitly.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // Float comparison produces an integer mask result (v2i32 from v2f32).
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f16", asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
              Requires<[HasNEON,HasFullFP16]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
// Note the Dt suffix names the (wider) source element size.
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
// specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// By-lane (scalar) 3-register operations for 16- and 32-bit elements.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same as N3VInt_HS, but using the "Sh" (operand-swapped shift) instruction
// classes N3VDIntSh/N3VQIntSh; no Commutable parameter.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// By-lane 3-register intrinsics for 16- and 32-bit elements.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHS.
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHSD.
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// By-lane long operations for 16- and 32-bit elements.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations with explicitly extended operands.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// By-lane long intrinsics for 16- and 32-bit elements.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}

// ....with explicit extend (VABDL).
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 16 and 32 bits:
multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}

// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
}

// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itin16, InstrItinClass itin32,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABAL).
multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                            InstrItinClass itin, string OpcodeStr, string Dt,
                            SDPatternOperator IntOp, SDNode ExtOp,
                            SDNode OpNode> {
  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
                           IntOp, ExtOp, OpNode>;
  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
                           IntOp, ExtOp, OpNode>;
  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
                           IntOp, ExtOp, OpNode>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       string baseOpc, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
                             // imm6 = xxxxxx
}

// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
// with f of either N2RegVShLFrm or N2RegVShRFrm
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
// element sizes of 8, 16, 32 bits:
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7,
                     OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15,
                     OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                     OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31,
                     OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
// element sizes of 16, 32, 64 bits:
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDPatternOperator OpNode> {
  def v8i8  : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "16"),
                     v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
                     v4f16, v4f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
                     v8f16, v8f16, fadd, 1>,
                Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Multiply Operations.

// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
                     v4f16, v4f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
                     v8f16, v8f16, fmul, 1>,
                Requires<[HasNEON,HasFullFP16]>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;
def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;
def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
                         v4f16, fmul>,
                Requires<[HasNEON,HasFullFP16]>;

def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
                    (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
                    (i32 0))>;


// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "s", NEONvmulls, 1>;
  defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                         "vmull", "u", NEONvmullu, 1>;
  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                             "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                   Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
                          v4f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
                          v8f16, fmul_su, fadd_mlx>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
                              v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
                              v8f16, v4f16, fmul, fadd>,
                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                           (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Double Multiply-Op vector operations,
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate Long
  // (Q += D * D)
  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                             null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
                                                   (v4i16 DPR:$Vm))))),
            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
                                                   (v2i32 DPR:$Vm))))),
            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
                                                   (v8i16 QPR:$Vm))))),
            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
                                                   (v4i32 QPR:$Vm))))),
            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;

  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
                                  null_frag>;
  def : Pat<(v4i16 (int_arm_neon_vqadds
                     (v4i16 DPR:$src1),
                     (v4i16 (int_arm_neon_vqrdmulh
                              (v4i16 DPR:$Vn),
                              (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                   imm:$lane)))))),
            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
                                    imm:$lane))>;
  def : Pat<(v2i32 (int_arm_neon_vqadds
                     (v2i32 DPR:$src1),
                     (v2i32 (int_arm_neon_vqrdmulh
                              (v2i32 DPR:$Vn),
                              (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                   imm:$lane)))))),
            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
                                    imm:$lane))>;
  def : Pat<(v8i16 (int_arm_neon_vqadds
                     (v8i16 QPR:$src1),
                     (v8i16 (int_arm_neon_vqrdmulh
                              (v8i16 QPR:$src2),
                              (v8i16 (NEONvduplane (v8i16 QPR:$src3),
                                                   imm:$lane)))))),
            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
                                    (v8i16 QPR:$src2),
                                    (v4i16 (EXTRACT_SUBREG
                                             QPR:$src3,
                                             (DSubReg_i16_reg imm:$lane))),
                                    (SubReg_i16_lane imm:$lane)))>;
  def : Pat<(v4i32 (int_arm_neon_vqadds
                     (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqrdmulh
                              (v4i32 QPR:$src2),
                              (v4i32 (NEONvduplane (v4i32 QPR:$src3),
imm:$lane)))))), 4518 (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1), 4519 (v4i32 QPR:$src2), 4520 (v2i32 (EXTRACT_SUBREG 4521 QPR:$src3, 4522 (DSubReg_i32_reg imm:$lane))), 4523 (SubReg_i32_lane imm:$lane)))>; 4524 4525 // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract Long 4526 // (Q -= D * D) 4527 defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D, 4528 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4529 null_frag>; 4530 def : Pat<(v4i16 (int_arm_neon_vqsubs 4531 (v4i16 DPR:$src1), 4532 (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn), 4533 (v4i16 DPR:$Vm))))), 4534 (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4535 def : Pat<(v2i32 (int_arm_neon_vqsubs 4536 (v2i32 DPR:$src1), 4537 (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn), 4538 (v2i32 DPR:$Vm))))), 4539 (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>; 4540 def : Pat<(v8i16 (int_arm_neon_vqsubs 4541 (v8i16 QPR:$src1), 4542 (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn), 4543 (v8i16 QPR:$Vm))))), 4544 (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4545 def : Pat<(v4i32 (int_arm_neon_vqsubs 4546 (v4i32 QPR:$src1), 4547 (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn), 4548 (v4i32 QPR:$Vm))))), 4549 (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>; 4550 4551 defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D, 4552 IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s", 4553 null_frag>; 4554 def : Pat<(v4i16 (int_arm_neon_vqsubs 4555 (v4i16 DPR:$src1), 4556 (v4i16 (int_arm_neon_vqrdmulh 4557 (v4i16 DPR:$Vn), 4558 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4559 imm:$lane)))))), 4560 (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>; 4561 def : Pat<(v2i32 (int_arm_neon_vqsubs 4562 (v2i32 DPR:$src1), 4563 (v2i32 (int_arm_neon_vqrdmulh 4564 (v2i32 DPR:$Vn), 4565 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4566 imm:$lane)))))), 4567 (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4568 imm:$lane))>; 4569 def : Pat<(v8i16 (int_arm_neon_vqsubs 
4570 (v8i16 QPR:$src1), 4571 (v8i16 (int_arm_neon_vqrdmulh 4572 (v8i16 QPR:$src2), 4573 (v8i16 (NEONvduplane (v8i16 QPR:$src3), 4574 imm:$lane)))))), 4575 (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1), 4576 (v8i16 QPR:$src2), 4577 (v4i16 (EXTRACT_SUBREG 4578 QPR:$src3, 4579 (DSubReg_i16_reg imm:$lane))), 4580 (SubReg_i16_lane imm:$lane)))>; 4581 def : Pat<(v4i32 (int_arm_neon_vqsubs 4582 (v4i32 QPR:$src1), 4583 (v4i32 (int_arm_neon_vqrdmulh 4584 (v4i32 QPR:$src2), 4585 (v4i32 (NEONvduplane (v4i32 QPR:$src3), 4586 imm:$lane)))))), 4587 (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1), 4588 (v4i32 QPR:$src2), 4589 (v2i32 (EXTRACT_SUBREG 4590 QPR:$src3, 4591 (DSubReg_i32_reg imm:$lane))), 4592 (SubReg_i32_lane imm:$lane)))>; 4593} 4594// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) 4595defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, 4596 "vqdmlal", "s", null_frag>; 4597defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; 4598 4599def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4600 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4601 (v4i16 DPR:$Vm))))), 4602 (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4603def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4604 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4605 (v2i32 DPR:$Vm))))), 4606 (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4607def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), 4608 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4609 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4610 imm:$lane)))))), 4611 (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4612def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), 4613 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4614 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4615 imm:$lane)))))), 4616 (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; 4617 4618// VMLS : Vector Multiply Subtract (integer and floating-point) 4619defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, 
IIC_VMACi32D, 4620 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4621def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", 4622 v2f32, fmul_su, fsub_mlx>, 4623 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4624def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", 4625 v4f32, fmul_su, fsub_mlx>, 4626 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; 4627def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", 4628 v4f16, fmul, fsub>, 4629 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4630def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", 4631 v8f16, fmul, fsub>, 4632 Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; 4633defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, 4634 IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; 4635def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", 4636 v2f32, fmul_su, fsub_mlx>, 4637 Requires<[HasNEON, UseFPVMLx]>; 4638def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", 4639 v4f32, v2f32, fmul_su, fsub_mlx>, 4640 Requires<[HasNEON, UseFPVMLx]>; 4641def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16", 4642 v4f16, fmul, fsub>, 4643 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4644def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16", 4645 v8f16, v4f16, fmul, fsub>, 4646 Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; 4647 4648def : Pat<(v8i16 (sub (v8i16 QPR:$src1), 4649 (mul (v8i16 QPR:$src2), 4650 (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), 4651 (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), 4652 (v4i16 (EXTRACT_SUBREG QPR:$src3, 4653 (DSubReg_i16_reg imm:$lane))), 4654 (SubReg_i16_lane imm:$lane)))>; 4655 4656def : Pat<(v4i32 (sub (v4i32 QPR:$src1), 4657 (mul (v4i32 QPR:$src2), 4658 (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), 4659 (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), 4660 (v2i32 (EXTRACT_SUBREG QPR:$src3, 4661 
(DSubReg_i32_reg imm:$lane))), 4662 (SubReg_i32_lane imm:$lane)))>; 4663 4664def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), 4665 (fmul_su (v4f32 QPR:$src2), 4666 (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), 4667 (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), 4668 (v2f32 (EXTRACT_SUBREG QPR:$src3, 4669 (DSubReg_i32_reg imm:$lane))), 4670 (SubReg_i32_lane imm:$lane)))>, 4671 Requires<[HasNEON, UseFPVMLx]>; 4672 4673// VMLSL : Vector Multiply Subtract Long (Q -= D * D) 4674defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4675 "vmlsl", "s", NEONvmulls, sub>; 4676defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, 4677 "vmlsl", "u", NEONvmullu, sub>; 4678 4679defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; 4680defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; 4681 4682// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) 4683defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, 4684 "vqdmlsl", "s", null_frag>; 4685defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; 4686 4687def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4688 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4689 (v4i16 DPR:$Vm))))), 4690 (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4691def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4692 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4693 (v2i32 DPR:$Vm))))), 4694 (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; 4695def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), 4696 (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), 4697 (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), 4698 imm:$lane)))))), 4699 (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; 4700def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), 4701 (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), 4702 (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), 4703 imm:$lane)))))), 4704 (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, 
DPR_VFP2:$Vm, imm:$lane)>; 4705 4706// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 4707def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", 4708 v2f32, fmul_su, fadd_mlx>, 4709 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4710 4711def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", 4712 v4f32, fmul_su, fadd_mlx>, 4713 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4714def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16", 4715 v4f16, fmul, fadd>, 4716 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4717 4718def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16", 4719 v8f16, fmul, fadd>, 4720 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4721 4722// Fused Vector Multiply Subtract (floating-point) 4723def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", 4724 v2f32, fmul_su, fsub_mlx>, 4725 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4726def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", 4727 v4f32, fmul_su, fsub_mlx>, 4728 Requires<[HasNEON,HasVFP4,UseFusedMAC]>; 4729def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16", 4730 v4f16, fmul, fsub>, 4731 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4732def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16", 4733 v8f16, fmul, fsub>, 4734 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>; 4735 4736// Match @llvm.fma.* intrinsics 4737def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), 4738 (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4739 Requires<[HasVFP4]>; 4740def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), 4741 (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4742 Requires<[HasVFP4]>; 4743def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)), 4744 (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, 4745 Requires<[HasVFP4]>; 4746def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), 4747 (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, 4748 Requires<[HasVFP4]>; 4749 4750// ARMv8.2a dot 
product instructions. 4751// We put them in the VFPV8 decoder namespace because the ARM and Thumb 4752// encodings are the same and thus no further bit twiddling is necessary 4753// in the disassembler. 4754class VDOT<bit op6, bit op4, RegisterClass RegTy, string Asm, string AsmTy, 4755 ValueType AccumTy, ValueType InputTy, 4756 SDPatternOperator OpNode> : 4757 N3Vnp<0b11000, 0b10, 0b1101, op6, op4, (outs RegTy:$dst), 4758 (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD, 4759 Asm, AsmTy, 4760 [(set (AccumTy RegTy:$dst), 4761 (OpNode (AccumTy RegTy:$Vd), 4762 (InputTy RegTy:$Vn), 4763 (InputTy RegTy:$Vm)))]> { 4764 let Predicates = [HasDotProd]; 4765 let DecoderNamespace = "VFPV8"; 4766 let Constraints = "$dst = $Vd"; 4767} 4768 4769def VUDOTD : VDOT<0, 1, DPR, "vudot", "u8", v2i32, v8i8, int_arm_neon_udot>; 4770def VSDOTD : VDOT<0, 0, DPR, "vsdot", "s8", v2i32, v8i8, int_arm_neon_sdot>; 4771def VUDOTQ : VDOT<1, 1, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>; 4772def VSDOTQ : VDOT<1, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>; 4773 4774// Indexed dot product instructions: 4775multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty, 4776 ValueType AccumType, ValueType InputType, SDPatternOperator OpNode, 4777 dag RHS> { 4778 def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst), 4779 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), 4780 N3RegFrm, IIC_VDOTPROD, opc, dt, []> { 4781 bit lane; 4782 let Inst{5} = lane; 4783 let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); 4784 let Constraints = "$dst = $Vd"; 4785 let Predicates = [HasDotProd]; 4786 let DecoderNamespace = "VFPV8"; 4787 } 4788 4789 def : Pat< 4790 (AccumType (OpNode (AccumType Ty:$Vd), 4791 (InputType Ty:$Vn), 4792 (InputType (bitconvert (AccumType 4793 (NEONvduplane (AccumType Ty:$Vm), 4794 VectorIndex32:$lane)))))), 4795 (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>; 4796} 4797 4798defm VUDOTDI : 
DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8, 4799 int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>; 4800defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8, 4801 int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>; 4802defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8, 4803 int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4804defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8, 4805 int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>; 4806 4807 4808// ARMv8.3 complex operations 4809class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, 4810 InstrItinClass itin, dag oops, dag iops, 4811 string opc, string dt, list<dag> pattern> 4812 : N3VCP8<{?,?}, {op21,s}, q, op4, oops, 4813 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ 4814 bits<2> rot; 4815 let Inst{24-23} = rot; 4816} 4817 4818class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, 4819 InstrItinClass itin, dag oops, dag iops, string opc, 4820 string dt, list<dag> pattern> 4821 : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, 4822 iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { 4823 bits<1> rot; 4824 let Inst{24} = rot; 4825} 4826 4827class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin, 4828 dag oops, dag iops, string opc, string dt, 4829 list<dag> pattern> 4830 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4831 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4832 bits<2> rot; 4833 bit lane; 4834 4835 let Inst{21-20} = rot; 4836 let Inst{5} = lane; 4837} 4838 4839class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin, 4840 dag oops, dag iops, string opc, string dt, 4841 list<dag> pattern> 4842 : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, 4843 "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { 4844 bits<2> rot; 4845 bit lane; 4846 4847 let Inst{21-20} = rot; 4848 let Inst{5} = Vm{4}; 4849 // This is needed because the lane operand does not have any bits in the 
4850 // encoding (it only has one possible value), so we need to manually set it 4851 // to it's default value. 4852 let DecoderMethod = "DecodeNEONComplexLane64Instruction"; 4853} 4854 4855multiclass N3VCP8ComplexTied<bit op21, bit op4, 4856 string OpcodeStr, SDPatternOperator Op> { 4857 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4858 def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd), 4859 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4860 OpcodeStr, "f16", []>; 4861 def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd), 4862 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4863 OpcodeStr, "f16", []>; 4864 } 4865 let Predicates = [HasNEON,HasV8_3a] in { 4866 def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd), 4867 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), 4868 OpcodeStr, "f32", []>; 4869 def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd), 4870 (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), 4871 OpcodeStr, "f32", []>; 4872 } 4873} 4874 4875multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, 4876 string OpcodeStr, SDPatternOperator Op> { 4877 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4878 def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD, 4879 (outs DPR:$Vd), 4880 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 4881 OpcodeStr, "f16", []>; 4882 def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ, 4883 (outs QPR:$Vd), 4884 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 4885 OpcodeStr, "f16", []>; 4886 } 4887 let Predicates = [HasNEON,HasV8_3a] in { 4888 def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD, 4889 (outs DPR:$Vd), 4890 (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), 4891 OpcodeStr, "f32", []>; 4892 def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ, 4893 (outs QPR:$Vd), 4894 (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), 
4895 OpcodeStr, "f32", []>; 4896 } 4897} 4898 4899// These instructions index by pairs of lanes, so the VectorIndexes are twice 4900// as wide as the data types. 4901multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr, 4902 SDPatternOperator Op> { 4903 let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { 4904 def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD, 4905 (outs DPR:$Vd), 4906 (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, 4907 VectorIndex32:$lane, complexrotateop:$rot), 4908 OpcodeStr, "f16", []>; 4909 def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ, 4910 (outs QPR:$Vd), 4911 (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, 4912 VectorIndex32:$lane, complexrotateop:$rot), 4913 OpcodeStr, "f16", []>; 4914 } 4915 let Predicates = [HasNEON,HasV8_3a] in { 4916 def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD, 4917 (outs DPR:$Vd), 4918 (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 4919 complexrotateop:$rot), 4920 OpcodeStr, "f32", []>; 4921 def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ, 4922 (outs QPR:$Vd), 4923 (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane, 4924 complexrotateop:$rot), 4925 OpcodeStr, "f32", []>; 4926 } 4927} 4928 4929defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; 4930defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; 4931defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; 4932 4933// Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// Half-precision variants require the full FP16 extension.
def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
                     v4f16, v4f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
                     v8f16, v8f16, fsub, 0>,
                Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High
Half (D=Q-Q) 4973defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", 4974 int_arm_neon_vrsubhn, 0>; 4975 4976def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), 4977 (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; 4978def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), 4979 (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; 4980def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), 4981 (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; 4982 4983// Vector Comparisons. 4984 4985// VCEQ : Vector Compare Equal 4986defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 4987 IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; 4988def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, 4989 NEONvceq, 1>; 4990def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, 4991 NEONvceq, 1>; 4992def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, 4993 NEONvceq, 1>, 4994 Requires<[HasNEON, HasFullFP16]>; 4995def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, 4996 NEONvceq, 1>, 4997 Requires<[HasNEON, HasFullFP16]>; 4998 4999let TwoOperandAliasConstraint = "$Vm = $Vd" in 5000defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", 5001 "$Vd, $Vm, #0", NEONvceqz>; 5002 5003// VCGE : Vector Compare Greater Than or Equal 5004defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5005 IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; 5006defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5007 IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; 5008def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, 5009 NEONvcge, 0>; 5010def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, 5011 NEONvcge, 0>; 5012def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, 5013 NEONvcge, 0>, 5014 Requires<[HasNEON, HasFullFP16]>; 5015def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", 
"f16", v8i16, v8f16, 5016 NEONvcge, 0>, 5017 Requires<[HasNEON, HasFullFP16]>; 5018 5019let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5020defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", 5021 "$Vd, $Vm, #0", NEONvcgez>; 5022defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", 5023 "$Vd, $Vm, #0", NEONvclez>; 5024} 5025 5026// VCGT : Vector Compare Greater Than 5027defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5028 IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; 5029defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, 5030 IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; 5031def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, 5032 NEONvcgt, 0>; 5033def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, 5034 NEONvcgt, 0>; 5035def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, 5036 NEONvcgt, 0>, 5037 Requires<[HasNEON, HasFullFP16]>; 5038def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, 5039 NEONvcgt, 0>, 5040 Requires<[HasNEON, HasFullFP16]>; 5041 5042let TwoOperandAliasConstraint = "$Vm = $Vd" in { 5043defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", 5044 "$Vd, $Vm, #0", NEONvcgtz>; 5045defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", 5046 "$Vd, $Vm, #0", NEONvcltz>; 5047} 5048 5049// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) 5050def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5051 "f32", v2i32, v2f32, int_arm_neon_vacge, 0>; 5052def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5053 "f32", v4i32, v4f32, int_arm_neon_vacge, 0>; 5054def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", 5055 "f16", v4i16, v4f16, int_arm_neon_vacge, 0>, 5056 Requires<[HasNEON, HasFullFP16]>; 5057def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", 5058 "f16", 
v8i16, v8f16, int_arm_neon_vacge, 0>,
                Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                      "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// NOTE: a NEON compare produces an integer lane mask, so the result type is
// v8i16 (was incorrectly v8f16), matching VACGThd and VACGEhq above.
def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                      "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
              Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                    IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// Assembler aliases: vaclt/vacle with swapped source operands map onto
// vacgt/vacge (a < b  <=>  b > a).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
}

// Two-operand forms: "vaclt Vd, Vm" behaves as "vaclt Vd, Vd, Vm".
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd,
pred:$p)>;
// "vacle a, b" is the pseudo-op for "vacge b, a" (operands swapped); same for
// the "vaclt"/"vacgt" pairs below.
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in {
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
}

// Vector Bitwise Operations.

// Bitwise NOT expressed as XOR with an all-ones vector, for 64-bit (D) and
// 128-bit (Q) operands.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR (immediate) : bitwise OR of a modified immediate into each lane.
// The '?' bits of the cmode field are taken from the encoded immediate, so
// each record copies the relevant SIMM bits into Inst below.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate) : clear the modified-immediate bits in each lane.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

// VMVN : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

// The i32 forms accept the full VMOV-style modified immediate, so the whole
// cmode field comes from the operand.
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// All other element types of the vbsl intrinsic map onto the same d-register
// instruction.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Recognize the open-coded select idiom (Vn & Vd) | (Vm & ~Vd) as vbsl.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.
// VABD : Vector Absolute Difference
defm VABDs   : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu   : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd  : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq  : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
def  VABDhd  : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
                       "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def  VABDhq  : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                       "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// Match abs-of-difference of zero-extended operands onto the unsigned VABDL.
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

// (x >>s shift) of the widened difference; the helper for the v2i64 ABS
// expansion below.
def abd_shr :
    PatFrag<(ops node:$in1, node:$in2, node:$shift),
            (NEONvshrs (sub (zext node:$in1),
                            (zext node:$in2)), (i32 $shift))>;

def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
                                                   (zext (v2i32 DPR:$opB))),
                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.
// VMAX : Vector Maximum
defm VMAXs   : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "s", smax, 1>;
defm VMAXu   : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmax", "u", umax, 1>;
// Floating-point forms use the NaN-propagating max (fmaxnan), matching the
// NEON VMAX semantics rather than IEEE maxNum.
def  VMAXfd  : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f32",
                       v2f32, v2f32, fmaxnan, 1>;
def  VMAXfq  : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f32",
                       v4f32, v4f32, fmaxnan, 1>;
def  VMAXhd  : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmax", "f16",
                       v4f16, v4f16, fmaxnan, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def  VMAXhq  : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmax", "f16",
                       v8f16, v8f16, fmaxnan, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
                             v2f32, v2f32, fmaxnum, 1>,
                   Requires<[HasV8, HasNEON]>;
  def VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
                             v4f32, v4f32, fmaxnum, 1>,
                   Requires<[HasV8, HasNEON]>;
  def VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f16",
                             v4f16, v4f16, fmaxnum, 1>,
                   Requires<[HasV8, HasNEON, HasFullFP16]>;
  def VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f16",
                             v8f16, v8f16, fmaxnum, 1>,
                   Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// VMIN : Vector Minimum
defm VMINs   : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "s", smin, 1>;
defm VMINu   : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                          "vmin", "u", umin, 1>;
def  VMINfd  : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f32",
                       v2f32, v2f32, fminnan, 1>;
def  VMINfq  : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f32",
                       v4f32, v4f32, fminnan, 1>;
def  VMINhd  : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
                       "vmin", "f16",
                       v4f16, v4f16, fminnan, 1>,
               Requires<[HasNEON, HasFullFP16]>;
def  VMINhq  : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                       "vmin", "f16",
                       v8f16, v8f16, fminnan, 1>,
               Requires<[HasNEON, HasFullFP16]>;

// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
                             v2f32, v2f32, fminnum, 1>,
                   Requires<[HasV8, HasNEON]>;
  def VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
                             v4f32, v4f32, fminnum, 1>,
                   Requires<[HasV8, HasNEON]>;
  def VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f16",
                             v4f16, v4f16, fminnum, 1>,
                   Requires<[HasV8, HasNEON, HasFullFP16]>;
  def VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f16",
                             v8f16, v8f16, fminnum, 1>,
                   Requires<[HasV8, HasNEON, HasFullFP16]>;
}

// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f16",
                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAD, "vrecpe", "u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
                        IIC_VUNAQ, "vrecpe", "u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f32",
                        v2f32, v2f32, int_arm_neon_vrecpe>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f32",
                        v4f32, v4f32, int_arm_neon_vrecpe>;
def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAD, "vrecpe", "f16",
                        v4f16, v4f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
                        IIC_VUNAQ, "vrecpe", "f16",
                        v8f16, v8f16, int_arm_neon_vrecpe>,
                Requires<[HasNEON, HasFullFP16]>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f32",
                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f32",
                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrecps", "f16",
                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrecps", "f16",
                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAD, "vrsqrte", "u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
                         IIC_VUNAQ, "vrsqrte", "u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f16",
                         v4f16, v4f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;
def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f16",
                         v8f16, v8f16, int_arm_neon_vrsqrte>,
                 Requires<[HasNEON, HasFullFP16]>;

// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f32",
                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f32",
                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSD, "vrsqrts", "f16",
                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;
def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
                        IIC_VRECSQ, "vrsqrts", "f16",
                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
                Requires<[HasNEON, HasFullFP16]>;

// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                            "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
  PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The maximum-shift form has a distinct fixed encoding in bits 21-16, hence
// the dedicated class and decoder method.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, null_frag> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32>;

// A shift-left-long by exactly the element width matches the max-shift form;
// the source bits land entirely in the high half, so sext/zext/anyext all
// produce the same result.
def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
          (VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
          (VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
          (VSHLLi32 DPR:$Rn, 32)>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           PatFrag<(ops node:$Rn, node:$amt),
                                   (trunc (NEONvshrs node:$Rn, node:$amt))>>;

// The truncation discards the shifted-out bits, so an unsigned shift right
// narrows to the same result as the signed form matched above.
def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                            "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v4f16, v4f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;
def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
                     "vabs", "f16",
                     v8f16, v8f16, fabs>,
                Requires<[HasNEON, HasFullFP16]>;

// VQABS : Vector Saturating Absolute Value
defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
                           int_arm_neon_vqabs>;

// Vector Negate.
// Integer negation expressed as (0 - x), for D and Q operands.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;
def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f16", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
                Requires<[HasNEON, HasFullFP16]>;

def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap
// Both registers are read and written, hence the tied in/out operand pairs
// and the empty pattern list (never selected from DAG patterns).
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.
5935 5936// VMOV : Vector Move (Register) 5937def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5938 (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; 5939def : NEONInstAlias<"vmov${p} $Vd, $Vm", 5940 (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; 5941 5942// VMOV : Vector Move (Immediate) 5943 5944// Although VMOVs are not strictly speaking cheap, they are as expensive 5945// as their copies counterpart (VORR), so we should prefer rematerialization 5946// over splitting when it applies. 5947let isReMaterializable = 1, isAsCheapAsAMove=1 in { 5948def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), 5949 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5950 "vmov", "i8", "$Vd, $SIMM", "", 5951 [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; 5952def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), 5953 (ins nImmSplatI8:$SIMM), IIC_VMOVImm, 5954 "vmov", "i8", "$Vd, $SIMM", "", 5955 [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; 5956 5957def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), 5958 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5959 "vmov", "i16", "$Vd, $SIMM", "", 5960 [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { 5961 let Inst{9} = SIMM{9}; 5962} 5963 5964def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), 5965 (ins nImmSplatI16:$SIMM), IIC_VMOVImm, 5966 "vmov", "i16", "$Vd, $SIMM", "", 5967 [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { 5968 let Inst{9} = SIMM{9}; 5969} 5970 5971def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), 5972 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5973 "vmov", "i32", "$Vd, $SIMM", "", 5974 [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { 5975 let Inst{11-8} = SIMM{11-8}; 5976} 5977 5978def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), 5979 (ins nImmVMOVI32:$SIMM), IIC_VMOVImm, 5980 "vmov", "i32", "$Vd, $SIMM", "", 5981 [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { 5982 let Inst{11-8} = SIMM{11-8}; 5983} 5984 
// i64 and f32 immediate forms (still inside the
// isReMaterializable/isAsCheapAsAMove region opened above).
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable, isAsCheapAsAMove

// Add support for bytes replication feature, so it could be GAS compatible.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are incorrect, but we could deal with such cases.
  // For last two instructions, for example, it should emit:
  // "vmov.i8 d0, #0xab"
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add same support for VMVN instructions. So instruction:
  // "vmvn.i32 d0, #0xabababab"
  // actually means:
  // "vmov.i8 d0, #0x54"
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateI8InstAlias<i16>;
defm : NEONImmReplicateI8InstAlias<i32>;
defm : NEONImmReplicateI8InstAlias<i64>;

// Similar to above for types other than i8, e.g.:
// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In this case we do not canonicalize VMVN to VMOV
multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
                                     NeonI NV8, NeonI NV16, ValueType To> {
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
}

defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i32>;
defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
                                 VMVNv4i16, VMVNv8i16, i64>;
defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
                                 VMVNv2i32, VMVNv4i32, i64>;
// TODO: add "VMOV <-> VMVN" conversion for cases like
// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"

// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
// setting a register to zero.

// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they are sometimes rather expensive (in general).

// Zero-cycle-zeroing pseudos: expand to "vmov.i32 <reg>, #0" and are only
// selected when the subtarget reports the HasZCZ feature.
let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set DPR:$Vd, (v2i32 NEONimmAllZerosV))],
                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
                               [(set QPR:$Vd, (v4i32 NEONimmAllZerosV))],
                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
               Requires<[HasZCZ]>;
}

// VMOV : Vector Get Lane (move scalar to ARM core register)

// The lane index is split across the encoding: for 8-bit lanes bit 2 of the
// index goes to Inst{21} and bits 1-0 to Inst{6-5}; for 16-bit lanes it is
// Inst{21} and Inst{6}.
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                         (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                         IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                                      imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                                       imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                                    imm:$lane))]>,
                Requires<[HasVFP2, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane extracts: narrow to the containing D subregister first,
// then reuse the D-register VGETLN instruction on the remapped lane.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                                          (DSubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                                            (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On subtargets where VMOV-from-lane is slow, a 32-bit extract is done as a
// plain S-subregister copy into a GPR instead.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                                      GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                                       GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                                   GPR:$R, imm:$lane))]>,
                Requires<[HasVFP2]> {
  let Inst{21} = lane{0};
  // This instruction is equivalent as
  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
  let isInsertSubreg = 1;
}
}
// Q-register lane inserts: operate on the containing D subregister and
// re-insert it into the Q register.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                                        (DSubReg_i8_reg imm:$lane))),
                                  GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i16_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                                           (DSubReg_i32_reg imm:$lane))),
                                    GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: only lane 0 is defined, so FP sources are a plain
// subregister insert and integer sources a single VSETLN into lane 0.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
// The 32-bit D-register VDUP is only selected on cores where it is fast;
// slow cores use the VMOVDRR patterns below instead.
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
// Duplicating a GPR into both halves of a D register via VMOVDRR is cheaper
// than VDUP.32 on these cores.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                                  VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

// The element size selects how many of bits Inst{19-16} carry the lane
// index; the remainder are the fixed size-selection bits from the class.
def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: extract the containing D subregister
// and dup from the remapped lane.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                                 (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                                   (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Duplicating an f32 held in an S register: place it in lane 0 of an
// undefined D register, then dup from lane 0.
def : Pat<(v2f32 (NEONvdup (f32 SPR:$src))),
          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;
def : Pat<(v4f32 (NEONvdup (f32 SPR:$src))),
          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                             SPR:$src, ssub_0), (i32 0)))>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// anyext leaves the high bits unspecified, so either VMOVL works; use the
// unsigned (zero-extending) form.
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// Half-precision variants (size field 0b01) require the full FP16 extension.
def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                     v4i16, v4f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                     v4i16, v4f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                     v4f16, v4i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                     v4f16, v4i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
                     v8i16, v8f16, fp_to_sint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
                     v8i16, v8f16, fp_to_uint>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
                     v8f16, v8i16, sint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;
def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
                     v8f16, v8i16, uint_to_fp>,
                Requires<[HasNEON, HasFullFP16]>;

// VCVT{A, N, P, M}
// Directed-rounding conversions (ARMv8): one multiclass instantiation per
// rounding mode, producing signed/unsigned x D/Q x f32/f16 variants.
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s16.f16", v4i16, v4f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                        "s16.f16", v8i16, v8f16, IntS>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u16.f16", v4i16, v4f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                        "u16.f16", v8i16, v8f16, IntU>,
              Requires<[HasV8, HasNEON, HasFullFP16]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
let Predicates = [HasNEON, HasFullFP16] in {
def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
} // Predicates = [HasNEON, HasFullFP16]
}

// Accept a "#0" fractional-bits operand as an alias for the plain
// (non-fixed-point) conversion instructions.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
// f32<->f16 conversions operate on 4 elements (narrowing / lengthening)
// and only need the storage-format FP16 extension, not full FP16 arithmetic.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

// Extracting an aligned half of a Q register needs no instruction at all;
// it is just a subregister extract, with LaneCVT mapping the start index
// to the right D subregister.
class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11} = 0b0;
  let Inst{10-8} = index{2-0};
}

// NOTE(review): the `immTy` class parameter is not used in the (ins) list
// here; the operand is hard-coded to imm0_15 — confirm whether the
// per-element-size immediate ranges passed at the def sites were intended
// to constrain the Q-register forms as they do the D-register forms.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

// The byte-granular index field Inst{11-8} holds the element index scaled
// by the element size, so wider elements fix the low bits to zero.
def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11} = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def  VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;

// The 2-4 register forms take the table as a consecutive register list.
let hasExtraSrcRegAllocReq = 1 in {
def  VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def  VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

// Pseudos whose table operand is a single QQPR register; the patterns below
// build that operand with REG_SEQUENCE.
def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
// Unlike VTBL, VTBX reads and updates $orig (tied to $Vd).
def  VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def  VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def  VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def  VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// Assemble the table-register operands with REG_SEQUENCE; the 3-register
// forms pad the unused fourth slot with IMPLICIT_DEF.
def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vm)),
          (v8i8 (VTBX2 v8i8:$orig,
                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
                                            v8i8:$Vn1, dsub_1),
                       v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vm)),
          (v8i8 (VTBX3Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
                             v8i8:$Vm))>;

def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;
def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
          (v8i8 (VTBX4Pseudo v8i8:$orig,
                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
                                                 v8i8:$Vn1, dsub_1,
                                                 v8i8:$Vn2, dsub_2,
                                                 v8i8:$Vn3, dsub_3),
                             v8i8:$Vm))>;

// VRINT : Vector Rounding
// op selects the rounding-mode suffix (n/x/a/z/m/p) and op9_7 its encoding
// bits; D/Q variants are defined for both f32 and (with full fp16) f16.
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f32",
                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v4f16, v4f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
                       !strconcat("vrint", op), "f16",
                       v8f16, v8f16, Int>,
             Requires<[HasV8, HasNEON, HasFullFP16]> {
      let Inst{9-7} = op9_7;
    }
  }

  // Accept the redundant "src.dt = dst.dt" assembly spelling.
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
  let Predicates = [HasNEON, HasFullFP16] in {
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
  }
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  // 2-operand (accumulating) AES form.
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;

// SHA1H/SHA1C/SHA1M/SHA1P use null_frag here; their intrinsics are matched
// by the explicit Pat<>s below, which adapt the scalar i32 operand.
def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;

// sha1h takes and returns i32: move the scalar through SPR into the low lane
// of a Q register and back out again.
def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
              (SHA1H (SUBREG_TO_REG (i64 0),
                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
                                    ssub_0)),
              ssub_0)), GPR)>;

// sha1c/m/p take the hash_e operand as a scalar i32; place it in the low
// lane of a Q register via SPR.
def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1C v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1M v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
          (SHA1P v4i32:$hash_abcd,
                 (SUBREG_TO_REG (i64 0),
                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
                                ssub_0),
                 v4i32:$wk)>;

//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// These pattern classes widen scalar FP ops to NEON vector instructions by
// inserting the scalar into lane 0 of an undef vector, running the vector
// op, and extracting lane 0 of the result.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPatFP16<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
              (EXTRACT_SUBREG
               (v4f16 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$acc, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$a, ssub_0),
                (INSERT_SUBREG
                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class NVCVTIFPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode GPR:$a)),
              (f32 (EXTRACT_SUBREG
                     (v2f32 (Inst
                       (INSERT_SUBREG
                         (v2f32 (IMPLICIT_DEF)),
                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
                     ssub_0))>;
class NVCVTFIPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(i32 (OpNode SPR:$a)),
              (i32 (EXTRACT_SUBREG
                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
                                                 SPR:$a, ssub_0))),
                     ssub_0))>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPatFP16<fmaxnan, VMAXhd>, Requires<[HasFullFP16]>;
def : N3VSPatFP16<fminnan, VMINhd>, Requires<[HasFullFP16]>;
def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;

// NEON doesn't have any f64 conversions, so provide patterns to make
// sure the VFP conversions match when extracting from a vector.
def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;


// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;
def : Pat<(arm_vmovsr GPR:$a),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
// On little-endian, all same-size vector bitcasts are register-reuse no-ops.
// Casts to/from f64 and v1i64, and v2i32<->v2f32 / v4i32<->v4f32, are no-ops
// on either endianness, so those live outside the IsLE blocks.
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
}
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
}
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v4f16 (bitconvert (f64 DPR:$src))), (v4f16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
}

// 128-bit (Q register) little-endian bitcasts.
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
}

// On big-endian, in-register lane order differs between element sizes, so a
// bitcast requires a VREV of the appropriate width.
let Predicates = [IsBE] in {
  // 64 bit conversions
  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
  def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
  def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;

  // 128 bit conversions
  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}

// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load. Example:
// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr))
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
  let AddedComplexity = 10 in {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
                 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;

  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
               (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
                  (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>;
  }
}

// extload, zextload and sextload for a lengthening load which only uses
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                 (f64 (IMPLICIT_DEF)), (i32 0))),
//                         dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
         dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfSingle definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string InsnLanes, string InsnTy, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
       (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
         (!cast<Instruction>("VREV32d" # RevLanes)
           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
         dsub_0)>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr))
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0))>;
}

// The following class definition is basically a copy of the
// Lengthen_Double definition above, however with an additional parameter
// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
// data loaded by VLD1LN into proper vector format in big endian mode.
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty, string RevLanes> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (!cast<Instruction>("VREV32d" # RevLanes)
               (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
             dsub_0))>;
}

// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
//
// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr))
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
//                                                   (f64 (IMPLICIT_DEF)), (i32 0))),
//                           dsub_0)),
//           dsub_0)>;
multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                           string Insn2Ty> {
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
         (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
             dsub_0)),
           dsub_0)>;
}

// The following class definition is basically a copy of the
// Lengthen_HalfDouble definition above, however with an additional VREV16d8
// instruction to convert data loaded by VLD1LN into proper vector format
// in big endian mode.
// Big-endian variant of Lengthen_HalfDouble: identical double-lengthening
// expansion, but a VREV16d8 is inserted after the VLD1LN so the two loaded
// bytes are put into proper vector lane order for big-endian mode.
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
                               string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
                               string Insn2Ty> {
  // _Any/_Z: any- and zero-extending loads (unsigned VMOVLu);
  // _S: sign-extending load (signed VMOVLs).
  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
             (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
               (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
                 (!cast<Instruction>("VREV16d8")
                   (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
                 dsub_0)),
               dsub_0)>;
  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
             (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
               (!cast<Instruction>("VREV16d8")
                 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
               dsub_0)),
             dsub_0)>;
  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
                 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
             (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
               (!cast<Instruction>("VREV16d8")
                 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
               dsub_0)),
             dsub_0)>;
}

// Full-width lengthening loads (all lanes of the D source used).
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64

let Predicates = [IsLE] in {
  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
}

let Predicates = [IsBE] in {
  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32

  // Double lengthening - v4i8 -> v4i16 -> v4i32
  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
  // v2i8 -> v2i16 -> v2i32
  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
  // v2i16 -> v2i32 -> v2i64
  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
}

// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
// (three chained VMOVL steps; ext/zext use VMOVLu, sext uses VMOVLs).
let Predicates = [IsLE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
}
// The following patterns are basically a copy of the patterns above,
// however with an additional VREV16d instruction to convert data
// loaded by VLD1LN into proper vector format in big endian mode.
// Triple lengthening, big-endian: as the IsLE patterns above, plus a
// VREV16d8 after the VLD1LN to fix byte order of the loaded half-word.
let Predicates = [IsBE] in {
  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (!cast<Instruction>("VREV16d8")
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
        (!cast<Instruction>("VREV16d8")
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
        (!cast<Instruction>("VREV16d8")
        (VLD1LNd16 addrmode6:$addr,
                   (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}

// concat_vectors of two D registers forms a Q register: lower as a
// REG_SEQUENCE placing $Dn in the low half and $Dm in the high half.
def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;

//===----------------------------------------------------------------------===//
// Assembler aliases
//

// Pre-UAL VFP mnemonics: fmdhr/fmdlr move a GPR into the high (lane 1)
// or low (lane 0) 32-bit half of a D register.
def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;

// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
// Bitwise-logic aliases: the operation is bitwise, so any data-type suffix
// (or none) is accepted; d/q variants select D- or Q-register operands.
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
                          (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
                          (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
                          (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
// ... two-operand aliases
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
                          (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
                          (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
                          (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates
// "vand.iN $Vd, #imm" has no VAND immediate encoding; it is accepted as
// VBIC with the bitwise-inverted immediate (the nImmSplatNot* operands).
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;


// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// The _8/_16/_32 suffix is the element size; WB_fixed/WB_register are the
// write-back forms ("$addr!" and "$addr, $Rm" respectively).
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VLD1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;

def VST1LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST1LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;

def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       pred:$p)>;
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;

def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;

def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Write-back forms: "$addr!" (fixed post-increment) and "$addr, $Rm"
// (register post-increment).
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                  (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;

def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                  (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       pred:$p)>;
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeDHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListThreeQHWordIndexed:$list,
                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
                       rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;

// Write-back forms: "$addr!" (fixed post-increment) and "$addr, $Rm"
// (register post-increment).
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeD:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQ:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Note the per-element-size dup alignment operands below: .8 allows 32-bit
// alignment, .16 allows 64-bit, and .32 allows 64- or 128-bit.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
                    pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list,
                    addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// There is deliberately no ".8" Q-register form: byte lanes of a four-Q
// list are not addressable, so only the D-register variant exists for .8.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// All multiple-structure VLD4 forms accept 64-, 128-, or 256-bit alignment
// regardless of element size (four full registers are transferred).
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// Mirrors the VLD4 single-lane pseudos: same vector-list/alignment pairs,
// and likewise no ".8" Q-register form.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list,
                    addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
// Mirrors the VLD4 multiple-structure pseudos: 64/128/256-bit alignment is
// accepted for every element size.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
// vcle Dd, Dn, Dm is encoded as VCGE Dd, Dm, Dn (operands swapped).
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Half-precision form requires the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// Half-precision form requires the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
// vclt Dd, Dn, Dm is encoded as VCGT Dd, Dm, Dn (operands swapped).
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Half-precision form requires the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// Half-precision form requires the full FP16 extension.
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
// The nImmVMOVI32Neg operand matches immediates whose bitwise complement is
// encodable, so each mnemonic is mapped to the opposite instruction.
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;