; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
;
; Codegen tests for the ARM MVE gather-load intrinsics
; (@llvm.arm.mve.vldr.gather.*). For the "offset" form, the trailing
; i32 operands are, in order: the memory element size in bits (8/16/64
; below, matching vldrb/vldrh/vldrd), the left shift applied to each
; offset (visible as "uxtw #n" in the expected output of the
; "shifted_offset" tests), and 0/1 selecting sign- vs zero-extension of
; the loaded element (the only difference between each _sNN/_uNN pair
; of tests below). The "_z_" variants additionally take a predicate
; produced by @llvm.arm.mve.pred.i2v and expect a vpst/vldr*t sequence.

; ---- vldrbq: gathers of bytes via a vector of byte offsets ----

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32)

; For byte loads into byte lanes there is no widening, so the .u8 form
; is expected for both the signed and the unsigned test.
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
  ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}

; ---- vldrdq: 64-bit gathers, both vector-base (+immediate) and
; base-plus-vector-offset forms; "wb" tests the write-back variant that
; also returns the updated base vector. ----

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #616]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #-336]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #576]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #-328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #664]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #888]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #-1000]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}

; ---- vldrhq: halfword gathers, including f16 results ----

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x half> %0
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

; ---- vldrwq: 32-bit gathers from a vector of base addresses ----

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #12]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
  ret <4 x float> %0
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #400]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #284]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #-64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
  %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
  ret <4 x float> %3
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #480]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #-352]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #276]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #88]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #-300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #440]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16
zeroext %p) { 878; CHECK-LABEL: test_vldrwq_gather_base_z_u32: 879; CHECK: @ %bb.0: @ %entry 880; CHECK-NEXT: vmsr p0, r0 881; CHECK-NEXT: vpst 882; CHECK-NEXT: vldrwt.u32 q1, [q0, #300] 883; CHECK-NEXT: vmov q0, q1 884; CHECK-NEXT: bx lr 885entry: 886 %0 = zext i16 %p to i32 887 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 888 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1) 889 ret <4 x i32> %2 890} 891 892define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) { 893; CHECK-LABEL: test_vldrwq_gather_offset_f32: 894; CHECK: @ %bb.0: @ %entry 895; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 896; CHECK-NEXT: vmov q0, q1 897; CHECK-NEXT: bx lr 898entry: 899 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) 900 ret <4 x float> %0 901} 902 903declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32) 904 905define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) { 906; CHECK-LABEL: test_vldrwq_gather_offset_s32: 907; CHECK: @ %bb.0: @ %entry 908; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 909; CHECK-NEXT: vmov q0, q1 910; CHECK-NEXT: bx lr 911entry: 912 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0) 913 ret <4 x i32> %0 914} 915 916declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) 917 918define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) { 919; CHECK-LABEL: test_vldrwq_gather_offset_u32: 920; CHECK: @ %bb.0: @ %entry 921; CHECK-NEXT: vldrw.u32 q1, [r0, q0] 922; CHECK-NEXT: vmov q0, q1 923; CHECK-NEXT: bx lr 924entry: 925 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, 
i32 1) 926 ret <4 x i32> %0 927} 928 929define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { 930; CHECK-LABEL: test_vldrwq_gather_offset_z_f32: 931; CHECK: @ %bb.0: @ %entry 932; CHECK-NEXT: vmsr p0, r1 933; CHECK-NEXT: vpst 934; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 935; CHECK-NEXT: vmov q0, q1 936; CHECK-NEXT: bx lr 937entry: 938 %0 = zext i16 %p to i32 939 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 940 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) 941 ret <4 x float> %2 942} 943 944declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>) 945 946define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { 947; CHECK-LABEL: test_vldrwq_gather_offset_z_s32: 948; CHECK: @ %bb.0: @ %entry 949; CHECK-NEXT: vmsr p0, r1 950; CHECK-NEXT: vpst 951; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 952; CHECK-NEXT: vmov q0, q1 953; CHECK-NEXT: bx lr 954entry: 955 %0 = zext i16 %p to i32 956 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 957 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1) 958 ret <4 x i32> %2 959} 960 961declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>) 962 963define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { 964; CHECK-LABEL: test_vldrwq_gather_offset_z_u32: 965; CHECK: @ %bb.0: @ %entry 966; CHECK-NEXT: vmsr p0, r1 967; CHECK-NEXT: vpst 968; CHECK-NEXT: vldrwt.u32 q1, [r0, q0] 969; CHECK-NEXT: vmov q0, q1 970; CHECK-NEXT: bx lr 971entry: 972 %0 = zext i16 %p to i32 973 %1 = call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 %0) 974 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1) 975 ret <4 x i32> %2 976} 977 978define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) { 979; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32: 980; CHECK: @ %bb.0: @ %entry 981; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 982; CHECK-NEXT: vmov q0, q1 983; CHECK-NEXT: bx lr 984entry: 985 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) 986 ret <4 x float> %0 987} 988 989define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) { 990; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32: 991; CHECK: @ %bb.0: @ %entry 992; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 993; CHECK-NEXT: vmov q0, q1 994; CHECK-NEXT: bx lr 995entry: 996 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0) 997 ret <4 x i32> %0 998} 999 1000define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) { 1001; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32: 1002; CHECK: @ %bb.0: @ %entry 1003; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2] 1004; CHECK-NEXT: vmov q0, q1 1005; CHECK-NEXT: bx lr 1006entry: 1007 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1) 1008 ret <4 x i32> %0 1009} 1010 1011define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) { 1012; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32: 1013; CHECK: @ %bb.0: @ %entry 1014; CHECK-NEXT: vmsr p0, r1 1015; CHECK-NEXT: vpst 1016; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1017; CHECK-NEXT: vmov q0, q1 1018; 
CHECK-NEXT: bx lr 1019entry: 1020 %0 = zext i16 %p to i32 1021 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1022 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) 1023 ret <4 x float> %2 1024} 1025 1026define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { 1027; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32: 1028; CHECK: @ %bb.0: @ %entry 1029; CHECK-NEXT: vmsr p0, r1 1030; CHECK-NEXT: vpst 1031; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1032; CHECK-NEXT: vmov q0, q1 1033; CHECK-NEXT: bx lr 1034entry: 1035 %0 = zext i16 %p to i32 1036 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1037 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1) 1038 ret <4 x i32> %2 1039} 1040 1041define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) { 1042; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32: 1043; CHECK: @ %bb.0: @ %entry 1044; CHECK-NEXT: vmsr p0, r1 1045; CHECK-NEXT: vpst 1046; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2] 1047; CHECK-NEXT: vmov q0, q1 1048; CHECK-NEXT: bx lr 1049entry: 1050 %0 = zext i16 %p to i32 1051 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1052 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1) 1053 ret <4 x i32> %2 1054} 1055 1056define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1057; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16: 1058; CHECK: @ %bb.0: @ %entry 1059; CHECK-NEXT: vmsr p0, r1 1060; CHECK-NEXT: vpst 1061; CHECK-NEXT: vstrbt.16 q1, [r0, q0] 1062; CHECK-NEXT: bx lr 1063entry: 1064 
%0 = zext i16 %p to i32 1065 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1066 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) 1067 ret void 1068} 1069 1070declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) 1071 1072define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1073; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32: 1074; CHECK: @ %bb.0: @ %entry 1075; CHECK-NEXT: vmsr p0, r1 1076; CHECK-NEXT: vpst 1077; CHECK-NEXT: vstrbt.32 q1, [r0, q0] 1078; CHECK-NEXT: bx lr 1079entry: 1080 %0 = zext i16 %p to i32 1081 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1082 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) 1083 ret void 1084} 1085 1086declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>) 1087 1088define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { 1089; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8: 1090; CHECK: @ %bb.0: @ %entry 1091; CHECK-NEXT: vmsr p0, r1 1092; CHECK-NEXT: vpst 1093; CHECK-NEXT: vstrbt.8 q1, [r0, q0] 1094; CHECK-NEXT: bx lr 1095entry: 1096 %0 = zext i16 %p to i32 1097 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1098 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) 1099 ret void 1100} 1101 1102declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>) 1103 1104define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> 
%offset, <8 x i16> %value, i16 zeroext %p) { 1105; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16: 1106; CHECK: @ %bb.0: @ %entry 1107; CHECK-NEXT: vmsr p0, r1 1108; CHECK-NEXT: vpst 1109; CHECK-NEXT: vstrbt.16 q1, [r0, q0] 1110; CHECK-NEXT: bx lr 1111entry: 1112 %0 = zext i16 %p to i32 1113 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1114 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1) 1115 ret void 1116} 1117 1118define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1119; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32: 1120; CHECK: @ %bb.0: @ %entry 1121; CHECK-NEXT: vmsr p0, r1 1122; CHECK-NEXT: vpst 1123; CHECK-NEXT: vstrbt.32 q1, [r0, q0] 1124; CHECK-NEXT: bx lr 1125entry: 1126 %0 = zext i16 %p to i32 1127 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1128 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1) 1129 ret void 1130} 1131 1132define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) { 1133; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8: 1134; CHECK: @ %bb.0: @ %entry 1135; CHECK-NEXT: vmsr p0, r1 1136; CHECK-NEXT: vpst 1137; CHECK-NEXT: vstrbt.8 q1, [r0, q0] 1138; CHECK-NEXT: bx lr 1139entry: 1140 %0 = zext i16 %p to i32 1141 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1142 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1) 1143 ret void 1144} 1145 1146define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { 1147; CHECK-LABEL: test_vstrbq_scatter_offset_s16: 1148; CHECK: @ %bb.0: @ %entry 1149; CHECK-NEXT: vstrb.16 q1, [r0, q0] 
1150; CHECK-NEXT: bx lr 1151entry: 1152 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) 1153 ret void 1154} 1155 1156declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) 1157 1158define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { 1159; CHECK-LABEL: test_vstrbq_scatter_offset_s32: 1160; CHECK: @ %bb.0: @ %entry 1161; CHECK-NEXT: vstrb.32 q1, [r0, q0] 1162; CHECK-NEXT: bx lr 1163entry: 1164 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) 1165 ret void 1166} 1167 1168declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) 1169 1170define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { 1171; CHECK-LABEL: test_vstrbq_scatter_offset_s8: 1172; CHECK: @ %bb.0: @ %entry 1173; CHECK-NEXT: vstrb.8 q1, [r0, q0] 1174; CHECK-NEXT: bx lr 1175entry: 1176 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) 1177 ret void 1178} 1179 1180declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32) 1181 1182define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) { 1183; CHECK-LABEL: test_vstrbq_scatter_offset_u16: 1184; CHECK: @ %bb.0: @ %entry 1185; CHECK-NEXT: vstrb.16 q1, [r0, q0] 1186; CHECK-NEXT: bx lr 1187entry: 1188 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0) 1189 ret void 1190} 1191 1192define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) { 1193; CHECK-LABEL: test_vstrbq_scatter_offset_u32: 1194; CHECK: @ %bb.0: @ %entry 1195; 
CHECK-NEXT: vstrb.32 q1, [r0, q0] 1196; CHECK-NEXT: bx lr 1197entry: 1198 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0) 1199 ret void 1200} 1201 1202define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) { 1203; CHECK-LABEL: test_vstrbq_scatter_offset_u8: 1204; CHECK: @ %bb.0: @ %entry 1205; CHECK-NEXT: vstrb.8 q1, [r0, q0] 1206; CHECK-NEXT: bx lr 1207entry: 1208 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0) 1209 ret void 1210} 1211 1212define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { 1213; CHECK-LABEL: test_vstrdq_scatter_base_p_s64: 1214; CHECK: @ %bb.0: @ %entry 1215; CHECK-NEXT: vmsr p0, r0 1216; CHECK-NEXT: vpst 1217; CHECK-NEXT: vstrdt.64 q1, [q0, #888] 1218; CHECK-NEXT: bx lr 1219entry: 1220 %0 = zext i16 %p to i32 1221 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1222 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1) 1223 ret void 1224} 1225 1226declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) 1227 1228define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) { 1229; CHECK-LABEL: test_vstrdq_scatter_base_p_u64: 1230; CHECK: @ %bb.0: @ %entry 1231; CHECK-NEXT: vmsr p0, r0 1232; CHECK-NEXT: vpst 1233; CHECK-NEXT: vstrdt.64 q1, [q0, #264] 1234; CHECK-NEXT: bx lr 1235entry: 1236 %0 = zext i16 %p to i32 1237 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1238 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1) 1239 ret void 1240} 1241 1242define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x 
i64> %value) { 1243; CHECK-LABEL: test_vstrdq_scatter_base_s64: 1244; CHECK: @ %bb.0: @ %entry 1245; CHECK-NEXT: vstrd.64 q1, [q0, #408] 1246; CHECK-NEXT: bx lr 1247entry: 1248 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value) 1249 ret void 1250} 1251 1252declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) 1253 1254define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) { 1255; CHECK-LABEL: test_vstrdq_scatter_base_u64: 1256; CHECK: @ %bb.0: @ %entry 1257; CHECK-NEXT: vstrd.64 q1, [q0, #-472] 1258; CHECK-NEXT: bx lr 1259entry: 1260 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value) 1261 ret void 1262} 1263 1264define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { 1265; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64: 1266; CHECK: @ %bb.0: @ %entry 1267; CHECK-NEXT: vldrw.u32 q1, [r0] 1268; CHECK-NEXT: vmsr p0, r1 1269; CHECK-NEXT: vpst 1270; CHECK-NEXT: vstrdt.64 q0, [q1, #248]! 
1271; CHECK-NEXT: vstrw.32 q1, [r0] 1272; CHECK-NEXT: bx lr 1273entry: 1274 %0 = load <2 x i64>, <2 x i64>* %addr, align 8 1275 %1 = zext i16 %p to i32 1276 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1277 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2) 1278 store <2 x i64> %3, <2 x i64>* %addr, align 8 1279 ret void 1280} 1281 1282declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>) 1283 1284define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) { 1285; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64: 1286; CHECK: @ %bb.0: @ %entry 1287; CHECK-NEXT: vldrw.u32 q1, [r0] 1288; CHECK-NEXT: vmsr p0, r1 1289; CHECK-NEXT: vpst 1290; CHECK-NEXT: vstrdt.64 q0, [q1, #136]! 1291; CHECK-NEXT: vstrw.32 q1, [r0] 1292; CHECK-NEXT: bx lr 1293entry: 1294 %0 = load <2 x i64>, <2 x i64>* %addr, align 8 1295 %1 = zext i16 %p to i32 1296 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 1297 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2) 1298 store <2 x i64> %3, <2 x i64>* %addr, align 8 1299 ret void 1300} 1301 1302define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) { 1303; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64: 1304; CHECK: @ %bb.0: @ %entry 1305; CHECK-NEXT: vldrw.u32 q1, [r0] 1306; CHECK-NEXT: vstrd.64 q0, [q1, #208]! 
1307; CHECK-NEXT: vstrw.32 q1, [r0] 1308; CHECK-NEXT: bx lr 1309entry: 1310 %0 = load <2 x i64>, <2 x i64>* %addr, align 8 1311 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value) 1312 store <2 x i64> %1, <2 x i64>* %addr, align 8 1313 ret void 1314} 1315 1316declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>) 1317 1318define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) { 1319; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64: 1320; CHECK: @ %bb.0: @ %entry 1321; CHECK-NEXT: vldrw.u32 q1, [r0] 1322; CHECK-NEXT: vstrd.64 q0, [q1, #-168]! 1323; CHECK-NEXT: vstrw.32 q1, [r0] 1324; CHECK-NEXT: bx lr 1325entry: 1326 %0 = load <2 x i64>, <2 x i64>* %addr, align 8 1327 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value) 1328 store <2 x i64> %1, <2 x i64>* %addr, align 8 1329 ret void 1330} 1331 1332define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { 1333; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64: 1334; CHECK: @ %bb.0: @ %entry 1335; CHECK-NEXT: vmsr p0, r1 1336; CHECK-NEXT: vpst 1337; CHECK-NEXT: vstrdt.64 q1, [r0, q0] 1338; CHECK-NEXT: bx lr 1339entry: 1340 %0 = zext i16 %p to i32 1341 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1342 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1) 1343 ret void 1344} 1345 1346declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>) 1347 1348define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { 1349; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64: 1350; CHECK: @ %bb.0: @ %entry 1351; CHECK-NEXT: vmsr p0, r1 
1352; CHECK-NEXT: vpst 1353; CHECK-NEXT: vstrdt.64 q1, [r0, q0] 1354; CHECK-NEXT: bx lr 1355entry: 1356 %0 = zext i16 %p to i32 1357 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1358 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1) 1359 ret void 1360} 1361 1362define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { 1363; CHECK-LABEL: test_vstrdq_scatter_offset_s64: 1364; CHECK: @ %bb.0: @ %entry 1365; CHECK-NEXT: vstrd.64 q1, [r0, q0] 1366; CHECK-NEXT: bx lr 1367entry: 1368 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) 1369 ret void 1370} 1371 1372declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32) 1373 1374define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { 1375; CHECK-LABEL: test_vstrdq_scatter_offset_u64: 1376; CHECK: @ %bb.0: @ %entry 1377; CHECK-NEXT: vstrd.64 q1, [r0, q0] 1378; CHECK-NEXT: bx lr 1379entry: 1380 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0) 1381 ret void 1382} 1383 1384define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { 1385; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64: 1386; CHECK: @ %bb.0: @ %entry 1387; CHECK-NEXT: vmsr p0, r1 1388; CHECK-NEXT: vpst 1389; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] 1390; CHECK-NEXT: bx lr 1391entry: 1392 %0 = zext i16 %p to i32 1393 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1394 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1) 1395 ret void 1396} 1397 1398define 
arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) { 1399; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64: 1400; CHECK: @ %bb.0: @ %entry 1401; CHECK-NEXT: vmsr p0, r1 1402; CHECK-NEXT: vpst 1403; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3] 1404; CHECK-NEXT: bx lr 1405entry: 1406 %0 = zext i16 %p to i32 1407 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1408 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1) 1409 ret void 1410} 1411 1412define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { 1413; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64: 1414; CHECK: @ %bb.0: @ %entry 1415; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] 1416; CHECK-NEXT: bx lr 1417entry: 1418 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) 1419 ret void 1420} 1421 1422define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) { 1423; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64: 1424; CHECK: @ %bb.0: @ %entry 1425; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3] 1426; CHECK-NEXT: bx lr 1427entry: 1428 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3) 1429 ret void 1430} 1431 1432define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) { 1433; CHECK-LABEL: test_vstrhq_scatter_offset_f16: 1434; CHECK: @ %bb.0: @ %entry 1435; CHECK-NEXT: vstrh.16 q1, [r0, q0] 1436; CHECK-NEXT: bx lr 1437entry: 1438 call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0) 1439 ret void 1440} 1441 1442declare void 
@llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32) 1443 1444define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) { 1445; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16: 1446; CHECK: @ %bb.0: @ %entry 1447; CHECK-NEXT: vmsr p0, r1 1448; CHECK-NEXT: vpst 1449; CHECK-NEXT: vstrht.16 q1, [r0, q0] 1450; CHECK-NEXT: bx lr 1451entry: 1452 %0 = zext i16 %p to i32 1453 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1454 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1) 1455 ret void 1456} 1457 1458declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>) 1459 1460define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) { 1461; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16: 1462; CHECK: @ %bb.0: @ %entry 1463; CHECK-NEXT: vmsr p0, r1 1464; CHECK-NEXT: vpst 1465; CHECK-NEXT: vstrht.16 q1, [r0, q0] 1466; CHECK-NEXT: bx lr 1467entry: 1468 %0 = zext i16 %p to i32 1469 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1470 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1) 1471 ret void 1472} 1473 1474declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>) 1475 1476define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) { 1477; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32: 1478; CHECK: @ %bb.0: @ %entry 1479; CHECK-NEXT: vmsr p0, r1 1480; CHECK-NEXT: vpst 1481; CHECK-NEXT: vstrht.32 q1, [r0, q0] 1482; CHECK-NEXT: bx lr 1483entry: 1484 %0 = zext i16 %p to i32 1485 
; NOTE(review): the three lines below are the tail of a predicated vstrh
; scatter-offset test whose `define` line lies above this chunk; reproduced
; verbatim.
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

; Predicated halfword scatter-stores with unshifted vector offsets
; (memory-element size 16, shift 0). The i16 predicate %p is moved into P0
; via VMSR and the store is expected inside a VPT block as vstrht.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
  ret void
}

; Unpredicated halfword scatter-stores, unshifted offsets (element size 16,
; shift 0): expected to select plain vstrh.16 / vstrh.32.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.16 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32)

; Unsigned variants use the same intrinsic as the signed ones (signedness is
; irrelevant to a store), so the expected output is identical.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.16 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
  ret void
}

; Shifted-offset variants: final intrinsic argument is 1 (offsets are element
; indices, scaled by 2 bytes), so the addressing mode gains `uxtw #1`.
define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
  ret void
}

; vstrwq_scatter_base* tests: word scatter-stores against a vector of base
; addresses (q0) plus an immediate offset, selecting vstrw.32 [qN, #imm].
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [q0, #380]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

; Predicated form; a negative immediate (-400) exercises the signed offset
; encoding. %p arrives in r0 here because both vector args occupy q0/q1.
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [q0, #-400]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [q0, #48]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [q0, #-376]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [q0, #156]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [q0, #212]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
  ret void
}

; Writeback (wb) forms: the intrinsic returns the updated base vector, which
; the test stores back through %addr. Expected selection is the `!` writeback
; addressing mode plus an explicit load/store of the base vector.
define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vstrw.32 q0, [q1, #-412]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [q1, #236]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [q1, #328]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [q1, #412]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
  store <4 x i32> %3, <4 x i32>* %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vstrw.32 q0, [q1, #-152]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vstrw.32 q0, [q1, #64]!
; CHECK-NEXT: vstrw.32 q1, [r0]
; CHECK-NEXT: bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
  store <4 x i32> %1, <4 x i32>* %addr, align 8
  ret void
}

; vstrwq_scatter_offset* tests: word scatter-stores, scalar base in r0 plus
; a vector of byte offsets (element size 32, shift 0).
define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}

declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32)

define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
  ret void
}

; Shifted variants for word stores: shift argument 2 (offsets scaled by 4
; bytes), so the addressing mode is expected to carry `uxtw #2`.
define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
; CHECK-NEXT: bx lr
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
  ret void
}