; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
  ret <vscale x 16 x i8> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
  ret <vscale x 16 x i8> %retval
}

declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)