; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Most of these tests create a vector tuple, insert z5 into one of the
; elements, and finally extract that element from the wide vector to return it.
; For those tests the checks ensure that z5 is the value that is returned.
;
; The *_ret_eltN tests instead read back an element that was not written, and
; the get_tuple* tests read an element of a tuple passed in as an argument.
; Each test documents the expected register in its diagram or checks.

;
; Insert into two element tuples
;

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  {     z5     , tuple2.res1 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  { tuple2.res0,     z5      }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                   <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                   <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:      { tuple2.res0, tuple2.res1 }
; insert z5:  { tuple2.res0,     z5      }
; extract z0:       ^^
; Element 0 already lives in z0, so no register move is expected before the return.
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                            <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                            <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
  ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
  ; CHECK-NEXT: mov z0.d, z1.d
  ; CHECK-NEXT: ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into three element tuples
;

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  {     z5     , tuple3.res1, tuple3.res2 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0,     z5     , tuple3.res2 }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0, tuple3.res1,     z5      }
; extract z5:                                 ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.

; tuple:      { tuple3.res0, tuple3.res1, tuple3.res2 }
; insert z5:  { tuple3.res0,     z5     , tuple3.res2 }
; extract z2:                                 ^^
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                             <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                             <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
  ; CHECK-NEXT: mov z0.d, z2.d
  ; CHECK-NEXT: ret
  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; Test extract of tuple passed into function
; The leading %z0 argument precedes the tuple in the argument list; the checks
; expect element 2 of the tuple to be read from z3.
define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
  ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
  ; CHECK-NEXT: mov z0.d, z3.d
  ; CHECK-NEXT: ret
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
  ret <vscale x 4 x i32> %ext
}

;
; Insert into four element tuples
;

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
; extract z5:       ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
; extract z5:                    ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
; extract z5:                                 ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
  ret <vscale x 4 x i32> %ext
}

; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z5:                                              ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
                                                    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
                                                    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
  ; CHECK-NEXT: mov z0.d, z5.d
  ; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
  ret <vscale x 4 x i32> %ext
}

; This test checks the elements _not_ being set aren't changed.
;
; Writing element 3 of a four-element tuple must leave element 2 untouched, so
; reading element 2 back is expected to yield the original %z2.
;
; tuple:      { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
; insert z5:  { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
; extract z2:                                  ^^
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(
    <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
    <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
    <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
  ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
  ; CHECK-NEXT: mov z0.d, z2.d
  ; CHECK-NEXT: ret
  ; Build the tuple from z0..z3, overwrite its last element with z5, then read
  ; back the third element.
  %wide = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  %updated = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %wide, i32 3, <vscale x 4 x i32> %z5)
  %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %updated, i32 2)
  ret <vscale x 4 x i32> %elt
}

; Read element 3 out of a four-element tuple that arrives as a function
; argument; the checks expect it to be copied from z3 into z0.
define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
  ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
  ; CHECK-NEXT: mov z0.d, z3.d
  ; CHECK-NEXT: ret
  %elt = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
  ret <vscale x 4 x i32> %elt
}

; Intrinsics operating on two-element tuples (<vscale x 8 x i32>).
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)

; Intrinsics operating on three-element tuples (<vscale x 12 x i32>).
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)

; Intrinsics operating on four-element tuples (<vscale x 16 x i32>).
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)

; Attribute group referenced as #0 by the test functions in this file.
attributes #0 = { nounwind "target-features"="+sve" }