; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o 2>/dev/null - | FileCheck %s

; VSTRB.32 Qd, [base, offs]
define arm_aapcs_vfpcc void @ext_unscaled_i8_i32(i8* %base, <4 x i32>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %t = trunc <4 x i32> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %t, <4 x i8*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs]
define arm_aapcs_vfpcc void @ext_unscaled_i16_i32(i8* %base, <4 x i32>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %t, <4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs]
define arm_aapcs_vfpcc void @unscaled_i32_i32(i8* %base, <4 x i32>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: unscaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs]
define arm_aapcs_vfpcc void @unscaled_f32_i32(i8* %base, <4 x i32>* %offptr, <4 x float> %input) {
; CHECK-LABEL: unscaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, <4 x i32>* %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %input, <4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @unsigned_unscaled_b_i32_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @signed_unscaled_i32_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: signed_unscaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @a_unsigned_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr, <4 x float> %input) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %input, <4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @b_signed_unscaled_f32_i16(i8* %base, <4 x i16>* %offptr, <4 x float> %input) {
; CHECK-LABEL: b_signed_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %input, <4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @ext_signed_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %t, <4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i16_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %t, <4 x i16*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRB.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @ext_signed_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %t = trunc <4 x i32> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %t, <4 x i8*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRB.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i8_i16(i8* %base, <4 x i16>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, <4 x i16>* %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %t = trunc <4 x i32> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %t, <4 x i8*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @unsigned_unscaled_b_i32_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @signed_unscaled_i32_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: signed_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @a_unsigned_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr, <4 x float> %input) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %input, <4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @b_signed_unscaled_f32_i8(i8* %base, <4 x i8>* %offptr, <4 x float> %input) {
; CHECK-LABEL: b_signed_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x float*>
  call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %input, <4 x float*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRB.32 Qd, [base, offs.sext]
define arm_aapcs_vfpcc void @ext_signed_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %t = trunc <4 x i32> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %t, <4 x i8*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRB.32 Qd, [base, offs.zext]
define arm_aapcs_vfpcc void @ext_unsigned_unscaled_i8_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %t = trunc <4 x i32> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %t, <4 x i8*> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_signed_unscaled_i64_i8(i8* %base, <4 x i8>* %offptr, <4 x i64> %input) {
; CHECK-LABEL: trunc_signed_unscaled_i64_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s2
; CHECK-NEXT:    vldrb.s32 q2, [r1]
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    vstrw.32 q0, [r0, q2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %input.trunc = trunc <4 x i64> %input to <4 x i32>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input.trunc, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i64_i8(i8* %base, <4 x i8>* %offptr, <4 x i64> %input) {
; CHECK-LABEL: trunc_unsigned_unscaled_i64_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s2
; CHECK-NEXT:    vldrb.u32 q2, [r1]
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    vstrw.32 q0, [r0, q2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i32*>
  %input.trunc = trunc <4 x i64> %input to <4 x i32>
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %input.trunc, <4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_signed_unscaled_i32_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: trunc_signed_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %input.trunc = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %input.trunc, <4 x i16*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i32_i8(i8* %base, <4 x i8>* %offptr, <4 x i32> %input) {
; CHECK-LABEL: trunc_unsigned_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x i8*> %byte_ptrs to <4 x i16*>
  %input.trunc = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %input.trunc, <4 x i16*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_signed_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
; CHECK-LABEL: trunc_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.sext
  %input.trunc = trunc <4 x i16> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %input.trunc, <4 x i8*> %byte_ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @trunc_unsigned_unscaled_i16_i8(i8* %base, <4 x i8>* %offptr, <4 x i16> %input) {
; CHECK-LABEL: trunc_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov r0, s7
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, <4 x i8>* %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, i8* %base, <4 x i32> %offs.zext
  %input.trunc = trunc <4 x i16> %input to <4 x i8>
  call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %input.trunc, <4 x i8*> %byte_ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>)