; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; Byte-element llvm.masked.gather tests with an all-true mask, built with
; +mve.fp. Each function forms its pointer vector in a different way. The
; expected assembly is produced by the script named in the NOTE line above;
; regenerate it with that script instead of editing the assertions by hand.

; v16i8 gather, zero-extended i8 offsets from a scalar base: the expected
; output is a single vector-offset vldrb.u8.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %offs.zext = zext <16 x i8> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.zext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

; v8i8 gather, zero-extended i8 offsets: the expected output adds the base to
; two offset vectors and performs eight scalar ldrb loads.
define arm_aapcs_vfpcc <8 x i8> @unscaled_v8i8_i8(i8* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v8i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vldrb.u32 q1, [r1, #4]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s3
; CHECK-NEXT:    vmov r5, s1
; CHECK-NEXT:    vmov r0, s4
; CHECK-NEXT:    vmov r1, s5
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb.w lr, [r3]
; CHECK-NEXT:    vmov r3, s6
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    vmov.16 q0[0], r2
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r12
; CHECK-NEXT:    vmov.16 q0[3], lr
; CHECK-NEXT:    vmov.16 q0[4], r0
; CHECK-NEXT:    vmov.16 q0[5], r1
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r4
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <8 x i32> %offs.zext
  %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  ret <8 x i8> %gather
}

; v2i8 gather, zero-extended i8 offsets: the expected output loads both
; offsets and both elements with scalar ldrb instructions.
define arm_aapcs_vfpcc <2 x i8> @unscaled_v2i8_i8(i8* %base, <2 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v2i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ldrb r2, [r1]
; CHECK-NEXT:    vmov.i32 q0, #0xff
; CHECK-NEXT:    ldrb r1, [r1, #1]
; CHECK-NEXT:    vmov.32 q1[0], r2
; CHECK-NEXT:    vmov.32 q1[2], r1
; CHECK-NEXT:    vand q0, q1, q0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    ldrb r1, [r0, r1]
; CHECK-NEXT:    ldrb r0, [r0, r2]
; CHECK-NEXT:    vmov.32 q0[0], r1
; CHECK-NEXT:    vmov.32 q0[2], r0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <2 x i8>, <2 x i8>* %offptr, align 1
  %offs.zext = zext <2 x i8> %offs to <2 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <2 x i32> %offs.zext
  %gather = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> %ptrs, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
  ret <2 x i8> %gather
}

; v16i8 gather, sign-extended i8 offsets: the expected output builds four
; address vectors (vldrb.s32 + vadd.i32) and loads every lane with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_sext(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #8]
; CHECK-NEXT:    vldrb.s32 q2, [r1, #4]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrb.s32 q0, [r1, #12]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %offs.sext = sext <16 x i8> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.sext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

; v16i8 gather, sign-extended i16 offsets: the expected output widens the
; offsets with vldrh.s32 and loads every lane with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i16(i8* %base, <16 x i16>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #16]
; CHECK-NEXT:    vldrh.s32 q2, [r1, #8]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrh.s32 q0, [r1, #24]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i16>, <16 x i16>* %offptr, align 2
  %offs.sext = sext <16 x i16> %offs to <16 x i32>
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs.sext
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

; v16i8 gather through an i32-typed GEP that is bitcast to i8*: the expected
; output shifts the zero-extended offsets left by 2 and loads every lane with
; ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_scaled(i32* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_scaled:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrb.u32 q0, [r1, #8]
; CHECK-NEXT:    vldrb.u32 q2, [r1, #4]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vshl.i32 q2, q2, #2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrb.u32 q0, [r1, #12]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 4
  %offs.zext = zext <16 x i8> %offs to <16 x i32>
  %ptrs32 = getelementptr inbounds i32, i32* %base, <16 x i32> %offs.zext
  %ptrs = bitcast <16 x i32*> %ptrs32 to <16 x i8*>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

; v16i8 gather whose offsets are loaded directly as <16 x i32>: the expected
; output loads them with vldrw.u32 and loads every lane with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_next(i8* %base, <16 x i32>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_next:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vldrw.u32 q0, [r1, #32]
; CHECK-NEXT:    vldrw.u32 q2, [r1, #16]
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r1, #48]
; CHECK-NEXT:    vmov r2, s6
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vmov r3, s2
; CHECK-NEXT:    vmov r4, s3
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r3, [r3]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r3
; CHECK-NEXT:    vmov.8 q0[15], r4
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %offs = load <16 x i32>, <16 x i32>* %offptr, align 4
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> %offs
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}

; Two chained GEPs: loaded i8 offsets, then a constant +5. The expected output
; adds the base and a #0x5 splat to each offset vector and loads every lane
; with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vldrb.s32 q0, [r1, #12]
; CHECK-NEXT:    vmov.i32 q2, #0x5
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, q2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ldrb r3, [r2]
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    ldrb.w r12, [r2]
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ldrb.w lr, [r2]
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    vldrb.s32 q0, [r1, #8]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q1, q0, q2
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vmov r6, s4
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, s6
; CHECK-NEXT:    vadd.i32 q3, q0, q2
; CHECK-NEXT:    vmov r5, s12
; CHECK-NEXT:    vmov r7, s15
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r6, [r6]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r7, [r7]
; CHECK-NEXT:    vmov.8 q0[0], r5
; CHECK-NEXT:    vmov r5, s13
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[1], r5
; CHECK-NEXT:    vmov r5, s14
; CHECK-NEXT:    vldrb.s32 q3, [r1, #4]
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:    vadd.i32 q2, q3, q2
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[2], r5
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    vmov.8 q0[3], r7
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r6
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r4
; CHECK-NEXT:    vmov.8 q0[11], r5
; CHECK-NEXT:    vmov.8 q0[12], r3
; CHECK-NEXT:    vmov.8 q0[13], r2
; CHECK-NEXT:    vmov.8 q0[14], r12
; CHECK-NEXT:    vmov.8 q0[15], lr
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %offs = load <16 x i8>, <16 x i8>* %offptr, align 1
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %offs
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i8 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Two chained GEPs with constant i8 offsets (0, 3, ..., 45, then +5): the
; expected output folds them into one byte table (5, 8, ..., 50) feeding a
; single vldrb.u8 gather.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep2(i8* %base, <16 x i8>* %offptr) {
; CHECK-LABEL: unscaled_v16i8_i8_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI8_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI8_0:
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 14 @ 0xe
; CHECK-NEXT:    .byte 17 @ 0x11
; CHECK-NEXT:    .byte 20 @ 0x14
; CHECK-NEXT:    .byte 23 @ 0x17
; CHECK-NEXT:    .byte 26 @ 0x1a
; CHECK-NEXT:    .byte 29 @ 0x1d
; CHECK-NEXT:    .byte 32 @ 0x20
; CHECK-NEXT:    .byte 35 @ 0x23
; CHECK-NEXT:    .byte 38 @ 0x26
; CHECK-NEXT:    .byte 41 @ 0x29
; CHECK-NEXT:    .byte 44 @ 0x2c
; CHECK-NEXT:    .byte 47 @ 0x2f
; CHECK-NEXT:    .byte 50 @ 0x32
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i8 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Same as above but with i32 constant offsets and an i32 +5: the expected
; output is again one byte table (5, 8, ..., 50) and a single vldrb.u8 gather.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI9_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 14 @ 0xe
; CHECK-NEXT:    .byte 17 @ 0x11
; CHECK-NEXT:    .byte 20 @ 0x14
; CHECK-NEXT:    .byte 23 @ 0x17
; CHECK-NEXT:    .byte 26 @ 0x1a
; CHECK-NEXT:    .byte 29 @ 0x1d
; CHECK-NEXT:    .byte 32 @ 0x20
; CHECK-NEXT:    .byte 35 @ 0x23
; CHECK-NEXT:    .byte 38 @ 0x26
; CHECK-NEXT:    .byte 41 @ 0x29
; CHECK-NEXT:    .byte 44 @ 0x2c
; CHECK-NEXT:    .byte 47 @ 0x2f
; CHECK-NEXT:    .byte 50 @ 0x32
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 5
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Single GEP with i32 constant offsets (0, 3, ..., 45): the expected output is
; one byte table with those values and a single vldrb.u8 gather.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep2(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI10_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u8 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI10_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 15 @ 0xf
; CHECK-NEXT:    .byte 18 @ 0x12
; CHECK-NEXT:    .byte 21 @ 0x15
; CHECK-NEXT:    .byte 24 @ 0x18
; CHECK-NEXT:    .byte 27 @ 0x1b
; CHECK-NEXT:    .byte 30 @ 0x1e
; CHECK-NEXT:    .byte 33 @ 0x21
; CHECK-NEXT:    .byte 36 @ 0x24
; CHECK-NEXT:    .byte 39 @ 0x27
; CHECK-NEXT:    .byte 42 @ 0x2a
; CHECK-NEXT:    .byte 45 @ 0x2d
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Constant offsets (0, 3, ..., 45) followed by +256: the folded offsets
; (256 .. 301) are emitted as .long tables and every lane is loaded with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep3(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI11_0
; CHECK-NEXT:    adr r2, .LCPI11_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI11_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI11_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI11_0:
; CHECK-NEXT:    .long 280 @ 0x118
; CHECK-NEXT:    .long 283 @ 0x11b
; CHECK-NEXT:    .long 286 @ 0x11e
; CHECK-NEXT:    .long 289 @ 0x121
; CHECK-NEXT:  .LCPI11_1:
; CHECK-NEXT:    .long 292 @ 0x124
; CHECK-NEXT:    .long 295 @ 0x127
; CHECK-NEXT:    .long 298 @ 0x12a
; CHECK-NEXT:    .long 301 @ 0x12d
; CHECK-NEXT:  .LCPI11_2:
; CHECK-NEXT:    .long 256 @ 0x100
; CHECK-NEXT:    .long 259 @ 0x103
; CHECK-NEXT:    .long 262 @ 0x106
; CHECK-NEXT:    .long 265 @ 0x109
; CHECK-NEXT:  .LCPI11_3:
; CHECK-NEXT:    .long 268 @ 0x10c
; CHECK-NEXT:    .long 271 @ 0x10f
; CHECK-NEXT:    .long 274 @ 0x112
; CHECK-NEXT:    .long 277 @ 0x115
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 256
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Single GEP whose constant offsets include one value of 256 (lane 8): the
; offsets are emitted as .long tables and every lane is loaded with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep4(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI12_0
; CHECK-NEXT:    adr r2, .LCPI12_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI12_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI12_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI12_0:
; CHECK-NEXT:    .long 256 @ 0x100
; CHECK-NEXT:    .long 27 @ 0x1b
; CHECK-NEXT:    .long 30 @ 0x1e
; CHECK-NEXT:    .long 33 @ 0x21
; CHECK-NEXT:  .LCPI12_1:
; CHECK-NEXT:    .long 36 @ 0x24
; CHECK-NEXT:    .long 39 @ 0x27
; CHECK-NEXT:    .long 42 @ 0x2a
; CHECK-NEXT:    .long 45 @ 0x2d
; CHECK-NEXT:  .LCPI12_2:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 3 @ 0x3
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 9 @ 0x9
; CHECK-NEXT:  .LCPI12_3:
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 15 @ 0xf
; CHECK-NEXT:    .long 18 @ 0x12
; CHECK-NEXT:    .long 21 @ 0x15
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; The base is already a vector of pointers and 256 is added to each: the
; expected output adds a #0x100 splat to the four incoming vectors and loads
; every lane with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep5(<16 x i8*> %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov.i32 q4, #0x100
; CHECK-NEXT:    vadd.i32 q2, q2, q4
; CHECK-NEXT:    vadd.i32 q3, q3, q4
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    vadd.i32 q1, q1, q4
; CHECK-NEXT:    vmov r1, s15
; CHECK-NEXT:    vmov r4, s8
; CHECK-NEXT:    ldrb.w r12, [r0]
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r0]
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r2, [r0]
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r3, [r0]
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    vadd.i32 q3, q0, q4
; CHECK-NEXT:    vmov r5, s12
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[0], r5
; CHECK-NEXT:    vmov r5, s13
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[1], r5
; CHECK-NEXT:    vmov r5, s14
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[2], r5
; CHECK-NEXT:    vmov r5, s15
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[3], r5
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[4], r5
; CHECK-NEXT:    vmov r5, s5
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[5], r5
; CHECK-NEXT:    vmov r5, s6
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[6], r5
; CHECK-NEXT:    vmov r5, s7
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[7], r5
; CHECK-NEXT:    vmov r5, s9
; CHECK-NEXT:    vmov.8 q0[8], r4
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    vmov.8 q0[9], r5
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], lr
; CHECK-NEXT:    vmov.8 q0[12], r2
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.8 q0[15], r1
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %base, i32 256
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Constant offsets including one 256 (lane 8), followed by +1: the folded
; offsets (1, 4, ..., 257, ..., 46) are emitted as .long tables and every lane
; is loaded with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep6(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep6:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI14_0
; CHECK-NEXT:    adr r2, .LCPI14_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI14_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI14_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI14_0:
; CHECK-NEXT:    .long 257 @ 0x101
; CHECK-NEXT:    .long 28 @ 0x1c
; CHECK-NEXT:    .long 31 @ 0x1f
; CHECK-NEXT:    .long 34 @ 0x22
; CHECK-NEXT:  .LCPI14_1:
; CHECK-NEXT:    .long 37 @ 0x25
; CHECK-NEXT:    .long 40 @ 0x28
; CHECK-NEXT:    .long 43 @ 0x2b
; CHECK-NEXT:    .long 46 @ 0x2e
; CHECK-NEXT:  .LCPI14_2:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 4 @ 0x4
; CHECK-NEXT:    .long 7 @ 0x7
; CHECK-NEXT:    .long 10 @ 0xa
; CHECK-NEXT:  .LCPI14_3:
; CHECK-NEXT:    .long 13 @ 0xd
; CHECK-NEXT:    .long 16 @ 0x10
; CHECK-NEXT:    .long 19 @ 0x13
; CHECK-NEXT:    .long 22 @ 0x16
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 1
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


; Constant offsets starting with 100, followed by +200: the folded offsets
; (300, 203, ..., 245) are emitted as .long tables and every lane is loaded
; with ldrb.
define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep7(i8* %base) {
; CHECK-LABEL: unscaled_v16i8_i8_biggep7:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    adr r1, .LCPI15_0
; CHECK-NEXT:    adr r2, .LCPI15_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adr r6, .LCPI15_2
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    vmov r4, s7
; CHECK-NEXT:    ldrb.w r12, [r1]
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ldrb r5, [r5]
; CHECK-NEXT:    ldrb r2, [r2]
; CHECK-NEXT:    ldrb r4, [r4]
; CHECK-NEXT:    ldrb.w lr, [r1]
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ldrb r3, [r1]
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    adr r6, .LCPI15_3
; CHECK-NEXT:    vldrw.u32 q2, [r6]
; CHECK-NEXT:    vadd.i32 q3, q0, r0
; CHECK-NEXT:    vadd.i32 q2, q2, r0
; CHECK-NEXT:    vmov r0, s12
; CHECK-NEXT:    ldrb r1, [r1]
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov r0, s13
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov r0, s14
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov r0, s15
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov r0, s8
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov r0, s9
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov r0, s10
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov r0, s11
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov r0, s5
; CHECK-NEXT:    vmov.8 q0[8], r5
; CHECK-NEXT:    ldrb r0, [r0]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.8 q0[10], r12
; CHECK-NEXT:    vmov.8 q0[11], r4
; CHECK-NEXT:    vmov.8 q0[12], lr
; CHECK-NEXT:    vmov.8 q0[13], r3
; CHECK-NEXT:    vmov.8 q0[14], r1
; CHECK-NEXT:    vmov.8 q0[15], r2
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI15_0:
; CHECK-NEXT:    .long 224 @ 0xe0
; CHECK-NEXT:    .long 227 @ 0xe3
; CHECK-NEXT:    .long 230 @ 0xe6
; CHECK-NEXT:    .long 233 @ 0xe9
; CHECK-NEXT:  .LCPI15_1:
; CHECK-NEXT:    .long 236 @ 0xec
; CHECK-NEXT:    .long 239 @ 0xef
; CHECK-NEXT:    .long 242 @ 0xf2
; CHECK-NEXT:    .long 245 @ 0xf5
; CHECK-NEXT:  .LCPI15_2:
; CHECK-NEXT:    .long 300 @ 0x12c
; CHECK-NEXT:    .long 203 @ 0xcb
; CHECK-NEXT:    .long 206 @ 0xce
; CHECK-NEXT:    .long 209 @ 0xd1
; CHECK-NEXT:  .LCPI15_3:
; CHECK-NEXT:    .long 212 @ 0xd4
; CHECK-NEXT:    .long 215 @ 0xd7
; CHECK-NEXT:    .long 218 @ 0xda
; CHECK-NEXT:    .long 221 @ 0xdd
entry:
  %ptrs = getelementptr inbounds i8, i8* %base, <16 x i32> <i32 100, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45>
  %ptrs2 = getelementptr inbounds i8, <16 x i8*> %ptrs, i32 200
  %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  ret <16 x i8> %gather
}


997define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2(i8* %base, <16 x i8>* %offptr) { 998; CHECK-LABEL: unscaled_v16i8_i8_2: 999; CHECK: @ %bb.0: @ %entry 1000; CHECK-NEXT: .save {r4, r5, r6, lr} 1001; CHECK-NEXT: push {r4, r5, r6, lr} 1002; CHECK-NEXT: vldrb.s32 q0, [r1, #8] 1003; CHECK-NEXT: vldrb.s32 q2, [r1, #4] 1004; CHECK-NEXT: vadd.i32 q1, q0, r0 1005; CHECK-NEXT: vldrb.s32 q0, [r1, #12] 1006; CHECK-NEXT: vmov r2, s6 1007; CHECK-NEXT: vadd.i32 q2, q2, r0 1008; CHECK-NEXT: vadd.i32 q0, q0, r0 1009; CHECK-NEXT: vmov r6, s4 1010; CHECK-NEXT: vmov r3, s2 1011; CHECK-NEXT: vmov r4, s3 1012; CHECK-NEXT: vmov r5, s7 1013; CHECK-NEXT: ldrb.w r12, [r2] 1014; CHECK-NEXT: vmov r2, 
s0 1015; CHECK-NEXT: ldrb r6, [r6] 1016; CHECK-NEXT: ldrb r3, [r3] 1017; CHECK-NEXT: ldrb r4, [r4] 1018; CHECK-NEXT: ldrb r5, [r5] 1019; CHECK-NEXT: ldrb.w lr, [r2] 1020; CHECK-NEXT: vmov r2, s1 1021; CHECK-NEXT: vldrb.s32 q0, [r1] 1022; CHECK-NEXT: vadd.i32 q3, q0, r0 1023; CHECK-NEXT: vmov r0, s12 1024; CHECK-NEXT: ldrb r2, [r2] 1025; CHECK-NEXT: ldrb r0, [r0] 1026; CHECK-NEXT: vmov.8 q0[0], r0 1027; CHECK-NEXT: vmov r0, s13 1028; CHECK-NEXT: ldrb r0, [r0] 1029; CHECK-NEXT: vmov.8 q0[1], r0 1030; CHECK-NEXT: vmov r0, s14 1031; CHECK-NEXT: ldrb r0, [r0] 1032; CHECK-NEXT: vmov.8 q0[2], r0 1033; CHECK-NEXT: vmov r0, s15 1034; CHECK-NEXT: ldrb r0, [r0] 1035; CHECK-NEXT: vmov.8 q0[3], r0 1036; CHECK-NEXT: vmov r0, s8 1037; CHECK-NEXT: ldrb r0, [r0] 1038; CHECK-NEXT: vmov.8 q0[4], r0 1039; CHECK-NEXT: vmov r0, s9 1040; CHECK-NEXT: ldrb r0, [r0] 1041; CHECK-NEXT: vmov.8 q0[5], r0 1042; CHECK-NEXT: vmov r0, s10 1043; CHECK-NEXT: ldrb r0, [r0] 1044; CHECK-NEXT: vmov.8 q0[6], r0 1045; CHECK-NEXT: vmov r0, s11 1046; CHECK-NEXT: ldrb r0, [r0] 1047; CHECK-NEXT: vmov.8 q0[7], r0 1048; CHECK-NEXT: vmov r0, s5 1049; CHECK-NEXT: vmov.8 q0[8], r6 1050; CHECK-NEXT: ldrb r0, [r0] 1051; CHECK-NEXT: vmov.8 q0[9], r0 1052; CHECK-NEXT: vmov.8 q0[10], r12 1053; CHECK-NEXT: vmov.8 q0[11], r5 1054; CHECK-NEXT: vmov.8 q0[12], lr 1055; CHECK-NEXT: vmov.8 q0[13], r2 1056; CHECK-NEXT: vmov.8 q0[14], r3 1057; CHECK-NEXT: vmov.8 q0[15], r4 1058; CHECK-NEXT: pop {r4, r5, r6, pc} 1059entry: 1060 %offs = load <16 x i8>, <16 x i8>* %offptr, align 1 1061 %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %offs 1062 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1063 ret <16 x i8> %gather 1064} 1065 1066 1067define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_3(i8* %base, <16 x i8>* 
%offptr) { 1068; CHECK-LABEL: unscaled_v16i8_i8_3: 1069; CHECK: @ %bb.0: @ %entry 1070; CHECK-NEXT: adr r1, .LCPI17_0 1071; CHECK-NEXT: vldrw.u32 q1, [r1] 1072; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 1073; CHECK-NEXT: bx lr 1074; CHECK-NEXT: .p2align 4 1075; CHECK-NEXT: @ %bb.1: 1076; CHECK-NEXT: .LCPI17_0: 1077; CHECK-NEXT: .byte 0 @ 0x0 1078; CHECK-NEXT: .byte 3 @ 0x3 1079; CHECK-NEXT: .byte 6 @ 0x6 1080; CHECK-NEXT: .byte 9 @ 0x9 1081; CHECK-NEXT: .byte 12 @ 0xc 1082; CHECK-NEXT: .byte 15 @ 0xf 1083; CHECK-NEXT: .byte 18 @ 0x12 1084; CHECK-NEXT: .byte 21 @ 0x15 1085; CHECK-NEXT: .byte 24 @ 0x18 1086; CHECK-NEXT: .byte 27 @ 0x1b 1087; CHECK-NEXT: .byte 30 @ 0x1e 1088; CHECK-NEXT: .byte 33 @ 0x21 1089; CHECK-NEXT: .byte 36 @ 0x24 1090; CHECK-NEXT: .byte 39 @ 0x27 1091; CHECK-NEXT: .byte 42 @ 0x2a 1092; CHECK-NEXT: .byte 45 @ 0x2d 1093entry: 1094 %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> 1095 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1096 ret <16 x i8> %gather 1097} 1098 1099define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_basei16(i16* %base, <16 x i8>* %offptr) { 1100; CHECK-LABEL: unscaled_v16i8_basei16: 1101; CHECK: @ %bb.0: @ %entry 1102; CHECK-NEXT: .save {r4, r5, r6, lr} 1103; CHECK-NEXT: push {r4, r5, r6, lr} 1104; CHECK-NEXT: vldrb.u32 q0, [r1, #8] 1105; CHECK-NEXT: vldrb.u32 q2, [r1, #4] 1106; CHECK-NEXT: vshl.i32 q0, q0, #1 1107; CHECK-NEXT: vshl.i32 q2, q2, #1 1108; CHECK-NEXT: vadd.i32 q1, q0, r0 1109; CHECK-NEXT: vldrb.u32 q0, [r1, #12] 1110; CHECK-NEXT: vmov r2, s6 1111; CHECK-NEXT: vadd.i32 q2, q2, r0 1112; CHECK-NEXT: vshl.i32 q0, q0, #1 1113; CHECK-NEXT: vmov r6, s4 1114; CHECK-NEXT: vadd.i32 q0, q0, r0 1115; CHECK-NEXT: 
vmov r5, s7 1116; CHECK-NEXT: vmov r3, s2 1117; CHECK-NEXT: vmov r4, s3 1118; CHECK-NEXT: ldrb.w r12, [r2] 1119; CHECK-NEXT: vmov r2, s0 1120; CHECK-NEXT: ldrb r6, [r6] 1121; CHECK-NEXT: ldrb r5, [r5] 1122; CHECK-NEXT: ldrb r3, [r3] 1123; CHECK-NEXT: ldrb r4, [r4] 1124; CHECK-NEXT: ldrb.w lr, [r2] 1125; CHECK-NEXT: vmov r2, s1 1126; CHECK-NEXT: vldrb.u32 q0, [r1] 1127; CHECK-NEXT: vshl.i32 q0, q0, #1 1128; CHECK-NEXT: vadd.i32 q3, q0, r0 1129; CHECK-NEXT: vmov r0, s12 1130; CHECK-NEXT: ldrb r2, [r2] 1131; CHECK-NEXT: ldrb r0, [r0] 1132; CHECK-NEXT: vmov.8 q0[0], r0 1133; CHECK-NEXT: vmov r0, s13 1134; CHECK-NEXT: ldrb r0, [r0] 1135; CHECK-NEXT: vmov.8 q0[1], r0 1136; CHECK-NEXT: vmov r0, s14 1137; CHECK-NEXT: ldrb r0, [r0] 1138; CHECK-NEXT: vmov.8 q0[2], r0 1139; CHECK-NEXT: vmov r0, s15 1140; CHECK-NEXT: ldrb r0, [r0] 1141; CHECK-NEXT: vmov.8 q0[3], r0 1142; CHECK-NEXT: vmov r0, s8 1143; CHECK-NEXT: ldrb r0, [r0] 1144; CHECK-NEXT: vmov.8 q0[4], r0 1145; CHECK-NEXT: vmov r0, s9 1146; CHECK-NEXT: ldrb r0, [r0] 1147; CHECK-NEXT: vmov.8 q0[5], r0 1148; CHECK-NEXT: vmov r0, s10 1149; CHECK-NEXT: ldrb r0, [r0] 1150; CHECK-NEXT: vmov.8 q0[6], r0 1151; CHECK-NEXT: vmov r0, s11 1152; CHECK-NEXT: ldrb r0, [r0] 1153; CHECK-NEXT: vmov.8 q0[7], r0 1154; CHECK-NEXT: vmov r0, s5 1155; CHECK-NEXT: vmov.8 q0[8], r6 1156; CHECK-NEXT: ldrb r0, [r0] 1157; CHECK-NEXT: vmov.8 q0[9], r0 1158; CHECK-NEXT: vmov.8 q0[10], r12 1159; CHECK-NEXT: vmov.8 q0[11], r5 1160; CHECK-NEXT: vmov.8 q0[12], lr 1161; CHECK-NEXT: vmov.8 q0[13], r2 1162; CHECK-NEXT: vmov.8 q0[14], r3 1163; CHECK-NEXT: vmov.8 q0[15], r4 1164; CHECK-NEXT: pop {r4, r5, r6, pc} 1165entry: 1166 %offs = load <16 x i8>, <16 x i8>* %offptr, align 1 1167 %offs.zext = zext <16 x i8> %offs to <16 x i32> 1168 %ptrs = getelementptr inbounds i16, i16* %base, <16 x i32> %offs.zext 1169 %ptrs.cast = bitcast <16 x i16*> %ptrs to <16 x i8*> 1170 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs.cast, i32 1, <16 x 
i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1171 ret <16 x i8> %gather 1172} 1173 1174declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>) 1175declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>) 1176declare <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*>, i32, <2 x i1>, <2 x i8>) 1177