; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Purpose of this test
; --------------------
; Every function below copies one 128-bit vector from the first array of a
; two-array struct into the second array of a struct, and the assertions pin
; the vector load/store instructions (and their immediate offsets) that llc
; emits for that copy with the options given in the llc invocation above.
;
;   fwd_*  - the store targets field 1 of the struct at %v, i.e. 16 bytes
;            past %v.
;   bwd_*  - the store targets field 1 of the struct one element before %v
;            (first GEP index is -1), i.e. 16 bytes before %v.
;
; The assertion lines are script-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Each struct is a pair of arrays, and each array is 16 bytes wide.
%struct.s_int8_t = type { [16 x i8], [16 x i8] }
%struct.s_int16_t = type { [8 x i16], [8 x i16] }
%struct.s_int32_t = type { [4 x i32], [4 x i32] }
%struct.s_float16_t = type { [8 x half], [8 x half] }
%struct.s_float32_t = type { [4 x float], [4 x float] }

; Forward copy of <16 x i8>: byte-element load from [r0], store to [r0, #16].
; Unlike the wider variants below, %v is noalias but not nocapture here.
define hidden void @fwd_int8_t(%struct.s_int8_t* noalias %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    vstrb.8 q0, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %arrayidx3 = getelementptr inbounds %struct.s_int8_t, %struct.s_int8_t* %v, i32 0, i32 1, i32 0
  %0 = bitcast %struct.s_int8_t* %v to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %2 = bitcast i8* %arrayidx3 to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret void
}

; Forward copy of <8 x i16>: halfword-element load from [r0], store to
; [r0, #16].
define hidden void @fwd_int16_t(%struct.s_int16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %arrayidx3 = getelementptr inbounds %struct.s_int16_t, %struct.s_int16_t* %v, i32 0, i32 1, i32 0
  %0 = bitcast %struct.s_int16_t* %v to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %2 = bitcast i16* %arrayidx3 to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret void
}

; Forward copy of <4 x i32>: word-element load from [r0], store to [r0, #16].
define hidden void @fwd_int32_t(%struct.s_int32_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %arrayidx3 = getelementptr inbounds %struct.s_int32_t, %struct.s_int32_t* %v, i32 0, i32 1, i32 0
  %0 = bitcast %struct.s_int32_t* %v to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast i32* %arrayidx3 to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret void
}

; Forward copy of <8 x half>. The recorded output differs from the integer
; variants: the load is post-indexed (r0 is advanced by 16 after the access)
; and the store then addresses plain [r0].
; NOTE(review): presumably this stems from how <8 x half> is handled when only
; +mve (and not mve.fp) is enabled - confirm before relying on it.
define hidden void @fwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0], #16
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %arrayidx3 = getelementptr inbounds %struct.s_float16_t, %struct.s_float16_t* %v, i32 0, i32 1, i32 0
  %0 = bitcast %struct.s_float16_t* %v to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %2 = bitcast half* %arrayidx3 to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret void
}

; Forward copy for the float struct. The vector is moved as <4 x i32>, not
; <4 x float>, and the GEP stops at the [4 x float] field itself rather than
; at its first element. Expected output matches the int32 variant.
define hidden void @fwd_float32_t(%struct.s_float32_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %d = getelementptr inbounds %struct.s_float32_t, %struct.s_float32_t* %v, i32 0, i32 1
  %0 = bitcast %struct.s_float32_t* %v to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %2 = bitcast [4 x float]* %d to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret void
}

; Backward copy of <16 x i8>: load from [r0], store to [r0, #-16].
; As in fwd_int8_t, %v is noalias but not nocapture.
define hidden void @bwd_int8_t(%struct.s_int8_t* noalias %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_int8_t:
; CHECK:       @ %bb.0: @ %for.end
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    vstrb.8 q0, [r0, #-16]
; CHECK-NEXT:    bx lr
for.end:
  %0 = bitcast %struct.s_int8_t* %v to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  %arrayidx3 = getelementptr inbounds %struct.s_int8_t, %struct.s_int8_t* %v, i32 -1, i32 1, i32 0
  %2 = bitcast i8* %arrayidx3 to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 1
  ret void
}

; Backward copy of <8 x i16>: load from [r0], store to [r0, #-16].
define hidden void @bwd_int16_t(%struct.s_int16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_int16_t:
; CHECK:       @ %bb.0: @ %for.end
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    vstrh.16 q0, [r0, #-16]
; CHECK-NEXT:    bx lr
for.end:
  %0 = bitcast %struct.s_int16_t* %v to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  %arrayidx3 = getelementptr inbounds %struct.s_int16_t, %struct.s_int16_t* %v, i32 -1, i32 1, i32 0
  %2 = bitcast i16* %arrayidx3 to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 2
  ret void
}

; Backward copy of <4 x i32>: load from [r0], store to [r0, #-16].
define hidden void @bwd_int32_t(%struct.s_int32_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_int32_t:
; CHECK:       @ %bb.0: @ %for.end
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-16]
; CHECK-NEXT:    bx lr
for.end:
  %0 = bitcast %struct.s_int32_t* %v to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %arrayidx3 = getelementptr inbounds %struct.s_int32_t, %struct.s_int32_t* %v, i32 -1, i32 1, i32 0
  %2 = bitcast i32* %arrayidx3 to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret void
}

; Backward copy of <8 x half>. As in fwd_float16_t, the recorded output uses a
; post-indexed load (here stepping r0 by -16) followed by a store to plain
; [r0], instead of an immediate-offset store.
; NOTE(review): presumably the same +mve-without-mve.fp effect as in
; fwd_float16_t - confirm.
define hidden void @bwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_float16_t:
; CHECK:       @ %bb.0: @ %for.end
; CHECK-NEXT:    vldrh.u16 q0, [r0], #-16
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
for.end:
  %0 = bitcast %struct.s_float16_t* %v to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  %arrayidx3 = getelementptr inbounds %struct.s_float16_t, %struct.s_float16_t* %v, i32 -1, i32 1, i32 0
  %2 = bitcast half* %arrayidx3 to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 2
  ret void
}

; Backward copy for the float struct, again moved as <4 x i32> with the GEP
; stopping at the [4 x float] field. Expected output matches the int32
; variant.
define hidden void @bwd_float32_t(%struct.s_float32_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_float32_t:
; CHECK:       @ %bb.0: @ %for.end
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r0, #-16]
; CHECK-NEXT:    bx lr
for.end:
  %0 = bitcast %struct.s_float32_t* %v to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  %d = getelementptr inbounds %struct.s_float32_t, %struct.s_float32_t* %v, i32 -1, i32 1
  %2 = bitcast [4 x float]* %d to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret void
}