; Tests for ARM codegen of uitofp/sitofp applied to elements extracted from
; small integer vectors: the backend should use NEON lane moves / vmovl
; widening instead of scalarizing through redundant extends.
; RUN: llc -mtriple armv7 %s -o - | FileCheck %s

define float @f(<4 x i16>* nocapture %in) {
; CHECK-LABEL: f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vcvt.f32.u32 q0, q8
; CHECK-NEXT:    vadd.f32 s4, s0, s1
; CHECK-NEXT:    vadd.f32 s0, s4, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  %2 = uitofp <4 x i16> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}

define float @g(<4 x i16>* nocapture %in) {
; CHECK-LABEL: g:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = uitofp i16 %2 to float
  ret float %3
}

; Make sure we generate zext from <4 x i8> to <4 x i32>.
define <4 x i32> @h(<4 x i8> *%in) {
; CHECK-LABEL: h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %1 = load <4 x i8>, <4 x i8>* %in, align 4
  %2 = extractelement <4 x i8> %1, i32 0
  %3 = zext i8 %2 to i32
  %4 = insertelement <4 x i32> undef, i32 %3, i32 0
  %5 = extractelement <4 x i8> %1, i32 1
  %6 = zext i8 %5 to i32
  %7 = insertelement <4 x i32> %4, i32 %6, i32 1
  %8 = extractelement <4 x i8> %1, i32 2
  %9 = zext i8 %8 to i32
  %10 = insertelement <4 x i32> %7, i32 %9, i32 2
  %11 = extractelement <4 x i8> %1, i32 3
  %12 = zext i8 %11 to i32
  %13 = insertelement <4 x i32> %10, i32 %12, i32 3
  ret <4 x i32> %13
}

define float @i(<4 x i16>* nocapture %in) {
; FIXME: The vmov.u + sxt can convert to a vmov.s
; CHECK-LABEL: i:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = sitofp i16 %2 to float
  ret float %3
}

define float @j(<8 x i8>* nocapture %in) {
; CHECK-LABEL: j:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u8 r0, d16[7]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, <8 x i8>* %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = uitofp i8 %2 to float
  ret float %3
}

define float @k(<8 x i8>* nocapture %in) {
; FIXME: The vmov.u + sxt can convert to a vmov.s
; CHECK-LABEL: k:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u8 r0, d16[7]
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, <8 x i8>* %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = sitofp i8 %2 to float
  ret float %3
}

define float @KnownUpperZero(<4 x i16> %v) {
; CHECK-LABEL: KnownUpperZero:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov.u16 r1, d16[3]
; CHECK-NEXT:    and r0, r0, #3
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    and r0, r1, #3
; CHECK-NEXT:    vmov s2, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vcvt.f32.s32 s2, s2
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = and <4 x i16> %v, <i16 3,i16 3,i16 3,i16 3>
  %2 = extractelement <4 x i16> %1, i32 3
  %3 = extractelement <4 x i16> %1, i32 0
  %sinf1 = sitofp i16 %2 to float
  %sinf2 = sitofp i16 %3 to float
  %sum = fadd float %sinf1, %sinf2
  ret float %sum
}