; RUN: llc -mtriple armv7 %s -o - | FileCheck %s

; Sum lanes 0..2 of a <4 x i16> load after an unsigned int-to-float conversion.
; The uitofp is applied to the whole vector before any lane is extracted, and
; the expected assembly keeps it that way: one widening vmovl.u16 followed by a
; single vector vcvt.f32.u32, then scalar vadd.f32 on the resulting s-registers.
define float @f(<4 x i16>* nocapture %in) {
; CHECK-LABEL: f:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16}, [r0:64]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vcvt.f32.u32 q0, q8
; CHECK-NEXT:    vadd.f32 s4, s0, s1
; CHECK-NEXT:    vadd.f32 s0, s4, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  ; Convert all four lanes at once; lane 3 is converted but never used.
  %2 = uitofp <4 x i16> %1 to <4 x float>
  %3 = extractelement <4 x float> %2, i32 0
  %4 = extractelement <4 x float> %2, i32 1
  %5 = extractelement <4 x float> %2, i32 2

  ; (lane0 + lane1) + lane2
  %6 = fadd float %3, %4
  %7 = fadd float %6, %5

  ret float %7
}

; Convert only lane 0 of a <4 x i16> load to float, treating it as unsigned.
; Here the extractelement comes before the uitofp, so the expected assembly
; moves the lane to a core register with vmov.u16 and converts it with a scalar
; vcvt.f32.u32; no separate zero-extension instruction is expected.
define float @g(<4 x i16>* nocapture %in) {
; CHECK-LABEL: g:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = uitofp i16 %2 to float
  ret float %3
}

; Make sure we generate zext from <4 x i8> to <4 x i32>.
; The IR zero-extends each of the four lanes individually and rebuilds the
; vector with insertelement; the expected assembly performs the extension as two
; vector widening moves (vmovl.u8, then vmovl.u16) with no per-lane code.
define <4 x i32> @h(<4 x i8> *%in) {
; CHECK-LABEL: h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    bx lr
  %1 = load <4 x i8>, <4 x i8>* %in, align 4
  ; Lane 0: extract, zero-extend, insert into the result vector.
  %2 = extractelement <4 x i8> %1, i32 0
  %3 = zext i8 %2 to i32
  %4 = insertelement <4 x i32> undef, i32 %3, i32 0
  ; Lane 1.
  %5 = extractelement <4 x i8> %1, i32 1
  %6 = zext i8 %5 to i32
  %7 = insertelement <4 x i32> %4, i32 %6, i32 1
  ; Lane 2.
  %8 = extractelement <4 x i8> %1, i32 2
  %9 = zext i8 %8 to i32
  %10 = insertelement <4 x i32> %7, i32 %9, i32 2
  ; Lane 3.
  %11 = extractelement <4 x i8> %1, i32 3
  %12 = zext i8 %11 to i32
  %13 = insertelement <4 x i32> %10, i32 %12, i32 3
  ret <4 x i32> %13
}

; Convert lane 0 of a <4 x i16> load to float, treating it as signed.
; The expected assembly extracts the lane with an unsigned move (vmov.u16),
; sign-extends it in a core register (sxth), and then uses vcvt.f32.s32.
define float @i(<4 x i16>* nocapture %in) {
  ; FIXME: The vmov.u16 + sxth pair could be emitted as a single vmov.s16.
; CHECK-LABEL: i:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <4 x i16>, <4 x i16>* %in
  %2 = extractelement <4 x i16> %1, i32 0
  %3 = sitofp i16 %2 to float
  ret float %3
}

; Convert lane 7 of an <8 x i8> load to float, treating it as unsigned.
; The expected assembly extracts the lane with vmov.u8 and converts it with a
; scalar vcvt.f32.u32; no separate zero-extension instruction is expected.
define float @j(<8 x i8>* nocapture %in) {
; CHECK-LABEL: j:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u8 r0, d16[7]
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, <8 x i8>* %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = uitofp i8 %2 to float
  ret float %3
}

; Convert lane 7 of an <8 x i8> load to float, treating it as signed.
; The expected assembly extracts the lane with an unsigned move (vmov.u8),
; sign-extends it in a core register (sxtb), and then uses vcvt.f32.s32.
define float @k(<8 x i8>* nocapture %in) {
; FIXME: The vmov.u8 + sxtb pair could be emitted as a single vmov.s8.
; CHECK-LABEL: k:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.u8 r0, d16[7]
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  %1 = load <8 x i8>, <8 x i8>* %in
  %2 = extractelement <8 x i8> %1, i32 7
  %3 = sitofp i8 %2 to float
  ret float %3
}

; Signed conversion of lanes whose upper bits are known to be zero.
; Every lane is masked with 3 before lanes 3 and 0 are extracted and passed to
; sitofp. The expected assembly extracts the lanes with vmov.u16 and applies the
; mask in core registers; no sxth is expected before the vcvt.f32.s32
; conversions.
define float @KnownUpperZero(<4 x i16> %v) {
; CHECK-LABEL: KnownUpperZero:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmov.u16 r0, d16[0]
; CHECK-NEXT:    vmov.u16 r1, d16[3]
; CHECK-NEXT:    and r0, r0, #3
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    and r0, r1, #3
; CHECK-NEXT:    vmov s2, r0
; CHECK-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-NEXT:    vcvt.f32.s32 s2, s2
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bx lr
  ; Keep only the low two bits of every lane.
  %1 = and <4 x i16> %v, <i16 3,i16 3,i16 3,i16 3>
  %2 = extractelement <4 x i16> %1, i32 3
  %3 = extractelement <4 x i16> %1, i32 0
  %sinf1 = sitofp i16 %2 to float
  %sinf2 = sitofp i16 %3 to float
  %sum =   fadd float %sinf1, %sinf2
  ret float %sum
}
