; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE

; Signed conversion test, sitofp <2 x i8> -> <2 x float>.
; Default-legalization run (prefix CHECK) expects vpsllq $56 / vpsrad $24 /
; vpshufd ahead of vcvtdq2ps.
; Widening-legalization run (prefix CHECK-WIDE) expects vpmovsxbd followed by
; vcvtdq2ps.
define <2 x float> @cvt_v2i8_v2f32(<2 x i8> %src) {
; CHECK-LABEL: cvt_v2i8_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq $56, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i8_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovsxbd %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i8> %src to <2 x float>
  ret <2 x float> %res
}

; Signed conversion test, sitofp <2 x i16> -> <2 x float>.
; Default-legalization run (prefix CHECK) expects vpsllq $48 / vpsrad $16 /
; vpshufd ahead of vcvtdq2ps.
; Widening-legalization run (prefix CHECK-WIDE) expects vpmovsxwd followed by
; vcvtdq2ps.
define <2 x float> @cvt_v2i16_v2f32(<2 x i16> %src) {
; CHECK-LABEL: cvt_v2i16_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllq $48, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i16_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovsxwd %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i16> %src to <2 x float>
  ret <2 x float> %res
}

; Signed conversion test, sitofp <2 x i32> -> <2 x float>.
; Default-legalization run (prefix CHECK) expects a vpermilps [0,2,2,3]
; shuffle ahead of vcvtdq2ps.
; Widening-legalization run (prefix CHECK-WIDE) expects vcvtdq2ps alone.
define <2 x float> @cvt_v2i32_v2f32(<2 x i32> %src) {
; CHECK-LABEL: cvt_v2i32_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2i32_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = sitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned conversion test, uitofp <2 x i8> -> <2 x float>.
; Default-legalization run (prefix CHECK) expects a vpshufb that keeps bytes 0
; and 8 and zeroes the bytes after each of them, then vcvtdq2ps.
; Widening-legalization run (prefix CHECK-WIDE) expects vpmovzxbd followed by
; vcvtdq2ps.
define <2 x float> @cvt_v2u8_v2f32(<2 x i8> %src) {
; CHECK-LABEL: cvt_v2u8_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u8_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i8> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned conversion test, uitofp <2 x i16> -> <2 x float>.
; Default-legalization run (prefix CHECK) expects a vpshufb that places byte
; pairs [0,1] and [8,9] in the low two dword lanes with zeroed upper halves,
; then vcvtdq2ps.
; Widening-legalization run (prefix CHECK-WIDE) expects vpmovzxwd followed by
; vcvtdq2ps.
define <2 x float> @cvt_v2u16_v2f32(<2 x i16> %src) {
; CHECK-LABEL: cvt_v2u16_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[8,9],zero,zero,xmm0[8,9],zero,zero,xmm0[10,11],zero,zero
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u16_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i16> %src to <2 x float>
  ret <2 x float> %res
}

; Unsigned conversion test, uitofp <2 x i32> -> <2 x float>.
; Both runs expect the value to be OR-ed with the constant vector
; [4.503600e+15,4.503600e+15], the same constant subtracted with vsubpd, and
; the result narrowed with vcvtpd2ps.
; Default-legalization run (prefix CHECK) first zeroes lanes 1 and 3 with
; vxorps + vblendps; the widening-legalization run (prefix CHECK-WIDE) first
; uses vpmovzxdq instead.
define <2 x float> @cvt_v2u32_v2f32(<2 x i32> %src) {
; CHECK-LABEL: cvt_v2u32_v2f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; CHECK-NEXT:    vmovaps {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2u32_v2f32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-WIDE-NEXT:    vmovdqa {{.*#+}} xmm1 = [4.503600e+15,4.503600e+15]
; CHECK-WIDE-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vsubpd %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vcvtpd2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = uitofp <2 x i32> %src to <2 x float>
  ret <2 x float> %res
}

; Signed conversion test, fptosi <2 x float> -> <2 x i8>.
; Default-legalization run (prefix CHECK) expects a 68-byte stack adjustment,
; both lanes spilled, converted through flds/fisttpll, and the result rebuilt
; with vmovd + vpinsrd.
; Widening-legalization run (prefix CHECK-WIDE) expects two scalar vcvttss2si
; conversions combined with vmovd + vpinsrb.
define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm0
; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i8>
  ret <2 x i8> %res
}

; Signed conversion test, fptosi <2 x float> -> <2 x i16>.
; Default-legalization run (prefix CHECK) expects a 68-byte stack adjustment,
; both lanes spilled, converted through flds/fisttpll, and the result rebuilt
; with vmovd + vpinsrd.
; Widening-legalization run (prefix CHECK-WIDE) expects vcvttps2dq on ymm0,
; vextractf128 + vpackssdw to narrow, and vzeroupper before returning.
define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-WIDE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vzeroupper
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i16>
  ret <2 x i16> %res
}

; Signed conversion test, fptosi <2 x float> -> <2 x i32>.
; Default-legalization run (prefix CHECK) expects vcvttps2dq followed by a
; vpmovzxdq that spreads the two results into lanes 0 and 2.
; Widening-legalization run (prefix CHECK-WIDE) expects vcvttps2dq alone.
define <2 x i32> @cvt_v2f32_v2i32(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptosi <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}

; Unsigned conversion test, fptoui <2 x float> -> <2 x i8>.
; Default-legalization run (prefix CHECK) expects, per lane, a
; vcmpltss/vsubss/vblendvps selection against a scalar constant loaded from
; memory, an flds/fisttpll conversion through the stack, and a
; vucomiss/setae/shll $31/xorl adjustment of one stored dword, with the result
; rebuilt by vmovd + vpinsrd.
; Widening-legalization run (prefix CHECK-WIDE) expects two scalar vcvttss2si
; conversions combined with vmovd + vpinsrb.
define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
; CHECK-WIDE-NEXT:    vmovd %ecx, %xmm0
; CHECK-WIDE-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i8>
  ret <2 x i8> %res
}

; Unsigned conversion test, fptoui <2 x float> -> <2 x i16>.
; Default-legalization run (prefix CHECK) expects, per lane, a
; vcmpltss/vsubss/vblendvps selection against a scalar constant loaded from
; memory, an flds/fisttpll conversion through the stack, and a
; vucomiss/setae/shll $31/xorl adjustment of one stored dword, with the result
; rebuilt by vmovd + vpinsrd.
; Widening-legalization run (prefix CHECK-WIDE) expects vcvttps2dq on ymm0,
; vextractf128 + vpackusdw to narrow, and vzeroupper before returning.
define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-WIDE-NEXT:    vcvttps2dq %ymm0, %ymm0
; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-WIDE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vzeroupper
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i16>
  ret <2 x i16> %res
}

; Unsigned conversion test, fptoui <2 x float> -> <2 x i32>.
; Default-legalization run (prefix CHECK) expects, per lane, a
; vcmpltss/vsubss/vblendvps selection against a scalar constant loaded from
; memory, an flds/fisttpll conversion through the stack, and a
; vucomiss/setae/shll $31/xorl adjustment of one stored dword, with the result
; rebuilt by vmovd + vpinsrd.
; Widening-legalization run (prefix CHECK-WIDE) expects all four lanes of xmm0
; to be stored (vmovss + three vextractps), each converted with flds/fisttpll,
; and the result rebuilt by vmovd + three vpinsrd.
define <2 x i32> @cvt_v2f32_v2u32(<2 x float> %src) {
; CHECK-LABEL: cvt_v2f32_v2u32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    subl $68, %esp
; CHECK-NEXT:    .cfi_def_cfa_offset 72
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vcmpltss %xmm2, %xmm1, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm1, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm1, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    vcmpltss %xmm2, %xmm0, %xmm3
; CHECK-NEXT:    vsubss %xmm2, %xmm0, %xmm4
; CHECK-NEXT:    vblendvps %xmm3, %xmm0, %xmm4, %xmm3
; CHECK-NEXT:    vmovss %xmm3, {{[0-9]+}}(%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll (%esp)
; CHECK-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    vucomiss %xmm2, %xmm1
; CHECK-NEXT:    setae %al
; CHECK-NEXT:    shll $31, %eax
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    vucomiss %xmm2, %xmm0
; CHECK-NEXT:    setae %cl
; CHECK-NEXT:    shll $31, %ecx
; CHECK-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; CHECK-NEXT:    addl $68, %esp
; CHECK-NEXT:    retl
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u32:
; CHECK-WIDE:       ## %bb.0:
; CHECK-WIDE-NEXT:    subl $68, %esp
; CHECK-WIDE-NEXT:    .cfi_def_cfa_offset 72
; CHECK-WIDE-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $1, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $2, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    vextractps $3, %xmm0, {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    flds {{[0-9]+}}(%esp)
; CHECK-WIDE-NEXT:    fisttpll (%esp)
; CHECK-WIDE-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-WIDE-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    vpinsrd $3, (%esp), %xmm0, %xmm0
; CHECK-WIDE-NEXT:    addl $68, %esp
; CHECK-WIDE-NEXT:    retl
  %res = fptoui <2 x float> %src to <2 x i32>
  ret <2 x i32> %res
}
