; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2

; Widen 8 x i16 -> 8 x i32; on AVX this should select a single vpmovsxwd.
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; AVX: sext_8i16_to_8i32
; AVX: vpmovsxwd

  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

; Widen 4 x i32 -> 4 x i64; on AVX this should select vpmovsxdq.
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; AVX: sext_4i32_to_4i64
; AVX: vpmovsxdq

  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}

; Load 4 x i16 and sign-extend to 4 x i32. AVX folds the load into a
; sign-extending vpmovsxwd; pre-AVX targets unpack then arithmetic-shift.
; AVX: load_sext_test1
; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test1
; SSSE3: movq
; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSSE3: psrad $16
; SSSE3: ret

; SSE2: load_sext_test1
; SSE2: movq
; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
; SSE2: psrad $16
; SSE2: ret
define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
 %X = load <4 x i16>* %ptr
 %Y = sext <4 x i16> %X to <4 x i32>
 ret <4 x i32>%Y
}

; Load 4 x i8 and sign-extend to 4 x i32. AVX folds the load into
; vpmovsxbd; older targets shuffle the bytes up then shift right.
; AVX: load_sext_test2
; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test2
; SSSE3: movd
; SSSE3: pshufb
; SSSE3: psrad $24
; SSSE3: ret

; SSE2: load_sext_test2
; SSE2: movl
; SSE2: psrad $24
; SSE2: ret
define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
 %X = load <4 x i8>* %ptr
 %Y = sext <4 x i8> %X to <4 x i32>
 ret <4 x i32>%Y
}

; Load 2 x i8 and sign-extend to 2 x i64. AVX folds the load into
; vpmovsxbq; pre-AVX uses scalar sign-extending loads and repacks.
; AVX: load_sext_test3
; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test3
; SSSE3: movsbq
; SSSE3: movsbq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test3
; SSE2: movsbq
; SSE2: movsbq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
 %X = load <2 x i8>* %ptr
 %Y = sext <2 x i8> %X to <2 x i64>
 ret <2 x i64>%Y
}

; Load 2 x i16 and sign-extend to 2 x i64. AVX folds the load into
; vpmovsxwq; pre-AVX uses scalar sign-extending loads and repacks.
; AVX: load_sext_test4
; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test4
; SSSE3: movswq
; SSSE3: movswq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test4
; SSE2: movswq
; SSE2: movswq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
 %X = load <2 x i16>* %ptr
 %Y = sext <2 x i16> %X to <2 x i64>
 ret <2 x i64>%Y
}

; Load 2 x i32 and sign-extend to 2 x i64. AVX folds the load into
; vpmovsxdq; pre-AVX uses scalar sign-extending loads and repacks.
; AVX: load_sext_test5
; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test5
; SSSE3: movslq
; SSSE3: movslq
; SSSE3: punpcklqdq
; SSSE3: ret

; SSE2: load_sext_test5
; SSE2: movslq
; SSE2: movslq
; SSE2: punpcklqdq
; SSE2: ret
define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
 %X = load <2 x i32>* %ptr
 %Y = sext <2 x i32> %X to <2 x i64>
 ret <2 x i64>%Y
}

; Load 8 x i8 and sign-extend to 8 x i16. AVX folds the load into
; vpmovsxbw; pre-AVX unpacks the bytes then arithmetic-shifts.
; AVX: load_sext_test6
; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
; AVX: ret

; SSSE3: load_sext_test6
; SSSE3: movq
; SSSE3: punpcklbw
; SSSE3: psraw $8
; SSSE3: ret

; SSE2: load_sext_test6
; SSE2: movq
; SSE2: punpcklbw
; SSE2: psraw $8
; SSE2: ret
define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
 %X = load <8 x i8>* %ptr
 %Y = sext <8 x i8> %X to <8 x i16>
 ret <8 x i16>%Y
}

; Sign-extend an i1 mask vector to 4 x i64: the bit is moved to the MSB
; (shift left 31) then broadcast by an arithmetic shift, then widened.
; AVX: sext_4i1_to_4i64
; AVX: vpslld  $31
; AVX: vpsrad  $31
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
  %extmask = sext <4 x i1> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

; Sign-extend 4 x i8 (held in 32-bit lanes) to 4 x i64: shift the byte to
; the top (shift left 24), arithmetic-shift back down, then widen.
; AVX: sext_4i8_to_4i64
; AVX: vpslld  $24
; AVX: vpsrad  $24
; AVX: vpmovsxdq
; AVX: vpmovsxdq
; AVX: ret
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}
