; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefix=X64

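; Check that extending vector loads are folded into the memory forms of
; VPMOVSX*/VPMOVZX* instead of a separate load plus an in-register extend.

; Load <16 x i8> and sign extend to <16 x i16>: vpmovsxbw from memory.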
define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbw (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbw (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = sext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

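; Load, extract the low 8 bytes, and sign extend to <8 x i32>: vpmovsxbd from memory.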
define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

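; Load, extract the low 4 bytes, and sign extend to <4 x i64>: vpmovsxbq from memory.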
define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxbq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

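; Load <8 x i16> and sign extend to <8 x i32>: vpmovsxwd from memory.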
define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwd (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = sext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

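; Load, extract the low 4 words, and sign extend to <4 x i64>: vpmovsxwq from memory.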
define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxwq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

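; Load <4 x i32> and sign extend to <4 x i64>: vpmovsxdq from memory.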
define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovsxdq (%eax), %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovsxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = sext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}

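; Load <16 x i8> and zero extend to <16 x i16>: vpmovzxbw from memory.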
define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbw:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = zext <16 x i8> %1 to <16 x i16>
  ret <16 x i16> %2
}

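; Load, extract the low 8 bytes, and zero extend to <8 x i32>: vpmovzxbd from memory.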
define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = zext <8 x i8> %2 to <8 x i32>
  ret <8 x i32> %3
}

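; Load, extract the low 4 bytes, and zero extend to <4 x i64>: vpmovzxbq from memory.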
define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxbq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a, align 1
  %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

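; Load <8 x i16> and zero extend to <8 x i32>: vpmovzxwd from memory.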
define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwd:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = zext <8 x i16> %1 to <8 x i32>
  ret <8 x i32> %2
}

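; Load, extract the low 4 words, and zero extend to <4 x i64>: vpmovzxwq from memory.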
define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxwq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a, align 1
  %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = zext <4 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

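; Load <4 x i32> and zero extend to <4 x i64>: vpmovzxdq from memory.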
define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; X32-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X32-NEXT:    retl
;
; X64-LABEL: test_llvm_x86_avx2_pmovzxdq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a, align 1
  %2 = zext <4 x i32> %1 to <4 x i64>
  ret <4 x i64> %2
}