; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

;;; Shift left
; Splat shl by constant 2 on <8 x i32>: AVX1 has no 256-bit integer shift,
; so codegen splits into two 128-bit vpslld halves.
define <8 x i32> @vshift00(<8 x i32> %a) {
; CHECK-LABEL: vshift00:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
16
; Splat shl by constant 2 on <16 x i16>, split into two vpsllw halves.
define <16 x i16> @vshift01(<16 x i16> %a) {
; CHECK-LABEL: vshift01:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
28
; Splat shl by constant 2 on <4 x i64>, split into two vpsllq halves.
define <4 x i64> @vshift02(<4 x i64> %a) {
; CHECK-LABEL: vshift02:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
40
;;; Logical Shift right
; Splat lshr by constant 2 on <8 x i32>, split into two vpsrld halves.
define <8 x i32> @vshift03(<8 x i32> %a) {
; CHECK-LABEL: vshift03:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrld $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
54
; Splat lshr by constant 2 on <16 x i16>, split into two vpsrlw halves.
define <16 x i16> @vshift04(<16 x i16> %a) {
; CHECK-LABEL: vshift04:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
66
; Splat lshr by constant 2 on <4 x i64>, split into two vpsrlq halves.
define <4 x i64> @vshift05(<4 x i64> %a) {
; CHECK-LABEL: vshift05:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrlq $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}
78
;;; Arithmetic Shift right
; Splat ashr by constant 2 on <8 x i32>, split into two vpsrad halves.
define <8 x i32> @vshift06(<8 x i32> %a) {
; CHECK-LABEL: vshift06:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsrad $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}
92
; Splat ashr by constant 2 on <16 x i16>, split into two vpsraw halves.
define <16 x i16> @vshift07(<16 x i16> %a) {
; CHECK-LABEL: vshift07:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}
104
; ashr on <32 x i8>: no byte-granularity shift exists, so codegen uses the
; word shift + mask + xor/sub sign-fixup sequence on each 128-bit half.
define <32 x i8> @vshift09(<32 x i8> %a) {
; CHECK-LABEL: vshift09:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; CHECK-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
124
; ashr by 7 on <32 x i8> broadcasts the sign bit; lowered as pcmpgt vs zero.
define <32 x i8> @vshift10(<32 x i8> %a) {
; CHECK-LABEL: vshift10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm2, %xmm1
; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %s
}
137
; lshr on <32 x i8>: lowered as a word shift plus a mask clearing the bits
; shifted in from the neighboring byte, per 128-bit half.
define <32 x i8> @vshift11(<32 x i8> %a) {
; CHECK-LABEL: vshift11:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsrlw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
152
; shl on <32 x i8>: word shift plus mask (0xFC) clearing bits shifted in
; from the neighboring byte, per 128-bit half.
define <32 x i8> @vshift12(<32 x i8> %a) {
; CHECK-LABEL: vshift12:
; CHECK:       # BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm1, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; CHECK-NEXT:    vpand %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %s
}
167
;;; Support variable shifts
; 1 << %a per lane: lowered via the float-exponent trick (shift the count
; into the exponent field, add 1.0's bit pattern, convert back).
define <8 x i32> @vshift08(<8 x i32> %a)  {
; CHECK-LABEL: vshift08:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm1
; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
; CHECK-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vcvttps2dq %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $23, %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
  ret <8 x i32> %bitop
}
185
; PR15141
; Non-uniform constant shl on <4 x i32> folds to a multiply by constants.
define <4 x i32> @vshift13(<4 x i32> %in) {
; CHECK-LABEL: vshift13:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpmulld {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i32> %T
}
195
;;; Uses shifts for sign extension
; trunc+sext i16->i8->i16 is lowered as shl-by-8 / ashr-by-8 per half.
define <16 x i16> @sext_v16i16(<16 x i16> %a)  {
; CHECK-LABEL: sext_v16i16:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm1
; CHECK-NEXT:    vpsraw $8, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
; CHECK-NEXT:    vpsraw $8, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}
211
; trunc+sext i32->i16->i32 is lowered as shl-by-16 / ashr-by-16 per half.
define <8 x i32> @sext_v8i32(<8 x i32> %a)  {
; CHECK-LABEL: sext_v8i32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpsrad $16, %xmm1, %xmm1
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpslld $16, %xmm0, %xmm0
; CHECK-NEXT:    vpsrad $16, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}