; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX

; Sum of absolute differences over 8 bytes, computed in i32 lanes.
; The absolute value is written as "x > -1 ? x : 0 - x", i.e. a signed
; "x >= 0" test expressed with sgt against all-ones.
; Returns the i32 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 32-bit move into %eax.
; %stride only feeds %idx.ext, which has no later use.
define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #0 {
; SSE2-LABEL: sad8_32bit_icmp_sge:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_32bit_icmp_sge:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
entry:
  ; Dead value: nothing below reads %idx.ext.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: keep %6 when it is > -1, otherwise 0 - %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp sgt <8 x i32> %6, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, computed in i32 lanes.
; The absolute value is written as "x > 0 ? x : 0 - x" (signed sgt against
; zero; the x == 0 lane takes the negated arm, which is also 0).
; Returns the i32 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 32-bit move into %eax.
; %stride only feeds %idx.ext, which has no later use.
define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #1 {
; SSE2-LABEL: sad8_32bit_icmp_sgt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_32bit_icmp_sgt:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
entry:
  ; Dead value: nothing below reads %idx.ext.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: keep %6 when it is > 0, otherwise 0 - %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp sgt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %6, <8 x i32> %8
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, computed in i32 lanes.
; The absolute value is written as "x < 1 ? 0 - x : x", i.e. a signed
; "x <= 0" test expressed with slt against 1, with the select arms swapped
; relative to the sgt-based variants.
; Returns the i32 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 32-bit move into %eax.
; %stride only feeds %idx.ext, which has no later use.
define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #2 {
; SSE2-LABEL: sad8_32bit_icmp_sle:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_32bit_icmp_sle:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
entry:
  ; Dead value: nothing below reads %idx.ext.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: take 0 - %6 when %6 is < 1, otherwise %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes, computed in i32 lanes.
; The absolute value is written as "x < 0 ? 0 - x : x" (signed slt against
; zero).
; Returns the i32 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 32-bit move into %eax.
; %stride only feeds %idx.ext, which has no later use.
define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i32 %stride) local_unnamed_addr #3 {
; SSE2-LABEL: sad8_32bit_icmp_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_32bit_icmp_slt:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
entry:
  ; Dead value: nothing below reads %idx.ext.
  %idx.ext = zext i32 %stride to i64
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: take 0 - %6 when %6 is < 0, otherwise %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i32> %9, %rdx.shuf
  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  %10 = extractelement <8 x i32> %bin.rdx232, i32 0
  ret i32 %10
}

; Sum of absolute differences over 8 bytes with a 64-bit accumulator.
; The per-lane absolute difference is formed in i32 ("x < 0 ? 0 - x : x") and
; then sign-extended to i64 before the horizontal add.
; Returns the i64 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 64-bit move into %rax.
; %stride is not referenced anywhere in the body.
define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_sext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_64bit_icmp_sext_slt:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: take 0 - %6 when %6 is < 0, otherwise %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Widen the i32 absolute differences to i64 with a sign extension.
  %10 = sext <8 x i32> %9 to <8 x i64>
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %10
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %11 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %11
}

; Sum of absolute differences over 8 bytes with a 64-bit accumulator.
; The per-lane absolute difference is formed in i32 ("x < 0 ? 0 - x : x") and
; then zero-extended to i64 before the horizontal add.
; Returns the i64 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 64-bit move into %rax.
; %stride is not referenced anywhere in the body.
define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_64bit_icmp_zext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_64bit_icmp_zext_slt:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i32.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i32>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i32>
  ; Per-lane absolute difference: take 0 - %6 when %6 is < 0, otherwise %6.
  %6 = sub nsw <8 x i32> %2, %5
  %7 = icmp slt <8 x i32> %6, zeroinitializer
  %8 = sub nsw <8 x i32> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
  ; Widen the i32 absolute differences to i64 with a zero extension.
  %10 = zext <8 x i32> %9 to <8 x i64>
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i64> %10, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %10
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %11 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %11
}

; Sum of absolute differences over 8 bytes, done entirely in i64 lanes.
; Unlike the variants that widen after the absolute value, the loaded bytes
; are zero-extended straight to i64, so the subtract, the compare and the
; select ("x < 0 ? 0 - x : x") all operate on <8 x i64>.
; Returns the i64 total of |cur[i] - ref[i]| for i in 0..7.
; The assertions below require the SSE2 and AVX configurations to lower the
; whole body to two 8-byte loads, one (v)psadbw and a 64-bit move into %rax.
; %stride is not referenced anywhere in the body.
define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
; SSE2-LABEL: sad8_early_64bit_icmp_zext_slt:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    psadbw %xmm0, %xmm1
; SSE2-NEXT:    movq %xmm1, %rax
; SSE2-NEXT:    retq
;
; AVX-LABEL: sad8_early_64bit_icmp_zext_slt:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpsadbw %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
entry:
  br label %for.body

for.body:                                         ; preds = %entry
  ; Load 8 bytes from each pointer and zero-extend every lane to i64.
  %0 = bitcast i8* %cur to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i64>
  %3 = bitcast i8* %ref to <8 x i8>*
  %4 = load <8 x i8>, <8 x i8>* %3, align 1
  %5 = zext <8 x i8> %4 to <8 x i64>
  ; Per-lane absolute difference: take 0 - %6 when %6 is < 0, otherwise %6.
  %6 = sub nsw <8 x i64> %2, %5
  %7 = icmp slt <8 x i64> %6, zeroinitializer
  %8 = sub nsw <8 x i64> zeroinitializer, %6
  %9 = select <8 x i1> %7, <8 x i64> %8, <8 x i64> %6
  ; Horizontal add in three halving steps: lanes 4-7 onto 0-3, lanes 2-3 onto
  ; 0-1, lane 1 onto lane 0. Only lane 0 of the final vector is read.
  %rdx.shuf = shufflevector <8 x i64> %9, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx = add <8 x i64> %rdx.shuf, %9
  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  %10 = extractelement <8 x i64> %bin.rdx239, i32 0
  ret i64 %10
}
