; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64-AVX2

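; and_masks: AND two <8 x i1> compare results and zero-extend to <8 x i32>.
; With plain AVX the mask-to-i32 zext is done by ANDing with a constant-pool
; vector of ones (vandps LCPI0_0 / (%rip)); with AVX2 the splat of 1 comes
; from a vbroadcastss instead.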
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
; X32-LABEL: and_masks:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    vmovups (%edx), %ymm0
; X32-NEXT:    vmovups (%ecx), %ymm1
; X32-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; X32-NEXT:    vmovups (%eax), %ymm2
; X32-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-NEXT:    vandps LCPI0_0, %ymm0, %ymm0
; X32-NEXT:    vmovaps %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: and_masks:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %ymm0
; X64-NEXT:    vmovups (%rsi), %ymm1
; X64-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; X64-NEXT:    vmovups (%rdx), %ymm2
; X64-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovaps %ymm0, (%rax)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; X32-AVX2-LABEL: and_masks:
; X32-AVX2:       ## %bb.0:
; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-AVX2-NEXT:    vmovups (%edx), %ymm0
; X32-AVX2-NEXT:    vmovups (%ecx), %ymm1
; X32-AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; X32-AVX2-NEXT:    vmovups (%eax), %ymm2
; X32-AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; X32-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X32-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    vmovaps %ymm0, (%eax)
; X32-AVX2-NEXT:    vzeroupper
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: and_masks:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vmovups (%rdi), %ymm0
; X64-AVX2-NEXT:    vmovups (%rsi), %ymm1
; X64-AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm1
; X64-AVX2-NEXT:    vmovups (%rdx), %ymm2
; X64-AVX2-NEXT:    vcmpltps %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; X64-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovaps %ymm0, (%rax)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %v0 = load <8 x float>, <8 x float>* %a, align 16
  %v1 = load <8 x float>, <8 x float>* %b, align 16
  %m0 = fcmp olt <8 x float> %v1, %v0
  %v2 = load <8 x float>, <8 x float>* %c, align 16
  %m1 = fcmp olt <8 x float> %v2, %v0
  %mand = and <8 x i1> %m1, %m0
  %r = zext <8 x i1> %mand to <8 x i32>
  store <8 x i32> %r, <8 x i32>* undef, align 32
  ret void
}

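; neg_masks: invert a compare mask (xor with all-ones) and zero-extend; the
; backend folds the NOT into the inverted compare predicate (vcmpnltps)
; rather than emitting a separate xor.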
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
; X32-LABEL: neg_masks:
; X32:       ## %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    vmovups (%ecx), %ymm0
; X32-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
; X32-NEXT:    vandps LCPI1_0, %ymm0, %ymm0
; X32-NEXT:    vmovaps %ymm0, (%eax)
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: neg_masks:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rsi), %ymm0
; X64-NEXT:    vcmpnltps (%rdi), %ymm0, %ymm0
; X64-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovaps %ymm0, (%rax)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; X32-AVX2-LABEL: neg_masks:
; X32-AVX2:       ## %bb.0:
; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX2-NEXT:    vmovups (%ecx), %ymm0
; X32-AVX2-NEXT:    vcmpnltps (%eax), %ymm0, %ymm0
; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; X32-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    vmovaps %ymm0, (%eax)
; X32-AVX2-NEXT:    vzeroupper
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: neg_masks:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vmovups (%rsi), %ymm0
; X64-AVX2-NEXT:    vcmpnltps (%rdi), %ymm0, %ymm0
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
; X64-AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vmovaps %ymm0, (%rax)
; X64-AVX2-NEXT:    vzeroupper
; X64-AVX2-NEXT:    retq
  %v0 = load <8 x float>, <8 x float>* %a, align 16
  %v1 = load <8 x float>, <8 x float>* %b, align 16
  %m0 = fcmp olt <8 x float> %v1, %v0
  %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  %r = zext <8 x i1> %mand to <8 x i32>
  store <8 x i32> %r, <8 x i32>* undef, align 32
  ret void
}

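; and_mask_constant: AND a compare mask with a constant <8 x i1>, then
; zero-extend to <8 x i32>; AVX splits the integer compare/AND across
; 128-bit halves, while AVX2 keeps it as single 256-bit vpcmpeqd/vpand ops.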
define <8 x i32> @and_mask_constant(<8 x i32> %v0, <8 x i32> %v1) {
; X32-LABEL: and_mask_constant:
; X32:       ## %bb.0:
; X32-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X32-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X32-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; X32-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vpand LCPI2_0, %xmm0, %xmm0
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: and_mask_constant:
; X64:       ## %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; X64-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
;
; X32-AVX2-LABEL: and_mask_constant:
; X32-AVX2:       ## %bb.0:
; X32-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; X32-AVX2-NEXT:    vpand LCPI2_0, %ymm0, %ymm0
; X32-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: and_mask_constant:
; X64-AVX2:       ## %bb.0:
; X64-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
  %m = icmp eq <8 x i32> %v0, zeroinitializer
  %mand = and <8 x i1> %m, <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>
  %r = zext <8 x i1> %mand to <8 x i32>
  ret <8 x i32> %r
}