; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c

;
; Signed Saturation
;

; Unmasked saturating signed add, reg/reg: should select a single vpaddsw.
define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %res = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16>, <32 x i16>)

; Merge-masked saturating signed add, reg/reg: result blended into %passThru via {%k1}.
define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked saturating signed add, reg/reg: {%k1} {z} zeroes inactive lanes.
define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked saturating signed add, reg/mem: load should fold into vpaddsw.
define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %1
}

; Merge-masked saturating signed add, reg/mem: folded load plus {%k1} merge.
define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked saturating signed add, reg/mem: folded load plus {%k1} {z}.
define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked saturating signed subtract, reg/reg: should select a single vpsubsw.
define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
declare <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16>, <32 x i16>)

; Merge-masked saturating signed subtract, reg/reg.
define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}

; Zero-masked saturating signed subtract, reg/reg.
define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked saturating signed subtract, reg/mem: load should fold into vpsubsw.
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}

; Merge-masked saturating signed subtract, reg/mem.
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}

; Zero-masked saturating signed subtract, reg/mem.
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}


; Illegal-wide (1024-bit) saturating signed add: must split into two 512-bit
; vpaddsw ops; on i386 the second RHS half is passed on the stack, hence the
; aligned frame setup and the 8(%ebp) memory operand.
define <64 x i16> @test_mask_adds_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epi16_rr_1024:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-64, %esp
; AVX512F-32-NEXT:    subl $64, %esp
; AVX512F-32-NEXT:    vpaddsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
  %1 = call <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %1
}
declare <64 x i16> @llvm.sadd.sat.v64i16(<64 x i16>, <64 x i16>)

; Illegal-wide (1024-bit) saturating signed subtract: split into two vpsubsw.
define <64 x i16> @test_mask_subs_epi16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubsw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubsw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rr_1024:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-64, %esp
; AVX512F-32-NEXT:    subl $64, %esp
; AVX512F-32-NEXT:    vpsubsw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpsubsw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
  %sub = call <64 x i16> @llvm.ssub.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %sub
}
declare <64 x i16> @llvm.ssub.sat.v64i16(<64 x i16>, <64 x i16>)

;
; Unsigned Saturation
;

; Unmasked saturating unsigned add, reg/reg: should select a single vpaddusw.
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %res = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %res
}
declare <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16>, <32 x i16>)

; Merge-masked saturating unsigned add, reg/reg.
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked saturating unsigned add, reg/reg.
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked saturating unsigned add, reg/mem: load should fold into vpaddusw.
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %1
}

; Merge-masked saturating unsigned add, reg/mem.
define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
  ret <32 x i16> %3
}

; Zero-masked saturating unsigned add, reg/mem.
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %1 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
  ret <32 x i16> %3
}

; Unmasked saturating unsigned subtract, reg/reg: should select a single vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}
declare <32 x i16> @llvm.usub.sat.v32i16(<32 x i16>, <32 x i16>)

; Merge-masked saturating unsigned subtract, reg/reg.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm2, %zmm0
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}

; Zero-masked saturating unsigned subtract, reg/reg.
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}

; Unmasked saturating unsigned subtract, reg/mem: load should fold into vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  ret <32 x i16> %sub
}

; Merge-masked saturating unsigned subtract, reg/mem.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT:    vmovdqa64 %zmm1, %zmm0
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> %passThru
  ret <32 x i16> %res
}

; Zero-masked saturating unsigned subtract, reg/mem.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT:    retl
  %b = load <32 x i16>, <32 x i16>* %ptr_b
  %sub = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %a, <32 x i16> %b)
  %bc = bitcast i32 %mask to <32 x i1>
  %res = select <32 x i1> %bc, <32 x i16> %sub, <32 x i16> zeroinitializer
  ret <32 x i16> %res
}


; Illegal-wide (1024-bit) saturating unsigned add: split into two vpaddusw;
; on i386 the second RHS half comes from the stack (8(%ebp)).
define <64 x i16> @test_mask_adds_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpaddusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpaddusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_1024:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-64, %esp
; AVX512F-32-NEXT:    subl $64, %esp
; AVX512F-32-NEXT:    vpaddusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpaddusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
  %1 = call <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %1
}
declare <64 x i16> @llvm.uadd.sat.v64i16(<64 x i16>, <64 x i16>)

; Illegal-wide (1024-bit) saturating unsigned subtract: split into two vpsubusw.
define <64 x i16> @test_mask_subs_epu16_rr_1024(<64 x i16> %a, <64 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsubusw %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    vpsubusw %zmm3, %zmm1, %zmm1
; AVX512BW-NEXT:    retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_1024:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-64, %esp
; AVX512F-32-NEXT:    subl $64, %esp
; AVX512F-32-NEXT:    vpsubusw %zmm2, %zmm0, %zmm0
; AVX512F-32-NEXT:    vpsubusw 8(%ebp), %zmm1, %zmm1
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
  %sub = call <64 x i16> @llvm.usub.sat.v64i16(<64 x i16> %a, <64 x i16> %b)
  ret <64 x i16> %sub
}
declare <64 x i16> @llvm.usub.sat.v64i16(<64 x i16>, <64 x i16>)
