; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s

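; Each intrinsic below is exercised three ways: merge-masked (blending into
; the passthru vector under the i8 mask), zero-masked (zeroinitializer
; passthru), and unmasked (mask of -1). The results are summed so FileCheck
; can verify the {%k1}, {%k1} {z}, and plain EVEX encodings in a single test.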
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32* %y_ptr) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; CHECK-NEXT:    vpaddd (%rsi){1to8}, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x58,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; CHECK-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc9]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; CHECK-NEXT:    vpaddq %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; CHECK-NEXT:    vpaddq %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

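; Floating-point broadcasts follow the same pattern via vbroadcastss and
; vbroadcastsd; here the unmasked call passes a zeroinitializer passthru,
; which should still select the plain unmasked broadcast encoding.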
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x19,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; CHECK-NEXT:    vaddps %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; CHECK-NEXT:    vaddps %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc9]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

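; The vmovsldup/vmovshdup/vmovddup tests additionally check the shuffle-mask
; annotations the autogenerator prints next to each encoding, e.g.
; xmm2 = xmm0[0,0,2,2].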
declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[1,1,3,3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

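; vpermilpd/vpermilps take an i32 control that is emitted as the trailing
; immediate byte of the encoding. The pd.256 test passes i32 22, but only the
; low four bits are meaningful for that form, so $6 (0x06) is what gets
; printed and encoded.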
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06]
; CHECK-NEXT:    ## ymm2 = ymm0[0,1,3,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,3,2]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,1,3,2]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01]
; CHECK-NEXT:    ## xmm2 = xmm0[1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16]
; CHECK-NEXT:    ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16]
; CHECK-NEXT:    ## xmm2 = xmm0[2,1,1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[2,1,1,0]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[2,1,1,0]
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

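; vpermpd/vpermq cross-lane permutes. Here the immediate %x1 occupies the
; first GPR argument, so the mask arrives in %esi rather than %edi.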
declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

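; Masked store tests: each function stores the same vector twice, masked to
; %ptr1 (%rdi) under %x2 and with an all-ones mask to %ptr2 (%rsi); the
; latter should lower to an ordinary unmasked store.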
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; CHECK-NEXT:    vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; CHECK-NEXT:    vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; CHECK-NEXT:    vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; CHECK-NEXT:    vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; CHECK-NEXT:    vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; CHECK-NEXT:    vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
; CHECK-NEXT:    vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
; CHECK-NEXT:    vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

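; Masked load tests: the first load is unmasked into the passthru position,
; the second merge-loads over that result, and the third uses zero-masking,
; covering the plain, {%k1}, and {%k1} {z} load encodings.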
644define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
645; CHECK-LABEL: test_mask_load_aligned_ps_256:
646; CHECK:       ## BB#0:
647; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
648; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
649; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
650; CHECK-NEXT:    vmovaps (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
651; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
652; CHECK-NEXT:    retq ## encoding: [0xc3]
653  %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
654  %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
655  %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
656  %res4 = fadd <8 x float> %res2, %res1
657  ret <8 x float> %res4
658}
659
660declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
661
662define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
663; CHECK-LABEL: test_mask_load_unaligned_ps_256:
664; CHECK:       ## BB#0:
665; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
666; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
667; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
668; CHECK-NEXT:    vmovups (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
669; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
670; CHECK-NEXT:    retq ## encoding: [0xc3]
671  %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
672  %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
673  %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
674  %res4 = fadd <8 x float> %res2, %res1
675  ret <8 x float> %res4
676}
677
678declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
679
680define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
681; CHECK-LABEL: test_mask_load_aligned_pd_256:
682; CHECK:       ## BB#0:
683; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
684; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
685; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
686; CHECK-NEXT:    vmovapd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
687; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
688; CHECK-NEXT:    retq ## encoding: [0xc3]
689  %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
690  %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
691  %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
692  %res4 = fadd <4 x double> %res2, %res1
693  ret <4 x double> %res4
694}
695
696declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
697
698define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
699; CHECK-LABEL: test_mask_load_unaligned_pd_256:
700; CHECK:       ## BB#0:
701; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
702; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
703; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
704; CHECK-NEXT:    vmovupd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
705; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
706; CHECK-NEXT:    retq ## encoding: [0xc3]
707  %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
708  %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
709  %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
710  %res4 = fadd <4 x double> %res2, %res1
711  ret <4 x double> %res4
712}
713
714declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
715
716define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
717; CHECK-LABEL: test_mask_load_aligned_ps_128:
718; CHECK:       ## BB#0:
719; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
720; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
721; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
722; CHECK-NEXT:    vmovaps (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
723; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
724; CHECK-NEXT:    retq ## encoding: [0xc3]
725  %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
726  %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
727  %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
728  %res4 = fadd <4 x float> %res2, %res1
729  ret <4 x float> %res4
730}
731
732declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
733
734define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
735; CHECK-LABEL: test_mask_load_unaligned_ps_128:
736; CHECK:       ## BB#0:
737; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
738; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
739; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
740; CHECK-NEXT:    vmovups (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
741; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
742; CHECK-NEXT:    retq ## encoding: [0xc3]
743  %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
744  %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
745  %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
746  %res4 = fadd <4 x float> %res2, %res1
747  ret <4 x float> %res4
748}
749
750declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
751
752define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
753; CHECK-LABEL: test_mask_load_aligned_pd_128:
754; CHECK:       ## BB#0:
755; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
756; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
757; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
758; CHECK-NEXT:    vmovapd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
759; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
760; CHECK-NEXT:    retq ## encoding: [0xc3]
761  %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
762  %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
763  %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
764  %res4 = fadd <2 x double> %res2, %res1
765  ret <2 x double> %res4
766}
767
768declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
769
770define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
771; CHECK-LABEL: test_mask_load_unaligned_pd_128:
772; CHECK:       ## BB#0:
773; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
774; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
775; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
776; CHECK-NEXT:    vmovupd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
777; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
778; CHECK-NEXT:    retq ## encoding: [0xc3]
779  %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
780  %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
781  %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
782  %res4 = fadd <2 x double> %res2, %res1
783  ret <2 x double> %res4
784}
785
786declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
787
788declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)
789
790define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
791; CHECK-LABEL: test_mask_load_unaligned_d_128:
792; CHECK:       ## BB#0:
793; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
794; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
795; CHECK-NEXT:    vmovdqu32 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
796; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
797; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
798; CHECK-NEXT:    retq ## encoding: [0xc3]
799  %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
800  %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
801  %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
802  %res4 = add <4 x i32> %res2, %res1
803  ret <4 x i32> %res4
804}
805
806declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)
807
808define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
809; CHECK-LABEL: test_mask_load_unaligned_d_256:
810; CHECK:       ## BB#0:
811; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
812; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
813; CHECK-NEXT:    vmovdqu32 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
814; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
815; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
816; CHECK-NEXT:    retq ## encoding: [0xc3]
817  %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
818  %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
819  %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
820  %res4 = add <8 x i32> %res2, %res1
821  ret <8 x i32> %res4
822}
823
824declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)
825
826define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
827; CHECK-LABEL: test_mask_load_unaligned_q_128:
828; CHECK:       ## BB#0:
829; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
830; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
831; CHECK-NEXT:    vmovdqu64 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
832; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
833; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
834; CHECK-NEXT:    retq ## encoding: [0xc3]
835  %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
836  %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
837  %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
838  %res4 = add <2 x i64> %res2, %res1
839  ret <2 x i64> %res4
840}
841
842declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)
843
844define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
845; CHECK-LABEL: test_mask_load_unaligned_q_256:
846; CHECK:       ## BB#0:
847; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
848; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
849; CHECK-NEXT:    vmovdqu64 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
850; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
851; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
852; CHECK-NEXT:    retq ## encoding: [0xc3]
853  %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
854  %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
855  %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
856  %res4 = add <4 x i64> %res2, %res1
857  ret <4 x i64> %res4
858}
859
860declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
861
862define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

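; Tests for the masked pshufd intrinsic: each function exercises the
; unmasked, merge-masked, and zero-masked forms and sums the results so
; all three encodings are verified in a single body.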
declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7d,0x08,0x70,0xd0,0x03]
; CHECK-NEXT:    ## xmm2 = xmm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7d,0x28,0x70,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

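; Tests for the masked pcmpeq/pcmpgt intrinsics. The returned i8 is the
; comparison mask; when fewer than eight elements are compared, the
; unused upper mask bits are cleared with kshiftlw/kshiftrw pairs
; before the result is moved to %eax with kmovw.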
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

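; Tests for the masked floating-point unpack intrinsics
; (vunpckhpd/vunpcklpd and vunpckhps/vunpcklps): the merge-masked and
; unmasked results are combined with an fadd so both encodings are
; checked in each function.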
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0]
; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

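; Integer unpack (vpunpckhdq/vpunpckldq and vpunpckhqdq/vpunpcklqdq)
; variants of the same pattern, combining the masked and unmasked
; results with an integer add.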
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; CHECK-NEXT:    ## xmm2 = xmm0[1],xmm1[1]
; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; CHECK-NEXT:    ## xmm2 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; CHECK-NEXT:    ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; CHECK-NEXT:    ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

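; Tests for the masked vpandd intrinsic, covering register (rr), memory
; (rm), and broadcast (rmb) operands, each in unmasked, merge-masked
; (k), and zero-masked (kz) form.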
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

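; The same rr/rm/rmb and k/kz matrix for the masked vpord intrinsic.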
1706define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
1707; CHECK-LABEL: test_mask_or_epi32_rr_128:
1708; CHECK:       ## BB#0:
1709; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
1710; CHECK-NEXT:    retq ## encoding: [0xc3]
1711  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1712  ret <4 x i32> %res
1713}
1714
1715define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
1716; CHECK-LABEL: test_mask_or_epi32_rrk_128:
1717; CHECK:       ## BB#0:
1718; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1719; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
1720; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1721; CHECK-NEXT:    retq ## encoding: [0xc3]
1722  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1723  ret <4 x i32> %res
1724}
1725
1726define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
1727; CHECK-LABEL: test_mask_or_epi32_rrkz_128:
1728; CHECK:       ## BB#0:
1729; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1730; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
1731; CHECK-NEXT:    retq ## encoding: [0xc3]
1732  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1733  ret <4 x i32> %res
1734}
1735
1736define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
1737; CHECK-LABEL: test_mask_or_epi32_rm_128:
1738; CHECK:       ## BB#0:
1739; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
1740; CHECK-NEXT:    retq ## encoding: [0xc3]
1741  %b = load <4 x i32>, <4 x i32>* %ptr_b
1742  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1743  ret <4 x i32> %res
1744}
1745
1746define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1747; CHECK-LABEL: test_mask_or_epi32_rmk_128:
1748; CHECK:       ## BB#0:
1749; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1750; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
1751; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1752; CHECK-NEXT:    retq ## encoding: [0xc3]
1753  %b = load <4 x i32>, <4 x i32>* %ptr_b
1754  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1755  ret <4 x i32> %res
1756}
1757
1758define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
1759; CHECK-LABEL: test_mask_or_epi32_rmkz_128:
1760; CHECK:       ## BB#0:
1761; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1762; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
1763; CHECK-NEXT:    retq ## encoding: [0xc3]
1764  %b = load <4 x i32>, <4 x i32>* %ptr_b
1765  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1766  ret <4 x i32> %res
1767}
1768
1769define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
1770; CHECK-LABEL: test_mask_or_epi32_rmb_128:
1771; CHECK:       ## BB#0:
1772; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
1773; CHECK-NEXT:    retq ## encoding: [0xc3]
1774  %q = load i32, i32* %ptr_b
1775  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1776  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1777  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
1778  ret <4 x i32> %res
1779}
1780
1781define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
1782; CHECK-LABEL: test_mask_or_epi32_rmbk_128:
1783; CHECK:       ## BB#0:
1784; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1785; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
1786; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
1787; CHECK-NEXT:    retq ## encoding: [0xc3]
1788  %q = load i32, i32* %ptr_b
1789  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1790  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1791  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
1792  ret <4 x i32> %res
1793}
1794
1795define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
1796; CHECK-LABEL: test_mask_or_epi32_rmbkz_128:
1797; CHECK:       ## BB#0:
1798; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
1799; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
1800; CHECK-NEXT:    retq ## encoding: [0xc3]
1801  %q = load i32, i32* %ptr_b
1802  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
1803  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
1804  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
1805  ret <4 x i32> %res
1806}
1807
1808declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
1809
1810define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
1811; CHECK-LABEL: test_mask_or_epi32_rr_256:
1812; CHECK:       ## BB#0:
1813; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
1814; CHECK-NEXT:    retq ## encoding: [0xc3]
1815  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
1816  ret <8 x i32> %res
1817}
1818
1819define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
1820; CHECK-LABEL: test_mask_or_epi32_rrk_256:
1821; CHECK:       ## BB#0:
1822; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
1823; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
1824; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1825; CHECK-NEXT:    retq ## encoding: [0xc3]
1826  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
1827  ret <8 x i32> %res
1828}
1829
define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

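; A rough C-level sketch of what the 128-bit merge- and zero-masked xor tests
; below exercise (hypothetical frontend usage via the corresponding AVX-512VL
; intrinsics from <immintrin.h>; these names do not appear in this file):
;   __m128i r = _mm_mask_xor_epi32(passThru, mask, a, b);  // merge-masking
;   __m128i z = _mm_maskz_xor_epi32(mask, a, b);           // zero-masking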
define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

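; 256-bit vpxord variants: same rr/rm/rmb pattern as the 128-bit tests above,
; with ymm registers and {1to8} embedded broadcasts.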
define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

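; vpandnd computes (~a) & b, matching the operand order of the
; llvm.x86.avx512.mask.pandn.d.* intrinsics; for <4 x i32> only the low 4 bits
; of the i8 mask select lanes.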
define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

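; 256-bit vpandnd variants; all 8 bits of the i8 mask are significant here.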
define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

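; Quadword (epi64) andnot tests: note the {1to2} embedded broadcast and that
; only the low 2 bits of the i8 mask select lanes.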
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

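; 256-bit quadword andnot tests: {1to4} broadcast, low 4 mask bits.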
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
