; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

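; Most masked tests below call the intrinsic three times (merge masking,
; zero masking, and an all-ones mask) and combine the results, so a single
; function checks every masking variant together with its MC encoding.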
declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2 # encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x19,0xc1,0x01]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x19,0xc0,0x01]
; X86-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2 # encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x19,0xc1,0x01]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x19,0xc0,0x01]
; X64-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; X86:       # %bb.0:
; X86-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc2,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1b,0xc1,0x01]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1b,0xc0,0x01]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; X64:       # %bb.0:
; X64-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc2,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1b,0xc1,0x01]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1b,0xc0,0x01]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0, i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

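; Subvector inserts. Note that the unmasked calls are canonicalized to the
; other element width (e.g. vinsertf32x8 becomes vinsertf64x4), since the
; shuffles are equivalent once no masking is involved.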
declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xd1,0x01]
; X86-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc1,0x01]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xd1,0x01]
; X64-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc1,0x01]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x18,0xd1,0x01]
; X86-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x18,0xc1,0x01]
; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x18,0xd1,0x01]
; X64-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x18,0xc1,0x01]
; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xd1,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc1,0x01]
; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xd1,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc1,0x01]
; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x38,0xd1,0x01]
; X86-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x38,0xc1,0x01]
; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x38,0xd1,0x01]
; X64-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x38,0xc1,0x01]
; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

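; Mask-to-vector conversions: vpmovm2d/vpmovm2q expand each bit of a
; k-register into an all-ones or all-zeros vector element.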
declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32> @test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2d_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    vpmovm2d %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    vpmovm2d %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x38,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64> @test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; X86-LABEL: test_int_x86_avx512_cvtmask2q_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    vpmovm2q %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x38,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtmask2q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k0 # encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    vpmovm2q %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x38,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

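; Subvector broadcasts. With a register source the broadcast is emitted as
; a chain of vinsert instructions; the _load variants check that a
; broadcast from memory is folded into a single masked vbroadcast.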
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x8_512_load(<8 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf32x8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1b,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x8 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1b,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <8 x float>, <8 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 # encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

define <8 x double> @test_int_x86_avx512_mask_broadcastf64x2_512_load(<2 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf64x2 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x1a,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf64x2 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x1a,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <2 x double>, <2 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x8_512_load(<8 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti32x8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x5b,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x8 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x5b,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <8 x i32>, <8 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}

define <8 x i64> @test_int_x86_avx512_mask_broadcasti64x2_512_load(<2 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti64x2 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x5a,0x00]
; X86-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti64x2 (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x5a,0x07]
; X64-NEXT:    # zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; X64-NEXT:    retq # encoding: [0xc3]

  %x0 = load <2 x i64>, <2 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

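; 32x2 broadcasts, lowered here through the same vinsert sequences as the
; subvector broadcasts above.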
declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 # encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

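; Vector-to-mask conversions: vpmovd2m/vpmovq2m gather the sign bit of each
; element into a k-register.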
declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16 @test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8 @test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x39,0xc0]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

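; vfpclass tests. The result of the unmasked call feeds the write mask of
; the second call, so both the masked and unmasked forms are exercised.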
declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %res)
  ret i8 %res1
}

declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16 @test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 2, i16 %res)
  ret i16 %res1
}