• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
4
; Unmasked packed-single rsqrt28: the intrinsic is called with an all-ones
; mask (i16 -1), a zeroinitializer passthru and i32 8 as its last operand.
; The assertions expect the {sae} form with no mask register.
5define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
6; CHECK-LABEL: test_rsqrt28_ps:
7; CHECK:       # %bb.0:
8; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
9; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
10  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
11  ret <16 x float> %res
12}
13
; Merge-masked packed-single rsqrt28: constant mask i16 6, %a1 as passthru and
; i32 8 as the last operand. The assertions expect the mask to be materialised
; into %k1, the {sae} result written to %zmm1 under {%k1}, and a vmovaps of
; %zmm1 into %zmm0.
14define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
15; CHECK-LABEL: test1_rsqrt28_ps:
16; CHECK:       # %bb.0:
17; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
18; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
19; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
20; CHECK-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
21; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
22  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
23  ret <16 x float> %res
24}
25
; Packed-single rsqrt28 with constant mask i16 6, an undef passthru and i32 4
; as the last operand. The assertions expect the zero-masked form
; ({%k1} {z}) without {sae}.
26define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
27; CHECK-LABEL: test2_rsqrt28_ps:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
30; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
31; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
32; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
33  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
34  ret <16 x float> %res
35}
36
; Packed-single rsqrt28 with constant mask i16 6, a zeroinitializer passthru
; and i32 4 as the last operand. The assertions expect the zero-masked form
; ({%k1} {z}) without {sae}.
37define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
38; CHECK-LABEL: test3_rsqrt28_ps:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
41; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
42; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
43; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
44  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
45  ret <16 x float> %res
46}
47
; Packed-single rsqrt28 with constant mask i16 6, an undef passthru and i32 8
; as the last operand. The assertions expect the zero-masked form
; ({%k1} {z}) together with {sae}.
48define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
49; CHECK-LABEL: test4_rsqrt28_ps:
50; CHECK:       # %bb.0:
51; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
52; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
53; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
54; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
55  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
56  ret <16 x float> %res
57}
58
59declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
60
; Unmasked packed-single rcp28: all-ones mask (i16 -1), zeroinitializer
; passthru and i32 8 as the last operand. The assertions expect the {sae}
; form of vrcp28ps with no mask register.
61define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
62; CHECK-LABEL: test_rcp28_ps_512:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
65; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
66  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
67  ret <16 x float> %res
68}
69declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
70
; Unmasked packed-double rcp28: all-ones mask (i8 -1), zeroinitializer
; passthru and i32 8 as the last operand. The assertions expect the {sae}
; form of vrcp28pd with no mask register.
71define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
72; CHECK-LABEL: test_rcp28_pd_512:
73; CHECK:       # %bb.0:
74; CHECK-NEXT:    vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
75; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
76  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
77  ret <8 x double> %res
78}
79declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
80
; Unmasked packed-single exp2: all-ones mask (i16 -1), zeroinitializer
; passthru and i32 8 as the last operand. The assertions expect the {sae}
; form of vexp2ps with no mask register.
81define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
82; CHECK-LABEL: test_exp2_ps_512:
83; CHECK:       # %bb.0:
84; CHECK-NEXT:    vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
85; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
86  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
87  ret <16 x float> %res
88}
89declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
90
; Unmasked packed-double exp2: all-ones mask (i8 -1), zeroinitializer
; passthru and i32 8 as the last operand. The assertions expect the {sae}
; form of vexp2pd with no mask register.
91define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
92; CHECK-LABEL: test_exp2_pd_512:
93; CHECK:       # %bb.0:
94; CHECK-NEXT:    vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
95; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
96  %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
97  ret <8 x double> %res
98}
99declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
100
; Unmasked scalar-single rsqrt28: %a0 is passed as both sources, with a
; zeroinitializer passthru, an all-ones mask (i8 -1) and i32 8 as the last
; operand. The assertions expect the {sae} register form with no mask.
101define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
102; CHECK-LABEL: test_rsqrt28_ss:
103; CHECK:       # %bb.0:
104; CHECK-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
105; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
106  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
107  ret <4 x float> %res
108}
109declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
110
; Unmasked scalar-single rcp28: %a0 is passed as both sources, with a
; zeroinitializer passthru, an all-ones mask (i8 -1) and i32 8 as the last
; operand. The assertions expect the {sae} register form with no mask.
111define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
112; CHECK-LABEL: test_rcp28_ss:
113; CHECK:       # %bb.0:
114; CHECK-NEXT:    vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
115; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
116  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
117  ret <4 x float> %res
118}
119declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
120
; Scalar-single rcp28 whose second source %a1 is loaded from %a1ptr; the
; passthru is undef, the mask is all-ones (i8 -1) and the last operand is
; i32 4. The assertions expect the load to appear as the instruction's
; memory operand, with no {sae} and no mask register, on both triples.
121define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
122; X86-LABEL: test_rcp28_ss_load:
123; X86:       # %bb.0:
124; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
125; X86-NEXT:    vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
126; X86-NEXT:    retl # encoding: [0xc3]
127;
128; X64-LABEL: test_rcp28_ss_load:
129; X64:       # %bb.0:
130; X64-NEXT:    vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
131; X64-NEXT:    retq # encoding: [0xc3]
132  %a1 = load <4 x float>, <4 x float>* %a1ptr
133  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
134  ret <4 x float> %res
135}
136
; Scalar-single rsqrt28 whose second source %a1 is loaded from %a1ptr; the
; passthru is undef, the mask is all-ones (i8 -1) and the last operand is
; i32 4. The assertions expect the load to appear as the instruction's
; memory operand, with no {sae} and no mask register, on both triples.
137define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
138; X86-LABEL: test_rsqrt28_ss_load:
139; X86:       # %bb.0:
140; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
141; X86-NEXT:    vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
142; X86-NEXT:    retl # encoding: [0xc3]
143;
144; X64-LABEL: test_rsqrt28_ss_load:
145; X64:       # %bb.0:
146; X64-NEXT:    vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
147; X64-NEXT:    retq # encoding: [0xc3]
148  %a1 = load <4 x float>, <4 x float>* %a1ptr
149  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
150  ret <4 x float> %res
151}
152
; Scalar-single rsqrt28 with a run-time %mask, %a0 as both sources, a
; zeroinitializer passthru and i32 8 as the last operand. The assertions
; expect %mask to be moved into %k1 and the zero-masked {sae} form
; ({%k1} {z}) on both triples.
153define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
154; X86-LABEL: test_rsqrt28_ss_maskz:
155; X86:       # %bb.0:
156; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
157; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
158; X86-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
159; X86-NEXT:    retl # encoding: [0xc3]
160;
161; X64-LABEL: test_rsqrt28_ss_maskz:
162; X64:       # %bb.0:
163; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
164; X64-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
165; X64-NEXT:    retq # encoding: [0xc3]
166  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8) ;
167  ret <4 x float> %res
168}
169
; Merge-masked scalar-single rsqrt28: sources %a0 and %b0, passthru %c0, a
; run-time %mask and i32 8 as the last operand. The assertions expect the
; {sae} result written to %xmm2 under {%k1}, followed by a vmovaps of %xmm2
; into %xmm0, on both triples.
170define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
171; X86-LABEL: test_rsqrt28_ss_mask:
172; X86:       # %bb.0:
173; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
174; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
175; X86-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
176; X86-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
177; X86-NEXT:    retl # encoding: [0xc3]
178;
179; X64-LABEL: test_rsqrt28_ss_mask:
180; X64:       # %bb.0:
181; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
182; X64-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
183; X64-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
184; X64-NEXT:    retq # encoding: [0xc3]
185  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8) ;
186  ret <4 x float> %res
187}
188
; Merge-masked scalar-double rcp28 with a memory source: %a1 is loaded from
; %a1ptr and passed as the second source so the load can be folded into the
; instruction. %a2 is the passthru, %mask is a run-time mask and the last
; operand is i32 4 (no {sae} in the expected output).
; The assertions below are hand-derived for this IR; regenerate them with
; utils/update_llc_test_checks.py to confirm the exact instruction order.
define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, <2 x double>* %a1ptr, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_rcp28_sd_mask_load:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrcp28sd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x08]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rcp28_sd_mask_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrcp28sd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0x0f]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}
208declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
209
; Zero-masked scalar-double rsqrt28 with a memory source: %a1 is loaded from
; %a1ptr and passed as the second source so the load can be folded into the
; instruction. The passthru is zeroinitializer, %mask is a run-time mask and
; the last operand is i32 4 (no {sae} in the expected output).
; The assertions below are hand-derived for this IR; regenerate them with
; utils/update_llc_test_checks.py to confirm the exact instruction order.
define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, <2 x double>* %a1ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_load:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
  ret <2 x double> %res
}
227
; Scalar-double rsqrt28 with a run-time %mask, %a0 as both sources, a
; zeroinitializer passthru and i32 8 as the last operand. The assertions
; expect %mask to be moved into %k1 and the zero-masked {sae} form
; ({%k1} {z}) on both triples.
228define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
229; X86-LABEL: test_rsqrt28_sd_maskz:
230; X86:       # %bb.0:
231; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
232; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
233; X86-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
234; X86-NEXT:    retl # encoding: [0xc3]
235;
236; X64-LABEL: test_rsqrt28_sd_maskz:
237; X64:       # %bb.0:
238; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
239; X64-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
240; X64-NEXT:    retq # encoding: [0xc3]
241  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8) ;
242  ret <2 x double> %res
243}
244
; Merge-masked scalar-double rsqrt28: sources %a0 and %b0, passthru %c0, a
; run-time %mask and i32 8 as the last operand. The assertions expect the
; {sae} result written to %xmm2 under {%k1}, followed by a vmovapd of %xmm2
; into %xmm0, on both triples.
245define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
246; X86-LABEL: test_rsqrt28_sd_mask:
247; X86:       # %bb.0:
248; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
249; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
250; X86-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
251; X86-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
252; X86-NEXT:    retl # encoding: [0xc3]
253;
254; X64-LABEL: test_rsqrt28_sd_mask:
255; X64:       # %bb.0:
256; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
257; X64-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
258; X64-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
259; X64-NEXT:    retq # encoding: [0xc3]
260  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8) ;
261  ret <2 x double> %res
262}
263
264declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
265
; Zero-masked scalar-double rsqrt28 with a scalar memory source: a double is
; loaded from %ptr and inserted into element 0 of an undef vector, which is
; passed as the second source. The passthru is zeroinitializer, %mask is a
; run-time mask and the last operand is i32 4. The assertions expect the load
; to appear as the instruction's memory operand under {%k1} {z}.
266define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr, i8 %mask) {
267; X86-LABEL: test_rsqrt28_sd_maskz_mem:
268; X86:       # %bb.0:
269; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
270; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
271; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
272; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
273; X86-NEXT:    retl # encoding: [0xc3]
274;
275; X64-LABEL: test_rsqrt28_sd_maskz_mem:
276; X64:       # %bb.0:
277; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
278; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
279; X64-NEXT:    retq # encoding: [0xc3]
280  %mem = load double , double * %ptr, align 8
281  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
282  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
283  ret <2 x double> %res
284}
285
; Same shape as a zero-masked scalar-double rsqrt28 with a scalar memory
; source, but the double is loaded from element 18 past %ptr (getelementptr
; double, i32 18). The loaded value is inserted into element 0 of an undef
; vector and passed as the second source; passthru is zeroinitializer and the
; last operand is i32 4. The assertions expect a folded memory operand with a
; displacement of 144 under {%k1} {z}.
286define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr, i8 %mask) {
287; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
288; X86:       # %bb.0:
289; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
290; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
291; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
292; X86-NEXT:    vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
293; X86-NEXT:    retl # encoding: [0xc3]
294;
295; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
296; X64:       # %bb.0:
297; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
298; X64-NEXT:    vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
299; X64-NEXT:    retq # encoding: [0xc3]
300  %ptr1 = getelementptr double, double* %ptr, i32 18
301  %mem = load double , double * %ptr1, align 8
302  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
303  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
304  ret <2 x double> %res
305}
306
307