; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen and encoding tests for the AVX-512ER intrinsics (rsqrt28, rcp28, exp2)
; on 32-bit and 64-bit x86. Conventions visible in the assertions below:
;   * Calls whose trailing i32 argument is 8 are expected to emit the {sae}
;     form of the instruction; calls passing 4 are expected to emit the plain form.
;   * A mask argument of -1 is expected to yield an unmasked instruction. The
;     constant mask 6 is materialized with movw/kmovw into %k1, and a %mask
;     argument is moved into %k1 with kmovw.
;   * With a zeroinitializer or undef passthru and a non-all-ones mask the
;     instruction is expected to use zero-masking ({z}); with a passthru
;     argument it is expected to use merge-masking followed by a register move.
; Functions whose output is identical on both targets share the common prefix;
; the others carry separate per-target assertions.

; --- Packed rsqrt28 (512-bit, single precision) ---

; Unmasked, {sae}.
define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

; Constant mask 6 with passthru %a1: merge-masking, {sae}.
define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test1_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
  ret <16 x float> %res
}

; Constant mask 6 with undef passthru: zero-masking, no {sae}.
define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test2_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
  ret <16 x float> %res
}

; Constant mask 6 with zeroinitializer passthru: zero-masking, no {sae}.
define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test3_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
  ret <16 x float> %res
}

; Constant mask 6 with undef passthru: zero-masking, {sae}.
define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
; CHECK-LABEL: test4_rsqrt28_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

; --- Packed rcp28 and exp2 (512-bit), unmasked, {sae} ---

define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_rcp28_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_rcp28_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_exp2_ps_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_exp2_pd_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone

; --- Scalar single precision (rsqrt28.ss / rcp28.ss) ---

define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rsqrt28_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
; CHECK-LABEL: test_rcp28_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; The second source operand is loaded from memory; the load is expected to be
; folded into the instruction as a memory operand.
define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; X86-LABEL: test_rcp28_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rcp28_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}

; Same as above for rsqrt28.ss: the loaded operand is expected to be folded.
define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, <4 x float>* %a1ptr) {
; X86-LABEL: test_rsqrt28_ss_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_load:
; X64:       # %bb.0:
; X64-NEXT:    vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %a1ptr
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}

; Variable mask with zeroinitializer passthru: zero-masking, {sae}.
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_maskz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8) ;
  ret <4 x float> %res
}

; Variable mask with passthru %c0: merge-masking into %xmm2, then a move.
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_ss_mask:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_ss_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8) ;
  ret <4 x float> %res
}

; --- Scalar double precision (rcp28.sd / rsqrt28.sd) ---

; FIXME: %a1 is loaded but never used. The intrinsic is called with %a0 for
; both source operands, so the expected instruction below has register operands
; only and load folding is not exercised despite the function name. Passing %a1
; as the second operand requires regenerating the assertions with
; utils/update_llc_test_checks.py.
define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, <2 x double>* %a1ptr, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_rcp28_sd_mask_load:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rcp28_sd_mask_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a2, i8 %mask, i32 4) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; FIXME: %a1 is loaded but never used. The intrinsic is called with %a0 for
; both source operands, so the expected instruction below has register operands
; only and load folding is not exercised despite the function name. Passing %a1
; as the second operand requires regenerating the assertions with
; utils/update_llc_test_checks.py.
define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, <2 x double>* %a1ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_load:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a1ptr
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
  ret <2 x double> %res
}

; Variable mask with zeroinitializer passthru: zero-masking, {sae}.
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8) ;
  ret <2 x double> %res
}

; Variable mask with passthru %c0: merge-masking into %xmm2, then a move.
define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_mask:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_mask:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8) ;
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone

; A scalar double is loaded and inserted into element 0 of the second source
; vector; the load is expected to be folded as a memory operand.
define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_mem:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %mem = load double , double * %ptr, align 8
  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
  ret <2 x double> %res
}

; As above, but the scalar is loaded from element 18 of %ptr. The expected
; memory operand is 144(base), encoded with a single displacement byte 0x12.
define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr, i8 %mask) {
; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
; X64-NEXT:    retq # encoding: [0xc3]
  %ptr1 = getelementptr double, double* %ptr, i32 18
  %mem = load double , double * %ptr1, align 8
  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
  ret <2 x double> %res
}