; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)

define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_ps_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
  ret <16 x float> %2
}

define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_vfnmadd_ps:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
  %3 = bitcast i16 %mask to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %a0
  ret <16 x float> %4
}

define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmadd_pd_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
  ret <8 x double> %2
}
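; Note on the masked tests below: AVX-512 merge masking is modeled in plain
; IR by bitcasting the scalar mask to a vector of i1 and selecting between
; the FMA result and the passthru value (the first source operand), which is
; what the {%k1} writemask on the instruction expresses. An i16 mask covers
; the sixteen float lanes; the pd variants need only an i8 mask for eight
; double lanes.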
define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
  %3 = bitcast i8 %mask to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %a0
  ret <8 x double> %4
}

define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfnmsubps_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
  ret <16 x float> %3
}

define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_vfnmsub_ps:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
  %4 = bitcast i16 %mask to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %a0
  ret <16 x float> %5
}
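; The fsub from a splat of -0.0 is the canonical vector negation in IR of
; this vintage (it predates the fneg instruction). Negating only the
; multiplicand folds into vfnmadd, i.e. -(a * b) + c; negating both the
; multiplicand and the addend, as in the surrounding vfnmsub tests, gives
; -(a * b) - c.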
define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfnmsubpd_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
; CHECK-NEXT:    # zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
  ret <8 x double> %3
}

define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}

define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubps_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) #2
  ret <16 x float> %res
}
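; The trailing i32 operand of the llvm.x86.avx512.* intrinsics is the
; rounding control: 4 (CUR_DIRECTION) leaves rounding to MXCSR, so no static
; rounding is encoded; values 8-11 request a static mode with SAE and are
; exercised by the test_mask_round_* functions further down.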
define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
; X86-LABEL: test_mask_fmaddsub_ps:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4)
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
  ret <16 x float> %sel
}

declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) nounwind readnone

define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmaddsubpd_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) nounwind readnone

define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}
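; Naming convention for the test_int_* triples that follow: "mask" merges
; into the first source operand, "mask3" merges into the addend (so the 231
; instruction form is chosen and the result is copied back with
; vmovapd/vmovaps), and "maskz" zeroes inactive lanes via a select against
; zeroinitializer, matching the {z} qualifier.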
define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
  %bc = bitcast i8 %x3 to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
  ret <8 x double> %sel
}
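; vfmaddsub has no generic intrinsic, so the mask3/maskz tests build it from
; two llvm.fma calls, one with +x2 and one with -x2, interleaved by a
; shufflevector that takes even lanes from the subtracting FMA and odd lanes
; from the adding one.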
define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
  %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
  ret <8 x double> %6
}

define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
  %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
  ret <8 x double> %6
}

define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
  %bc = bitcast i16 %x3 to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
  ret <16 x float> %sel
}

define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
  %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
  ret <16 x float> %6
}
define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
  %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
  ret <16 x float> %6
}
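; vfmsubadd swaps the shuffle inputs relative to vfmaddsub: even lanes come
; from the adding FMA and odd lanes from the subtracting one.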
define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
  %4 = shufflevector <8 x double> %1, <8 x double> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
  ret <8 x double> %6
}

define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
  %4 = shufflevector <16 x float> %1, <16 x float> %3, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
  ret <16 x float> %6
}
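; The test_mask_round_* functions pin a static rounding mode through the
; intrinsic's last operand (8 = rn-sae, 9 = rd-sae, 10 = ru-sae, 11 = rz-sae),
; which shows up as the {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae} EVEX rounding
; annotation on the instruction; the rrbz variants repeat the same matrix
; without a writemask.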
define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
  ret <16 x float> %sel
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
  ret <16 x float> %sel
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
  ret <16 x float> %sel
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
  ret <16 x float> %sel
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
  %bc = bitcast i16 %mask to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
  ret <16 x float> %sel
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
  ret <16 x float> %res
}

define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
  ret <16 x float> %res
}
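; vfmsub negates only the addend: fma(a, b, -c) = (a * b) - c.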
define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %1)
  %3 = bitcast i8 %x3 to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x2
  ret <8 x double> %4
}
define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %1)
  %3 = bitcast i16 %x3 to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x2
  ret <16 x float> %4
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
  %bc = bitcast i8 %mask to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
  ret <8 x double> %sel
}
define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
  ret <8 x double> %res
}

define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
; CHECK-NEXT:    # zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
  ret <8 x double> %res
}
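; Plain vfmadd needs no negation at all: llvm.fma plus a select covers the
; masked forms. Note how the 132/213/231 instruction form is picked so that
; the register being merged (and therefore overwritten) is the destination.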
define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
  %bc = bitcast i8 %x3 to <8 x i1>
  %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
  ret <8 x double> %sel
}

define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
  ret <8 x double> %3
}

define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}

define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X86-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X64-NEXT:    # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
  %bc = bitcast i16 %x3 to <16 x i1>
  %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
  ret <16 x float> %sel
}
define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X86-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X64-NEXT:    # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
  ret <16 x float> %3
}

define <16 x float> @test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X86-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X64-NEXT:    # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
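; llvm.fma has no rounding operand, so the static-rounding vfnmsub tests
; below negate the operands and route through llvm.x86.avx512.vfmadd.pd.512;
; only the "current" (i32 4) case can use llvm.fma directly.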
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}
define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X86-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X64-NEXT:    # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
  %4 = bitcast i8 %mask to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
  ret <8 x double> %5
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
  ret <8 x double> %3
}
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
  ret <8 x double> %3
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
  ret <8 x double> %3
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
  ret <8 x double> %3
}

define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
  ret <8 x double> %3
}
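
; NOTE (editorial): the trailing i32 argument of @llvm.x86.avx512.vfmadd.pd.512
; selects the embedded rounding mode checked above: 8 = {rn-sae}, 9 = {rd-sae},
; 10 = {ru-sae}, 11 = {rz-sae} (the rounding control combined with the
; suppress-all-exceptions bit), while "current"-rounding cases are expressed as
; plain @llvm.fma.* calls. A minimal illustrative function, reusing the
; declarations already in this file (it carries no FileCheck assertions and is
; not part of the checked tests):

define <8 x double> @example_rz_sae(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; 11 = round-toward-zero (3) | suppress-all-exceptions (8),
  ; expected to lower to an FMA with the {rz-sae} decoration
  %r = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11)
  ret <8 x double> %r
}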

define <8 x double> @test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %2)
  %4 = bitcast i8 %x3 to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x0
  ret <8 x double> %5
}

define <8 x double> @test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X86-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X64-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
  %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
  %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %1, <8 x double> %x1, <8 x double> %2)
  %4 = bitcast i8 %x3 to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x2
  ret <8 x double> %5
}
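
; NOTE (editorial): in the "mask" variants above the select passthru is %x0, so
; the backend keeps the accumulator in zmm0 and picks the 132 form of the
; instruction; in the "mask3" variants the passthru is %x2, so it writes zmm2
; via the 231 form and then copies the result into zmm0 for the return.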

define <16 x float> @test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %2)
  %4 = bitcast i16 %x3 to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x0
  ret <16 x float> %5
}

define <16 x float> @test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
; X86-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
; X64-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
  %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
  %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %1, <16 x float> %x1, <16 x float> %2)
  %4 = bitcast i16 %x3 to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x2
  ret <16 x float> %5
}
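
; NOTE (editorial): the bitcast-plus-select idiom in these tests expresses
; merge-masking, which the backend folds into the {%k1} write-mask when the
; passthru matches one of the FMA operands. Selecting against zeroinitializer
; would express zero-masking ({%k1}{z}) instead. A minimal sketch under that
; assumption (illustrative only, no FileCheck assertions):

define <8 x double> @example_zero_masked_fma(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
  ; plain fused multiply-add, then keep only the lanes enabled in %mask
  %r = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2)
  %m = bitcast i8 %mask to <8 x i1>
  %z = select <8 x i1> %m, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double> %z
}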

define <8 x double> @test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
  %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %x2)
  %3 = bitcast i8 %x3 to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x0
  ret <8 x double> %4
}

define <16 x float> @test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq # encoding: [0xc3]
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
  %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %x2)
  %3 = bitcast i16 %x3 to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x0
  ret <16 x float> %4
}

; This test case used to crash due to combineFMA not bitcasting results of isFNEG.
define <4 x float> @foo() {
; X86-LABEL: foo:
; X86: # %bb.0: # %entry
; X86-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X86-NEXT: # xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: vmovss (%rax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
entry:
  %0 = load <4 x float>, <4 x float>* undef, align 16
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  %1 = extractelement <4 x float> %sub, i64 0
  %2 = call float @llvm.x86.avx512.vfmadd.f32(float undef, float undef, float %1, i32 9)
  %3 = select i1 extractelement (<8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), i64 0), float %2, float undef
  %4 = insertelement <4 x float> undef, float %3, i64 0
  ret <4 x float> %4
}

; Function Attrs: nounwind readnone
declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32)

declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)
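
; NOTE (editorial): the fsub-from-negative-zero pattern used throughout this
; file is the historical canonical spelling of vector negation; current IR
; would use the dedicated fneg instruction. A minimal equivalent sketch of the
; fnmadd pattern (illustrative only, no FileCheck assertions):

define <8 x double> @example_fnmadd_with_fneg(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  ; fneg replaces the fsub <-0.0, ...>, %a1 idiom used above
  %neg = fneg <8 x double> %a1
  %r = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %neg, <8 x double> %a2)
  ret <8 x double> %r
}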