; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN

; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a0, float %4, i64 0
  ret <4 x float> %5
}

define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a1, float %4, i64 0
  ret <4 x float> %5
}

define <4 x float> @test_x86_fma_vfmadd_ss_231(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-FMA-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd1]
; CHECK-AVX512VL-NEXT: # xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss_231:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%r8), %xmm0 # encoding: [0xc4,0xc1,0x78,0x28,0x00]
; CHECK-FMA-WIN-NEXT: vmovss (%rcx), %xmm1 # encoding: [0xc5,0xfa,0x10,0x09]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmadd231ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xb9,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * mem) + xmm0
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = insertelement <4 x float> %a2, float %4, i64 0
  ret <4 x float> %5
}

define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = insertelement <2 x double> %a0, double %4, i64 0
  ret <2 x double> %5
}

define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = insertelement <2 x double> %a1, double %4, i64 0
  ret <2 x double> %5
}

define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %1
}

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %1
}

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %1
}

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %1
}

; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %3
  %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
  %6 = insertelement <4 x float> %a0, float %5, i64 0
  ret <4 x float> %6
}

define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %3
  %5 = call float @llvm.fma.f32(float %1, float %2, float %4)
  %6 = insertelement <4 x float> %a1, float %5, i64 0
  ret <4 x float> %6
}

define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %3
  %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
  %6 = insertelement <2 x double> %a0, double %5, i64 0
  ret <2 x double> %6
}

define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-FMA-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %3
  %5 = call double @llvm.fma.f64(double %1, double %2, double %4)
  %6 = insertelement <2 x double> %a1, double %5, i64 0
  ret <2 x double> %6
}

define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %1)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %1)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
  %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %1)
  ret <8 x float> %2
}

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
  %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %1)
  ret <4 x double> %2
}

; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a0, i64 0
  %2 = extractelement <4 x float> %a1, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %2
  %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
  %6 = insertelement <4 x float> %a0, float %5, i64 0
  ret <4 x float> %6
}

define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <4 x float> %a1, i64 0
  %2 = extractelement <4 x float> %a0, i64 0
  %3 = extractelement <4 x float> %a2, i64 0
  %4 = fsub float -0.000000e+00, %2
  %5 = call float @llvm.fma.f32(float %1, float %4, float %3)
  %6 = insertelement <4 x float> %a1, float %5, i64 0
  ret <4 x float> %6
}

define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a0, i64 0
  %2 = extractelement <2 x double> %a1, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %2
  %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
  %6 = insertelement <2 x double> %a0, double %5, i64 0
  ret <2 x double> %6
}

define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = extractelement <2 x double> %a1, i64 0
  %2 = extractelement <2 x double> %a0, i64 0
  %3 = extractelement <2 x double> %a2, i64 0
  %4 = fsub double -0.000000e+00, %2
  %5 = call double @llvm.fma.f64(double %1, double %4, double %3)
  %6 = insertelement <2 x double> %a1, double %5, i64 0
  ret <2 x double> %6
}

define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0
  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %2
}

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT: retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA-WIN: # %bb.0:
; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3]
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0
  %2 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %2
}

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 655; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256: 656; CHECK-FMA: # %bb.0: 657; CHECK-FMA-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2] 658; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 659; CHECK-FMA-NEXT: retq # encoding: [0xc3] 660; 661; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256: 662; CHECK-AVX512VL: # %bb.0: 663; CHECK-AVX512VL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2] 664; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 665; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 666; 667; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256: 668; CHECK-FMA-WIN: # %bb.0: 669; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 670; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 671; CHECK-FMA-WIN-NEXT: vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00] 672; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem 673; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 674 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
float -0.000000e+00, float -0.000000e+00>, %a0 675 %2 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %a2) 676 ret <8 x float> %2 677} 678 679define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 680; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256: 681; CHECK-FMA: # %bb.0: 682; CHECK-FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2] 683; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 684; CHECK-FMA-NEXT: retq # encoding: [0xc3] 685; 686; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256: 687; CHECK-AVX512VL: # %bb.0: 688; CHECK-AVX512VL-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2] 689; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) + ymm2 690; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 691; 692; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256: 693; CHECK-FMA-WIN: # %bb.0: 694; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 695; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 696; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00] 697; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem 698; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 699 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 700 %2 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %a2) 701 ret <4 x double> %2 702} 703 704; VFNMSUB 705define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 706; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss: 707; CHECK-FMA: # %bb.0: 708; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2] 709; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 710; CHECK-FMA-NEXT: retq # encoding: [0xc3] 711; 712; CHECK-AVX512VL-LABEL: 
test_x86_fma_vfnmsub_ss: 713; CHECK-AVX512VL: # %bb.0: 714; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2] 715; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 716; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 717; 718; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss: 719; CHECK-FMA-WIN: # %bb.0: 720; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01] 721; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] 722; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero 723; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02] 724; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 725; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 726 %1 = extractelement <4 x float> %a0, i64 0 727 %2 = extractelement <4 x float> %a1, i64 0 728 %3 = extractelement <4 x float> %a2, i64 0 729 %4 = fsub float -0.000000e+00, %2 730 %5 = fsub float -0.000000e+00, %3 731 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) 732 %7 = insertelement <4 x float> %a0, float %6, i64 0 733 ret <4 x float> %7 734} 735 736define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 737; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss: 738; CHECK-FMA: # %bb.0: 739; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca] 740; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 741; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1] 742; CHECK-FMA-NEXT: retq # encoding: [0xc3] 743; 744; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss: 745; CHECK-AVX512VL: # %bb.0: 746; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca] 747; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 748; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] 749; 
CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 750; 751; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss: 752; CHECK-FMA-WIN: # %bb.0: 753; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 754; CHECK-FMA-WIN-NEXT: vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08] 755; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero,zero,zero 756; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01] 757; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 758; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 759 %1 = extractelement <4 x float> %a1, i64 0 760 %2 = extractelement <4 x float> %a0, i64 0 761 %3 = extractelement <4 x float> %a2, i64 0 762 %4 = fsub float -0.000000e+00, %2 763 %5 = fsub float -0.000000e+00, %3 764 %6 = call float @llvm.fma.f32(float %1, float %4, float %5) 765 %7 = insertelement <4 x float> %a1, float %6, i64 0 766 ret <4 x float> %7 767} 768 769define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 770; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd: 771; CHECK-FMA: # %bb.0: 772; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] 773; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 774; CHECK-FMA-NEXT: retq # encoding: [0xc3] 775; 776; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd: 777; CHECK-AVX512VL: # %bb.0: 778; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] 779; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 780; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 781; 782; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd: 783; CHECK-FMA-WIN: # %bb.0: 784; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] 785; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 786; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 787; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: 
[0xc4,0xe2,0xf1,0x9f,0x02] 788; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 789; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 790 %1 = extractelement <2 x double> %a0, i64 0 791 %2 = extractelement <2 x double> %a1, i64 0 792 %3 = extractelement <2 x double> %a2, i64 0 793 %4 = fsub double -0.000000e+00, %2 794 %5 = fsub double -0.000000e+00, %3 795 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) 796 %7 = insertelement <2 x double> %a0, double %6, i64 0 797 ret <2 x double> %7 798} 799 800define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 801; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd: 802; CHECK-FMA: # %bb.0: 803; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 804; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 805; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1] 806; CHECK-FMA-NEXT: retq # encoding: [0xc3] 807; 808; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd: 809; CHECK-AVX512VL: # %bb.0: 810; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 811; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 812; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 813; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 814; 815; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd: 816; CHECK-FMA-WIN: # %bb.0: 817; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 818; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 819; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 820; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01] 821; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 822; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 823 %1 = extractelement <2 x double> %a1, i64 0 824 %2 = extractelement <2 x double> %a0, i64 0 825 %3 = 
extractelement <2 x double> %a2, i64 0 826 %4 = fsub double -0.000000e+00, %2 827 %5 = fsub double -0.000000e+00, %3 828 %6 = call double @llvm.fma.f64(double %1, double %4, double %5) 829 %7 = insertelement <2 x double> %a1, double %6, i64 0 830 ret <2 x double> %7 831} 832 833define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 834; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps: 835; CHECK-FMA: # %bb.0: 836; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2] 837; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 838; CHECK-FMA-NEXT: retq # encoding: [0xc3] 839; 840; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps: 841; CHECK-AVX512VL: # %bb.0: 842; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2] 843; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 844; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 845; 846; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps: 847; CHECK-FMA-WIN: # %bb.0: 848; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 849; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 850; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00] 851; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 852; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 853 %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 854 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 855 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %1, <4 x float> %a1, <4 x float> %2) 856 ret <4 x float> %3 857} 858 859define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 860; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd: 861; CHECK-FMA: # %bb.0: 862; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # 
encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 863; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 864; CHECK-FMA-NEXT: retq # encoding: [0xc3] 865; 866; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd: 867; CHECK-AVX512VL: # %bb.0: 868; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 869; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 870; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 871; 872; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd: 873; CHECK-FMA-WIN: # %bb.0: 874; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 875; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 876; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00] 877; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 878; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 879 %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a0 880 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 881 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %1, <2 x double> %a1, <2 x double> %2) 882 ret <2 x double> %3 883} 884 885define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 886; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256: 887; CHECK-FMA: # %bb.0: 888; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2] 889; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 890; CHECK-FMA-NEXT: retq # encoding: [0xc3] 891; 892; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256: 893; CHECK-AVX512VL: # %bb.0: 894; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2] 895; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 896; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 897; 898; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256: 899; CHECK-FMA-WIN: # %bb.0: 900; CHECK-FMA-WIN-NEXT: vmovaps 
(%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 901; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 902; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00] 903; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 904; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 905 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a0 906 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 907 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %1, <8 x float> %a1, <8 x float> %2) 908 ret <8 x float> %3 909} 910 911define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 912; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256: 913; CHECK-FMA: # %bb.0: 914; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 915; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 916; CHECK-FMA-NEXT: retq # encoding: [0xc3] 917; 918; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd_256: 919; CHECK-AVX512VL: # %bb.0: 920; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 921; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 922; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 923; 924; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256: 925; CHECK-FMA-WIN: # %bb.0: 926; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 927; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 928; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00] 929; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 930; CHECK-FMA-WIN-NEXT: retq # encoding: 
[0xc3] 931 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a0 932 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 933 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %1, <4 x double> %a1, <4 x double> %2) 934 ret <4 x double> %3 935} 936 937; VFMADDSUB 938define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 939; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps: 940; CHECK-FMA: # %bb.0: 941; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 942; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 943; CHECK-FMA-NEXT: retq # encoding: [0xc3] 944; 945; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps: 946; CHECK-AVX512VL: # %bb.0: 947; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 948; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 949; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 950; 951; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps: 952; CHECK-FMA-WIN: # %bb.0: 953; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 954; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 955; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00] 956; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 957; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 958 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 959 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 960 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) 961 %4 = shufflevector <4 x float> %3, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 962 ret <4 x float> %4 963} 964 965define <2 x double> 
@test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 966; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd: 967; CHECK-FMA: # %bb.0: 968; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 969; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 970; CHECK-FMA-NEXT: retq # encoding: [0xc3] 971; 972; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd: 973; CHECK-AVX512VL: # %bb.0: 974; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 975; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 976; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 977; 978; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd: 979; CHECK-FMA-WIN: # %bb.0: 980; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 981; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 982; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00] 983; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 984; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 985 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 986 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 987 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) 988 %4 = shufflevector <2 x double> %3, <2 x double> %1, <2 x i32> <i32 0, i32 3> 989 ret <2 x double> %4 990} 991 992define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 993; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256: 994; CHECK-FMA: # %bb.0: 995; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 996; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 997; CHECK-FMA-NEXT: retq # encoding: [0xc3] 998; 999; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256: 1000; CHECK-AVX512VL: # %bb.0: 1001; 
CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 1002; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1003; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1004; 1005; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256: 1006; CHECK-FMA-WIN: # %bb.0: 1007; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 1008; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 1009; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00] 1010; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 1011; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1012 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 1013 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1014 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) 1015 %4 = shufflevector <8 x float> %3, <8 x float> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 1016 ret <8 x float> %4 1017} 1018 1019define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1020; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256: 1021; CHECK-FMA: # %bb.0: 1022; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 1023; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1024; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1025; 1026; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256: 1027; CHECK-AVX512VL: # %bb.0: 1028; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 1029; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 1030; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1031; 1032; CHECK-FMA-WIN-LABEL: 
test_x86_fma_vfmaddsub_pd_256: 1033; CHECK-FMA-WIN: # %bb.0: 1034; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 1035; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 1036; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00] 1037; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 1038; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1039 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 1040 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 1041 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) 1042 %4 = shufflevector <4 x double> %3, <4 x double> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1043 ret <4 x double> %4 1044} 1045 1046; VFMSUBADD 1047define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1048; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps: 1049; CHECK-FMA: # %bb.0: 1050; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2] 1051; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1052; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1053; 1054; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps: 1055; CHECK-AVX512VL: # %bb.0: 1056; CHECK-AVX512VL-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2] 1057; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1058; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1059; 1060; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps: 1061; CHECK-FMA-WIN: # %bb.0: 1062; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 1063; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 1064; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00] 1065; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * 
xmm0) -/+ mem 1066; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1067 %1 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 1068 %2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1069 %3 = call <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %2) 1070 %4 = shufflevector <4 x float> %1, <4 x float> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1071 ret <4 x float> %4 1072} 1073 1074define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 1075; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd: 1076; CHECK-FMA: # %bb.0: 1077; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 1078; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1079; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1080; 1081; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd: 1082; CHECK-AVX512VL: # %bb.0: 1083; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 1084; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 1085; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1086; 1087; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd: 1088; CHECK-FMA-WIN: # %bb.0: 1089; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 1090; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 1091; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00] 1092; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem 1093; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1094 %1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 1095 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a2 1096 %3 = call <2 x double> @llvm.fma.v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %2) 1097 %4 = shufflevector <2 x double> %1, <2 x double> %3, 
<2 x i32> <i32 0, i32 3> 1098 ret <2 x double> %4 1099} 1100 1101define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 1102; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256: 1103; CHECK-FMA: # %bb.0: 1104; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 1105; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1106; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1107; 1108; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256: 1109; CHECK-AVX512VL: # %bb.0: 1110; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 1111; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1112; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1113; 1114; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256: 1115; CHECK-FMA-WIN: # %bb.0: 1116; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 1117; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 1118; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00] 1119; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1120; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1121 %1 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 1122 %2 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2 1123 %3 = call <8 x float> @llvm.fma.v8f32(<8 x float> %a0, <8 x float> %a1, <8 x float> %2) 1124 %4 = shufflevector <8 x float> %1, <8 x float> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 1125 ret <8 x float> %4 1126} 1127 1128define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1129; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256: 1130; CHECK-FMA: # %bb.0: 1131; 
CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1132; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1133; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1134; 1135; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256: 1136; CHECK-AVX512VL: # %bb.0: 1137; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1138; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1139; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1140; 1141; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256: 1142; CHECK-FMA-WIN: # %bb.0: 1143; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 1144; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 1145; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00] 1146; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1147; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1148 %1 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 1149 %2 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2 1150 %3 = call <4 x double> @llvm.fma.v4f64(<4 x double> %a0, <4 x double> %a1, <4 x double> %2) 1151 %4 = shufflevector <4 x double> %1, <4 x double> %3, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 1152 ret <4 x double> %4 1153} 1154 1155declare float @llvm.fma.f32(float, float, float) 1156declare double @llvm.fma.f64(double, double, double) 1157declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 1158declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) 1159declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) 1160declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) 1161 1162attributes #0 = { nounwind } 1163