; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/fma-builtins.c

; Every function below is checked under the three llc configurations above:
; +fma, +avx512vl, and +fma on a Windows triple. In the Windows checks the three
; vector operands are loaded from (%rcx), (%rdx) and (%r8) instead of arriving
; in %xmm0-%xmm2 / %ymm0-%ymm2.

; Packed <4 x float>: fma(a, b, c).
define <4 x float> @test_mm_fmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmadd_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmadd_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmadd_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %0
}

; Packed <2 x double>: fma(a, b, c).
define <2 x double> @test_mm_fmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmadd_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmadd_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmadd_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %0
}

; Scalar float: fma(a[0], b[0], c[0]); the result replaces element 0 of a.
define <4 x float> @test_mm_fmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmadd_ss:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmadd_ss:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmadd_ss:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %1 = extractelement <4 x float> %b, i64 0
  %2 = extractelement <4 x float> %c, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

; Scalar double: fma(a[0], b[0], c[0]); the result replaces element 0 of a.
define <2 x double> @test_mm_fmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmadd_sd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmadd_sd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmadd_sd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %2 = extractelement <2 x double> %c, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Packed <4 x float>: fma(a, b, -c); the negation is written as fsub from -0.0.
define <4 x float> @test_mm_fmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmsub_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsub_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsub_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
  ret <4 x float> %0
}

; Packed <2 x double>: fma(a, b, -c).
define <2 x double> @test_mm_fmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmsub_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsub_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsub_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
  ret <2 x double> %0
}

; Scalar float: fma(a[0], b[0], -c[0]); the result replaces element 0 of a.
define <4 x float> @test_mm_fmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmsub_ss:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsub_ss:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsub_ss:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %1 = extractelement <4 x float> %b, i64 0
  %.rhs.i = extractelement <4 x float> %c, i64 0
  %2 = fsub float -0.000000e+00, %.rhs.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

; Scalar double: fma(a[0], b[0], -c[0]); the result replaces element 0 of a.
define <2 x double> @test_mm_fmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmsub_sd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsub_sd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsub_sd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %1 = extractelement <2 x double> %b, i64 0
  %.rhs.i = extractelement <2 x double> %c, i64 0
  %2 = fsub double -0.000000e+00, %.rhs.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Packed <4 x float>: fma(-a, b, c).
define <4 x float> @test_mm_fnmadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fnmadd_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmadd_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c) #2
  ret <4 x float> %0
}

; Packed <2 x double>: fma(-a, b, c).
define <2 x double> @test_mm_fnmadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fnmadd_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmadd_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c) #2
  ret <2 x double> %0
}

; Scalar float: fma(a[0], -b[0], c[0]); the result replaces element 0 of a.
define <4 x float> @test_mm_fnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fnmadd_ss:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmadd_ss:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_ss:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %.rhs.i = extractelement <4 x float> %b, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %2 = extractelement <4 x float> %c, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

; Scalar double: fma(a[0], -b[0], c[0]); the result replaces element 0 of a.
define <2 x double> @test_mm_fnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fnmadd_sd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmadd_sd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmadd_sd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %.rhs.i = extractelement <2 x double> %b, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %c, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Packed <4 x float>: fma(-a, b, -c).
define <4 x float> @test_mm_fnmsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fnmsub_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmsub_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %sub1.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub1.i) #2
  ret <4 x float> %0
}

; Packed <2 x double>: fma(-a, b, -c).
define <2 x double> @test_mm_fnmsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fnmsub_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmsub_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
  %sub1.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub1.i) #2
  ret <2 x double> %0
}

; Scalar float: fma(a[0], -b[0], -c[0]); the result replaces element 0 of a.
define <4 x float> @test_mm_fnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fnmsub_ss:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmsub_ss:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_ss:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %a, i64 0
  %.rhs.i = extractelement <4 x float> %b, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %.rhs2.i = extractelement <4 x float> %c, i64 0
  %2 = fsub float -0.000000e+00, %.rhs2.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #2
  %4 = insertelement <4 x float> %a, float %3, i64 0
  ret <4 x float> %4
}

; Scalar double: fma(a[0], -b[0], -c[0]); the result replaces element 0 of a.
define <2 x double> @test_mm_fnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fnmsub_sd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fnmsub_sd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fnmsub_sd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %a, i64 0
  %.rhs.i = extractelement <2 x double> %b, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs2.i = extractelement <2 x double> %c, i64 0
  %2 = fsub double -0.000000e+00, %.rhs2.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #2
  %4 = insertelement <2 x double> %a, double %3, i64 0
  ret <2 x double> %4
}

; Packed <4 x float>: lanes 0 and 2 take fma(a, b, -c), lanes 1 and 3 take
; fma(a, b, c); the two results are blended with a shufflevector.
define <4 x float> @test_mm_fmaddsub_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmaddsub_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmaddsub_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmaddsub_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) +/- mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %1) #2
  %3 = shufflevector <4 x float> %2, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %3
}

; Packed <2 x double>: lane 0 takes fma(a, b, -c), lane 1 takes fma(a, b, c).
define <2 x double> @test_mm_fmaddsub_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmaddsub_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmaddsub_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmaddsub_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) +/- mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %1) #2
  %3 = shufflevector <2 x double> %2, <2 x double> %0, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %3
}

; Packed <4 x float>: lanes 0 and 2 take fma(a, b, c), lanes 1 and 3 take
; fma(a, b, -c); the two results are blended with a shufflevector.
define <4 x float> @test_mm_fmsubadd_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-FMA-LABEL: test_mm_fmsubadd_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsubadd_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsubadd_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i) #2
  %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %2 = shufflevector <4 x float> %1, <4 x float> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %2
}

; Packed <2 x double>: lane 0 takes fma(a, b, c), lane 1 takes fma(a, b, -c).
define <2 x double> @test_mm_fmsubadd_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-FMA-LABEL: test_mm_fmsubadd_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm_fmsubadd_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm_fmsubadd_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) -/+ mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i) #2
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %2 = shufflevector <2 x double> %1, <2 x double> %0, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %2
}

; Packed <8 x float> (256-bit): fma(a, b, c).
define <8 x float> @test_mm256_fmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-FMA-LABEL: test_mm256_fmadd_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fmadd_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fmadd_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %0
}

; Packed <4 x double> (256-bit): fma(a, b, c).
define <4 x double> @test_mm256_fmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-FMA-LABEL: test_mm256_fmadd_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fmadd_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fmadd_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2
  ret <4 x double> %0
}

; Packed <8 x float> (256-bit): fma(a, b, -c).
define <8 x float> @test_mm256_fmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-FMA-LABEL: test_mm256_fmsub_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fmsub_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fmsub_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  ret <8 x float> %0
}

; Packed <4 x double> (256-bit): fma(a, b, -c).
define <4 x double> @test_mm256_fmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-FMA-LABEL: test_mm256_fmsub_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fmsub_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fmsub_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c
  %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2
  ret <4 x double> %0
}

; Packed <8 x float> (256-bit): fma(-a, b, c).
define <8 x float> @test_mm256_fnmadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-FMA-LABEL: test_mm256_fnmadd_ps:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fnmadd_ps:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fnmadd_ps:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
entry:
  %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %c) #2
  ret <8 x float> %0
}

define <4 x double> @test_mm256_fnmadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-FMA-LABEL: test_mm256_fnmadd_pd:
; CHECK-FMA:       # %bb.0: # %entry
; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_mm256_fnmadd_pd:
; CHECK-AVX512VL:       # %bb.0: # %entry
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_mm256_fnmadd_pd:
; CHECK-FMA-WIN:       # %bb.0: # %entry
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT: 
vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 721; CHECK-FMA-WIN-NEXT: vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00] 722; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) + mem 723; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 724entry: 725 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a 726 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %c) #2 727 ret <4 x double> %0 728} 729 730define <8 x float> @test_mm256_fnmsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { 731; CHECK-FMA-LABEL: test_mm256_fnmsub_ps: 732; CHECK-FMA: # %bb.0: # %entry 733; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2] 734; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 735; CHECK-FMA-NEXT: retq # encoding: [0xc3] 736; 737; CHECK-AVX512VL-LABEL: test_mm256_fnmsub_ps: 738; CHECK-AVX512VL: # %bb.0: # %entry 739; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2] 740; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 741; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 742; 743; CHECK-FMA-WIN-LABEL: test_mm256_fnmsub_ps: 744; CHECK-FMA-WIN: # %bb.0: # %entry 745; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 746; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 747; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00] 748; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 749; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 750entry: 751 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a 752 %sub1.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, 
float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 753 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %sub.i, <8 x float> %b, <8 x float> %sub1.i) #2 754 ret <8 x float> %0 755} 756 757define <4 x double> @test_mm256_fnmsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { 758; CHECK-FMA-LABEL: test_mm256_fnmsub_pd: 759; CHECK-FMA: # %bb.0: # %entry 760; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 761; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 762; CHECK-FMA-NEXT: retq # encoding: [0xc3] 763; 764; CHECK-AVX512VL-LABEL: test_mm256_fnmsub_pd: 765; CHECK-AVX512VL: # %bb.0: # %entry 766; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 767; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 768; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 769; 770; CHECK-FMA-WIN-LABEL: test_mm256_fnmsub_pd: 771; CHECK-FMA-WIN: # %bb.0: # %entry 772; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 773; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 774; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00] 775; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 776; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 777entry: 778 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a 779 %sub1.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c 780 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %sub.i, <4 x double> %b, <4 x double> %sub1.i) #2 781 ret <4 x double> %0 782} 783 784define <8 x float> @test_mm256_fmaddsub_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { 785; CHECK-FMA-LABEL: test_mm256_fmaddsub_ps: 786; CHECK-FMA: # %bb.0: # %entry 787; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, 
%ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 788; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 789; CHECK-FMA-NEXT: retq # encoding: [0xc3] 790; 791; CHECK-AVX512VL-LABEL: test_mm256_fmaddsub_ps: 792; CHECK-AVX512VL: # %bb.0: # %entry 793; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 794; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 795; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 796; 797; CHECK-FMA-WIN-LABEL: test_mm256_fmaddsub_ps: 798; CHECK-FMA-WIN: # %bb.0: # %entry 799; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 800; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 801; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00] 802; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 803; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 804entry: 805 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 806 %1 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 807 %2 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %1) #2 808 %3 = shufflevector <8 x float> %2, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 809 ret <8 x float> %3 810} 811 812define <4 x double> @test_mm256_fmaddsub_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { 813; CHECK-FMA-LABEL: test_mm256_fmaddsub_pd: 814; CHECK-FMA: # %bb.0: # %entry 815; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 816; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 817; CHECK-FMA-NEXT: retq # encoding: [0xc3] 818; 819; CHECK-AVX512VL-LABEL: test_mm256_fmaddsub_pd: 820; CHECK-AVX512VL: # %bb.0: # %entry 821; CHECK-AVX512VL-NEXT: vfmaddsub213pd 
%ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 822; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 823; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 824; 825; CHECK-FMA-WIN-LABEL: test_mm256_fmaddsub_pd: 826; CHECK-FMA-WIN: # %bb.0: # %entry 827; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 828; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 829; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00] 830; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 831; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 832entry: 833 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2 834 %1 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c 835 %2 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %1) #2 836 %3 = shufflevector <4 x double> %2, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 837 ret <4 x double> %3 838} 839 840define <8 x float> @test_mm256_fmsubadd_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) { 841; CHECK-FMA-LABEL: test_mm256_fmsubadd_ps: 842; CHECK-FMA: # %bb.0: # %entry 843; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 844; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 845; CHECK-FMA-NEXT: retq # encoding: [0xc3] 846; 847; CHECK-AVX512VL-LABEL: test_mm256_fmsubadd_ps: 848; CHECK-AVX512VL: # %bb.0: # %entry 849; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 850; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 851; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 852; 853; CHECK-FMA-WIN-LABEL: test_mm256_fmsubadd_ps: 854; CHECK-FMA-WIN: # %bb.0: # %entry 855; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 856; 
CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 857; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00] 858; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 859; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 860entry: 861 %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 862 %0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2 863 %1 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2 864 %2 = shufflevector <8 x float> %1, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> 865 ret <8 x float> %2 866} 867 868define <4 x double> @test_mm256_fmsubadd_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) { 869; CHECK-FMA-LABEL: test_mm256_fmsubadd_pd: 870; CHECK-FMA: # %bb.0: # %entry 871; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 872; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 873; CHECK-FMA-NEXT: retq # encoding: [0xc3] 874; 875; CHECK-AVX512VL-LABEL: test_mm256_fmsubadd_pd: 876; CHECK-AVX512VL: # %bb.0: # %entry 877; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 878; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 879; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 880; 881; CHECK-FMA-WIN-LABEL: test_mm256_fmsubadd_pd: 882; CHECK-FMA-WIN: # %bb.0: # %entry 883; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 884; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 885; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00] 886; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 887; CHECK-FMA-WIN-NEXT: retq # 
encoding: [0xc3] 888entry: 889 %sub.i = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %c 890 %0 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %sub.i) #2 891 %1 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) #2 892 %2 = shufflevector <4 x double> %1, <4 x double> %0, <4 x i32> <i32 0, i32 5, i32 2, i32 7> 893 ret <4 x double> %2 894} 895 896declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 897declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #1 898declare float @llvm.fma.f32(float, float, float) #1 899declare double @llvm.fma.f64(double, double, double) #1 900declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #1 901declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #1 902