; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN

; Codegen test for the x86 FMA intrinsics (llvm.x86.fma.*). Every function is
; compiled in three configurations (all with -fma4) and matched under its own
; check prefix:
;   CHECK-FMA       - x86_64-unknown-unknown with +fma
;   CHECK-AVX512VL  - x86_64-unknown-unknown with +avx512vl
;   CHECK-FMA-WIN   - x86_64-pc-windows with +fma; the expected code loads the
;                     vector operands from (%rcx), (%rdx) and (%r8)
; The *_bac_* variants pass %a1 and %a0 to the intrinsic in swapped order.
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.

; VFMADD
define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x99,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x99,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFMSUB
define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9b,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-FMA-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = (xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9b,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = (xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = (xmm1 * xmm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = (ymm1 * ymm0) - ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = (ymm1 * ymm0) - mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMADD
define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-FMA-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9d,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-FMA-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = -(xmm0 * xmm1) + xmm2
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero
; CHECK-FMA-WIN-NEXT:    vfnmadd132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9d,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) + xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)

define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm1 * xmm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)

define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)

define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-FMA-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-AVX512VL-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
; CHECK-FMA-WIN-NEXT:    # ymm0 = -(ymm1 * ymm0) + mem
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)

; VFNMSUB
define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
; CHECK-AVX512VL-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmsub132ss (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x02]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-FMA-NEXT:    # xmm1 = -(xmm0 * xmm1) - xmm2
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-AVX512VL-NEXT:    # xmm1 = -(xmm0 * xmm1) - xmm2
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-FMA-WIN:       # %bb.0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vmovss (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7a,0x10,0x08]
; CHECK-FMA-WIN-NEXT:    # xmm1 = mem[0],zero,zero,zero
; CHECK-FMA-WIN-NEXT:    vfnmsub132ss (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0x9f,0x01]
; CHECK-FMA-WIN-NEXT:    # xmm0 = -(xmm0 * mem) - xmm1
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)

define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-FMA-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-AVX512VL:       # %bb.0:
; CHECK-AVX512VL-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] 691; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 692; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 693; 694; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd: 695; CHECK-FMA-WIN: # %bb.0: 696; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01] 697; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 698; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 699; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x02] 700; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 701; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 702 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 703 ret <2 x double> %res 704} 705 706define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 707; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd: 708; CHECK-FMA: # %bb.0: 709; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 710; CHECK-FMA-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 711; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1] 712; CHECK-FMA-NEXT: retq # encoding: [0xc3] 713; 714; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd: 715; CHECK-AVX512VL: # %bb.0: 716; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca] 717; CHECK-AVX512VL-NEXT: # xmm1 = -(xmm0 * xmm1) - xmm2 718; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] 719; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 720; 721; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd: 722; CHECK-FMA-WIN: # %bb.0: 723; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 724; CHECK-FMA-WIN-NEXT: vmovsd (%r8), %xmm1 # encoding: [0xc4,0xc1,0x7b,0x10,0x08] 725; CHECK-FMA-WIN-NEXT: # xmm1 = mem[0],zero 726; CHECK-FMA-WIN-NEXT: 
vfnmsub132sd (%rcx), %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0x9f,0x01] 727; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm0 * mem) - xmm1 728; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 729 %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2) 730 ret <2 x double> %res 731} 732declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) 733 734define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 735; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps: 736; CHECK-FMA: # %bb.0: 737; CHECK-FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2] 738; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 739; CHECK-FMA-NEXT: retq # encoding: [0xc3] 740; 741; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps: 742; CHECK-AVX512VL: # %bb.0: 743; CHECK-AVX512VL-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2] 744; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 745; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 746; 747; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps: 748; CHECK-FMA-WIN: # %bb.0: 749; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 750; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 751; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00] 752; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 753; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 754 %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 755 ret <4 x float> %res 756} 757declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) 758 759define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 760; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd: 761; CHECK-FMA: # %bb.0: 762; CHECK-FMA-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # 
encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 763; CHECK-FMA-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 764; CHECK-FMA-NEXT: retq # encoding: [0xc3] 765; 766; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd: 767; CHECK-AVX512VL: # %bb.0: 768; CHECK-AVX512VL-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2] 769; CHECK-AVX512VL-NEXT: # xmm0 = -(xmm1 * xmm0) - xmm2 770; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 771; 772; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd: 773; CHECK-FMA-WIN: # %bb.0: 774; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 775; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 776; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00] 777; CHECK-FMA-WIN-NEXT: # xmm0 = -(xmm1 * xmm0) - mem 778; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 779 %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 780 ret <2 x double> %res 781} 782declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) 783 784define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 785; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256: 786; CHECK-FMA: # %bb.0: 787; CHECK-FMA-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2] 788; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 789; CHECK-FMA-NEXT: retq # encoding: [0xc3] 790; 791; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256: 792; CHECK-AVX512VL: # %bb.0: 793; CHECK-AVX512VL-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2] 794; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 795; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 796; 797; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256: 798; CHECK-FMA-WIN: # %bb.0: 799; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 800; 
CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 801; CHECK-FMA-WIN-NEXT: vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00] 802; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 803; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 804 %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 805 ret <8 x float> %res 806} 807declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) 808 809define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 810; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256: 811; CHECK-FMA: # %bb.0: 812; CHECK-FMA-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 813; CHECK-FMA-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 814; CHECK-FMA-NEXT: retq # encoding: [0xc3] 815; 816; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd_256: 817; CHECK-AVX512VL: # %bb.0: 818; CHECK-AVX512VL-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2] 819; CHECK-AVX512VL-NEXT: # ymm0 = -(ymm1 * ymm0) - ymm2 820; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 821; 822; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256: 823; CHECK-FMA-WIN: # %bb.0: 824; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 825; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 826; CHECK-FMA-WIN-NEXT: vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00] 827; CHECK-FMA-WIN-NEXT: # ymm0 = -(ymm1 * ymm0) - mem 828; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 829 %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 830 ret <4 x double> %res 831} 832declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) 833 834; VFMADDSUB 835define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, 
<4 x float> %a2) #0 { 836; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps: 837; CHECK-FMA: # %bb.0: 838; CHECK-FMA-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 839; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 840; CHECK-FMA-NEXT: retq # encoding: [0xc3] 841; 842; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps: 843; CHECK-AVX512VL: # %bb.0: 844; CHECK-AVX512VL-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2] 845; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 846; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 847; 848; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps: 849; CHECK-FMA-WIN: # %bb.0: 850; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 851; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 852; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00] 853; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 854; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 855 %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 856 ret <4 x float> %res 857} 858declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>) 859 860define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 861; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd: 862; CHECK-FMA: # %bb.0: 863; CHECK-FMA-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 864; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 865; CHECK-FMA-NEXT: retq # encoding: [0xc3] 866; 867; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd: 868; CHECK-AVX512VL: # %bb.0: 869; CHECK-AVX512VL-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2] 870; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) +/- xmm2 871; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 872; 873; 
CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd: 874; CHECK-FMA-WIN: # %bb.0: 875; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 876; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 877; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00] 878; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) +/- mem 879; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 880 %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 881 ret <2 x double> %res 882} 883declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>) 884 885define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 886; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256: 887; CHECK-FMA: # %bb.0: 888; CHECK-FMA-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 889; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 890; CHECK-FMA-NEXT: retq # encoding: [0xc3] 891; 892; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256: 893; CHECK-AVX512VL: # %bb.0: 894; CHECK-AVX512VL-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2] 895; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 896; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 897; 898; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256: 899; CHECK-FMA-WIN: # %bb.0: 900; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 901; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 902; CHECK-FMA-WIN-NEXT: vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00] 903; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 904; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 905 %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 906 ret <8 x float> %res 907} 908declare <8 x 
float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>) 909 910define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 911; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256: 912; CHECK-FMA: # %bb.0: 913; CHECK-FMA-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 914; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 915; CHECK-FMA-NEXT: retq # encoding: [0xc3] 916; 917; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256: 918; CHECK-AVX512VL: # %bb.0: 919; CHECK-AVX512VL-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2] 920; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) +/- ymm2 921; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 922; 923; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd_256: 924; CHECK-FMA-WIN: # %bb.0: 925; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 926; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 927; CHECK-FMA-WIN-NEXT: vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00] 928; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) +/- mem 929; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 930 %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 931 ret <4 x double> %res 932} 933declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>) 934 935; VFMSUBADD 936define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 937; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps: 938; CHECK-FMA: # %bb.0: 939; CHECK-FMA-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2] 940; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 941; CHECK-FMA-NEXT: retq # encoding: [0xc3] 942; 943; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps: 944; CHECK-AVX512VL: # %bb.0: 945; CHECK-AVX512VL-NEXT: 
vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2] 946; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 947; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 948; 949; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps: 950; CHECK-FMA-WIN: # %bb.0: 951; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09] 952; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02] 953; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00] 954; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem 955; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 956 %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) 957 ret <4 x float> %res 958} 959declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>) 960 961define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 962; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd: 963; CHECK-FMA: # %bb.0: 964; CHECK-FMA-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 965; CHECK-FMA-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 966; CHECK-FMA-NEXT: retq # encoding: [0xc3] 967; 968; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd: 969; CHECK-AVX512VL: # %bb.0: 970; CHECK-AVX512VL-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2] 971; CHECK-AVX512VL-NEXT: # xmm0 = (xmm1 * xmm0) -/+ xmm2 972; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 973; 974; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd: 975; CHECK-FMA-WIN: # %bb.0: 976; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09] 977; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02] 978; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00] 979; CHECK-FMA-WIN-NEXT: # xmm0 = (xmm1 * xmm0) -/+ mem 980; 
CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 981 %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) 982 ret <2 x double> %res 983} 984declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>) 985 986define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 { 987; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256: 988; CHECK-FMA: # %bb.0: 989; CHECK-FMA-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 990; CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 991; CHECK-FMA-NEXT: retq # encoding: [0xc3] 992; 993; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256: 994; CHECK-AVX512VL: # %bb.0: 995; CHECK-AVX512VL-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2] 996; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 997; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 998; 999; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256: 1000; CHECK-FMA-WIN: # %bb.0: 1001; CHECK-FMA-WIN-NEXT: vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09] 1002; CHECK-FMA-WIN-NEXT: vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02] 1003; CHECK-FMA-WIN-NEXT: vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00] 1004; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1005; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1006 %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 1007 ret <8 x float> %res 1008} 1009declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>) 1010 1011define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1012; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256: 1013; CHECK-FMA: # %bb.0: 1014; CHECK-FMA-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1015; 
CHECK-FMA-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1016; CHECK-FMA-NEXT: retq # encoding: [0xc3] 1017; 1018; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256: 1019; CHECK-AVX512VL: # %bb.0: 1020; CHECK-AVX512VL-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2] 1021; CHECK-AVX512VL-NEXT: # ymm0 = (ymm1 * ymm0) -/+ ymm2 1022; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] 1023; 1024; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256: 1025; CHECK-FMA-WIN: # %bb.0: 1026; CHECK-FMA-WIN-NEXT: vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09] 1027; CHECK-FMA-WIN-NEXT: vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02] 1028; CHECK-FMA-WIN-NEXT: vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00] 1029; CHECK-FMA-WIN-NEXT: # ymm0 = (ymm1 * ymm0) -/+ mem 1030; CHECK-FMA-WIN-NEXT: retq # encoding: [0xc3] 1031 %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 1032 ret <4 x double> %res 1033} 1034declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>) 1035 1036attributes #0 = { nounwind } 1037