; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %a2, %x
  ret <8 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Load Folding Patterns
;

define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps (%rdi), %zmm2
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %a0
  %y = fmul <16 x float> %x, %a1
  %res = fadd <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmsub132pd 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovapd (%rdi), %zmm2
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm0, %zmm2
; AVX512-NEXT:    vmovaps %zmm2, %zmm0
; AVX512-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %a0
  %y = fmul <8 x double> %x, %a1
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-LABEL: test_v16f32_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213ps %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <16 x float> %x, %t
  %ty = fmul <16 x float> %y, %t1
  %r = fadd <16 x float> %tx, %ty
  ret <16 x float> %r
}

define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-LABEL: test_v8f64_interp:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
; FMA-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
; FMA-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_interp:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_interp:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmovaps %zmm2, %zmm3
; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm3
; AVX512-NEXT:    vfmadd213pd %zmm3, %zmm2, %zmm0
; AVX512-NEXT:    retq
  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <8 x double> %x, %t
  %ty = fmul <8 x double> %y, %t1
  %r = fadd <8 x double> %tx, %ty
  ret <8 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %add = fadd <16 x float> %mul, %a2
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}

define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %sub = fsub <8 x double> %mul, %a2
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}

define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <16 x float> %neg0, %a2
  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}

define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <8 x double> %neg0, %a2
  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA:       # BB#0:
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}

; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)

define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <16 x float> %x, %y
  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <16 x float> %n
}

define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA:       # BB#0:
; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vxorpd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m = fmul <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

attributes #0 = { "unsafe-fp-math"="true" }