; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefix=AVX

define <2 x double> @splat_fdiv_v2f64(<2 x double> %x, double %y) {
; SSE-LABEL: splat_fdiv_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    divsd %xmm1, %xmm2
; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0,0]
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX-NEXT:    vdivsd %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vy = insertelement <2 x double> undef, double %y, i32 0
  %splaty = shufflevector <2 x double> %vy, <2 x double> undef, <2 x i32> zeroinitializer
  %r = fdiv fast <2 x double> %x, %splaty
  ret <2 x double> %r
}

define <4 x double> @splat_fdiv_v4f64(<4 x double> %x, double %y) {
; SSE-LABEL: splat_fdiv_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    divsd %xmm2, %xmm3
; SSE-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0,0]
; SSE-NEXT:    mulpd %xmm3, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX-NEXT:    vdivsd %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vy = insertelement <4 x double> undef, double %y, i32 0
  %splaty = shufflevector <4 x double> %vy, <4 x double> undef, <4 x i32> zeroinitializer
  %r = fdiv arcp <4 x double> %x, %splaty
  ret <4 x double> %r
}

define <4 x float> @splat_fdiv_v4f32(<4 x float> %x, float %y) {
; SSE-LABEL: splat_fdiv_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fdiv arcp reassoc <4 x float> %x, %splaty
  ret <4 x float> %r
}

define <8 x float> @splat_fdiv_v8f32(<8 x float> %x, float %y) {
; SSE-LABEL: splat_fdiv_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    divss %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-NEXT:    mulps %xmm3, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vy = insertelement <8 x float> undef, float %y, i32 0
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %x, %splaty
  ret <8 x float> %r
}
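
; The remaining tests carry the "reciprocal-estimates"="divf,vec-divf"
; attribute (#0, defined at the bottom of the file), so the checked output
; below replaces the real scalar divide with an rcpss estimate refined by
; one Newton-Raphson step (est + est * (1.0 - y * est)) before the splat
; and multiply.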
define <4 x float> @splat_fdiv_v4f32_estimate(<4 x float> %x, float %y) #0 {
; SSE-LABEL: splat_fdiv_v4f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rcpss %xmm1, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm1
; SSE-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT:    subss %xmm1, %xmm3
; SSE-NEXT:    mulss %xmm2, %xmm3
; SSE-NEXT:    addss %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE-NEXT:    mulps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v4f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vy = insertelement <4 x float> undef, float %y, i32 0
  %splaty = shufflevector <4 x float> %vy, <4 x float> undef, <4 x i32> zeroinitializer
  %r = fdiv arcp reassoc ninf <4 x float> %x, %splaty
  ret <4 x float> %r
}

define <8 x float> @splat_fdiv_v8f32_estimate(<8 x float> %x, float %y) #0 {
; SSE-LABEL: splat_fdiv_v8f32_estimate:
; SSE:       # %bb.0:
; SSE-NEXT:    rcpss %xmm2, %xmm3
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT:    subss %xmm2, %xmm4
; SSE-NEXT:    mulss %xmm3, %xmm4
; SSE-NEXT:    addss %xmm3, %xmm4
; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
; SSE-NEXT:    mulps %xmm4, %xmm0
; SSE-NEXT:    mulps %xmm4, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: splat_fdiv_v8f32_estimate:
; AVX:       # %bb.0:
; AVX-NEXT:    vrcpss %xmm1, %xmm1, %xmm2
; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vy = insertelement <8 x float> undef, float %y, i32 0
  %splaty = shufflevector <8 x float> %vy, <8 x float> undef, <8 x i32> zeroinitializer
  %r = fdiv fast <8 x float> %x, %splaty
  ret <8 x float> %r
}

attributes #0 = { "reciprocal-estimates"="divf,vec-divf" }