; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
;  _mm_add_ss(a, _mm_load_ss(b));
;
; Every function below has the same shape: extract element 0 of %va, load a
; scalar from %pb, apply one floating-point operation, and insert the result
; back into element 0 of %va. The expected output for both configurations is a
; single scalar math instruction that takes (%rdi) as its memory operand.

; Single-precision add: element 0 of %va plus the float loaded from %pb.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # BB#0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Double-precision add: element 0 of %va plus the double loaded from %pb.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Single-precision subtract: element 0 of %va minus the float loaded from %pb.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # BB#0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Double-precision subtract: element 0 of %va minus the double loaded from %pb.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Single-precision multiply: element 0 of %va times the float loaded from %pb.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Double-precision multiply: element 0 of %va times the double loaded from %pb.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}

; Single-precision divide: element 0 of %va divided by the float loaded from %pb.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # BB#0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}

; Double-precision divide: element 0 of %va divided by the double loaded from %pb.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}