• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
3; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
4
5; Verify that we're folding the load into the math instruction.
6; This pattern is generated out of the simplest intrinsics usage:
7;  _mm_add_ss(a, _mm_load_ss(b));
8
; Adds the float loaded from %pb to lane 0 of %va and writes the sum back
; into lane 0; the other three lanes of %va pass through unchanged.
; The scalar load is expected to fold into the memory operand of (v)addss.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE:       # BB#0:
; SSE-NEXT:    addss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addss:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %lane0 = extractelement <4 x float> %va, i32 0
  %mem = load float, float* %pb
  %sum = fadd float %lane0, %mem
  %merged = insertelement <4 x float> %va, float %sum, i32 0
  ret <4 x float> %merged
}
25
; Double-precision counterpart of the scalar add test: lane 0 of %va plus
; the double loaded from %pb is inserted back into lane 0, leaving lane 1
; as it was. The load is expected to fold into (v)addsd.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE:       # BB#0:
; SSE-NEXT:    addsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: addsd:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %low = extractelement <2 x double> %va, i32 0
  %loaded = load double, double* %pb
  %sum = fadd double %low, %loaded
  %result = insertelement <2 x double> %va, double %sum, i32 0
  ret <2 x double> %result
}
42
; Subtracts the float loaded from %pb from lane 0 of %va (vector element is
; the minuend, memory value the subtrahend) and stores the difference in
; lane 0; lanes 1-3 come straight from %va. The load should fold into
; (v)subss.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE:       # BB#0:
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subss:
; AVX:       # BB#0:
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %minuend = extractelement <4 x float> %va, i32 0
  %subtrahend = load float, float* %pb
  %diff = fsub float %minuend, %subtrahend
  %merged = insertelement <4 x float> %va, float %diff, i32 0
  ret <4 x float> %merged
}
59
; Double-precision scalar subtract: lane 0 of %va minus the double loaded
; from %pb replaces lane 0, with lane 1 carried over from %va. The load is
; expected to be folded into (v)subsd.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE:       # BB#0:
; SSE-NEXT:    subsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: subsd:
; AVX:       # BB#0:
; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %minuend = extractelement <2 x double> %va, i32 0
  %subtrahend = load double, double* %pb
  %diff = fsub double %minuend, %subtrahend
  %result = insertelement <2 x double> %va, double %diff, i32 0
  ret <2 x double> %result
}
76
; Multiplies lane 0 of %va by the float loaded from %pb and places the
; product in lane 0; the upper three lanes are taken from %va unchanged.
; The load should be folded into the memory operand of (v)mulss.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE:       # BB#0:
; SSE-NEXT:    mulss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulss:
; AVX:       # BB#0:
; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %lane0 = extractelement <4 x float> %va, i32 0
  %factor = load float, float* %pb
  %prod = fmul float %lane0, %factor
  %merged = insertelement <4 x float> %va, float %prod, i32 0
  ret <4 x float> %merged
}
93
; Double-precision scalar multiply: lane 0 of %va times the double loaded
; from %pb is inserted into lane 0, and lane 1 is preserved from %va.
; The load is expected to fold into (v)mulsd.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE:       # BB#0:
; SSE-NEXT:    mulsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mulsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %low = extractelement <2 x double> %va, i32 0
  %factor = load double, double* %pb
  %prod = fmul double %low, %factor
  %result = insertelement <2 x double> %va, double %prod, i32 0
  ret <2 x double> %result
}
110
; Divides lane 0 of %va (dividend) by the float loaded from %pb (divisor)
; and writes the quotient to lane 0; lanes 1-3 are passed through from %va.
; The load should be folded into the memory operand of (v)divss.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE:       # BB#0:
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divss:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %dividend = extractelement <4 x float> %va, i32 0
  %divisor = load float, float* %pb
  %quot = fdiv float %dividend, %divisor
  %merged = insertelement <4 x float> %va, float %quot, i32 0
  ret <4 x float> %merged
}
127
; Double-precision scalar divide: lane 0 of %va divided by the double loaded
; from %pb replaces lane 0, while lane 1 is kept from %va. The load is
; expected to fold into (v)divsd.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE:       # BB#0:
; SSE-NEXT:    divsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: divsd:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %dividend = extractelement <2 x double> %va, i32 0
  %divisor = load double, double* %pb
  %quot = fdiv double %dividend, %divisor
  %result = insertelement <2 x double> %va, double %quot, i32 0
  ret <2 x double> %result
}
144