• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,HASWELL
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BROADWELL
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
14; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKYLAKE
15; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
16; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKX
17; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
18; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BTVER2
19; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
20; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,ZNVER1
21
; Scheduling-annotation test for the SSE3 ADDSUBPD instruction.
; The body calls llvm.x86.sse3.addsub.pd twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
22define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
23; GENERIC-LABEL: test_addsubpd:
24; GENERIC:       # %bb.0:
25; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
26; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
27; GENERIC-NEXT:    retq # sched: [1:1.00]
28;
29; ATOM-LABEL: test_addsubpd:
30; ATOM:       # %bb.0:
31; ATOM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [6:3.00]
32; ATOM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [7:3.50]
33; ATOM-NEXT:    retq # sched: [79:39.50]
34;
35; SLM-LABEL: test_addsubpd:
36; SLM:       # %bb.0:
37; SLM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
38; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
39; SLM-NEXT:    retq # sched: [4:1.00]
40;
41; SANDY-SSE-LABEL: test_addsubpd:
42; SANDY-SSE:       # %bb.0:
43; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
44; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
45; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
46;
47; SANDY-LABEL: test_addsubpd:
48; SANDY:       # %bb.0:
49; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
50; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
51; SANDY-NEXT:    retq # sched: [1:1.00]
52;
53; HASWELL-SSE-LABEL: test_addsubpd:
54; HASWELL-SSE:       # %bb.0:
55; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
56; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
57; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
58;
59; HASWELL-LABEL: test_addsubpd:
60; HASWELL:       # %bb.0:
61; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
62; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
63; HASWELL-NEXT:    retq # sched: [7:1.00]
64;
65; BROADWELL-SSE-LABEL: test_addsubpd:
66; BROADWELL-SSE:       # %bb.0:
67; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
68; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
69; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
70;
71; BROADWELL-LABEL: test_addsubpd:
72; BROADWELL:       # %bb.0:
73; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
74; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
75; BROADWELL-NEXT:    retq # sched: [7:1.00]
76;
77; SKYLAKE-SSE-LABEL: test_addsubpd:
78; SKYLAKE-SSE:       # %bb.0:
79; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
80; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
81; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
82;
83; SKYLAKE-LABEL: test_addsubpd:
84; SKYLAKE:       # %bb.0:
85; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
86; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
87; SKYLAKE-NEXT:    retq # sched: [7:1.00]
88;
89; SKX-SSE-LABEL: test_addsubpd:
90; SKX-SSE:       # %bb.0:
91; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
92; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
93; SKX-SSE-NEXT:    retq # sched: [7:1.00]
94;
95; SKX-LABEL: test_addsubpd:
96; SKX:       # %bb.0:
97; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
98; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
99; SKX-NEXT:    retq # sched: [7:1.00]
100;
101; BTVER2-SSE-LABEL: test_addsubpd:
102; BTVER2-SSE:       # %bb.0:
103; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
104; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
105; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
106;
107; BTVER2-LABEL: test_addsubpd:
108; BTVER2:       # %bb.0:
109; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
110; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
111; BTVER2-NEXT:    retq # sched: [4:1.00]
112;
113; ZNVER1-SSE-LABEL: test_addsubpd:
114; ZNVER1-SSE:       # %bb.0:
115; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
116; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
117; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
118;
119; ZNVER1-LABEL: test_addsubpd:
120; ZNVER1:       # %bb.0:
121; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
122; ZNVER1-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
123; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
124  %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
125  %2 = load <2 x double>, <2 x double> *%a2, align 16
126  %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
127  ret <2 x double> %3
128}
; Intrinsic exercised by test_addsubpd; declared nounwind readnone.
129declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
130
; Scheduling-annotation test for the SSE3 ADDSUBPS instruction.
; The body calls llvm.x86.sse3.addsub.ps twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
131define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
132; GENERIC-LABEL: test_addsubps:
133; GENERIC:       # %bb.0:
134; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
135; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
136; GENERIC-NEXT:    retq # sched: [1:1.00]
137;
138; ATOM-LABEL: test_addsubps:
139; ATOM:       # %bb.0:
140; ATOM-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:5.00]
141; ATOM-NEXT:    addsubps (%rdi), %xmm0 # sched: [5:5.00]
142; ATOM-NEXT:    retq # sched: [79:39.50]
143;
144; SLM-LABEL: test_addsubps:
145; SLM:       # %bb.0:
146; SLM-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
147; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
148; SLM-NEXT:    retq # sched: [4:1.00]
149;
150; SANDY-SSE-LABEL: test_addsubps:
151; SANDY-SSE:       # %bb.0:
152; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
153; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
154; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
155;
156; SANDY-LABEL: test_addsubps:
157; SANDY:       # %bb.0:
158; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
159; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
160; SANDY-NEXT:    retq # sched: [1:1.00]
161;
162; HASWELL-SSE-LABEL: test_addsubps:
163; HASWELL-SSE:       # %bb.0:
164; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
165; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
166; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
167;
168; HASWELL-LABEL: test_addsubps:
169; HASWELL:       # %bb.0:
170; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
171; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
172; HASWELL-NEXT:    retq # sched: [7:1.00]
173;
174; BROADWELL-SSE-LABEL: test_addsubps:
175; BROADWELL-SSE:       # %bb.0:
176; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
177; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
178; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
179;
180; BROADWELL-LABEL: test_addsubps:
181; BROADWELL:       # %bb.0:
182; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
183; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
184; BROADWELL-NEXT:    retq # sched: [7:1.00]
185;
186; SKYLAKE-SSE-LABEL: test_addsubps:
187; SKYLAKE-SSE:       # %bb.0:
188; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
189; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
190; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
191;
192; SKYLAKE-LABEL: test_addsubps:
193; SKYLAKE:       # %bb.0:
194; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
195; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
196; SKYLAKE-NEXT:    retq # sched: [7:1.00]
197;
198; SKX-SSE-LABEL: test_addsubps:
199; SKX-SSE:       # %bb.0:
200; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
201; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
202; SKX-SSE-NEXT:    retq # sched: [7:1.00]
203;
204; SKX-LABEL: test_addsubps:
205; SKX:       # %bb.0:
206; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
207; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
208; SKX-NEXT:    retq # sched: [7:1.00]
209;
210; BTVER2-SSE-LABEL: test_addsubps:
211; BTVER2-SSE:       # %bb.0:
212; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
213; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
214; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
215;
216; BTVER2-LABEL: test_addsubps:
217; BTVER2:       # %bb.0:
218; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
219; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
220; BTVER2-NEXT:    retq # sched: [4:1.00]
221;
222; ZNVER1-SSE-LABEL: test_addsubps:
223; ZNVER1-SSE:       # %bb.0:
224; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
225; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
226; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
227;
228; ZNVER1-LABEL: test_addsubps:
229; ZNVER1:       # %bb.0:
230; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
231; ZNVER1-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
232; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
233  %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
234  %2 = load <4 x float>, <4 x float> *%a2, align 16
235  %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
236  ret <4 x float> %3
237}
; Intrinsic exercised by test_addsubps; declared nounwind readnone.
238declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
239
; Scheduling-annotation test for the SSE3 HADDPD instruction.
; The body calls llvm.x86.sse3.hadd.pd twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
240define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
241; GENERIC-LABEL: test_haddpd:
242; GENERIC:       # %bb.0:
243; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
244; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
245; GENERIC-NEXT:    retq # sched: [1:1.00]
246;
247; ATOM-LABEL: test_haddpd:
248; ATOM:       # %bb.0:
249; ATOM-NEXT:    haddpd %xmm1, %xmm0 # sched: [8:4.00]
250; ATOM-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:4.50]
251; ATOM-NEXT:    retq # sched: [79:39.50]
252;
253; SLM-LABEL: test_haddpd:
254; SLM:       # %bb.0:
255; SLM-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
256; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
257; SLM-NEXT:    retq # sched: [4:1.00]
258;
259; SANDY-SSE-LABEL: test_haddpd:
260; SANDY-SSE:       # %bb.0:
261; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
262; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
263; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
264;
265; SANDY-LABEL: test_haddpd:
266; SANDY:       # %bb.0:
267; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
268; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
269; SANDY-NEXT:    retq # sched: [1:1.00]
270;
271; HASWELL-SSE-LABEL: test_haddpd:
272; HASWELL-SSE:       # %bb.0:
273; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
274; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
275; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
276;
277; HASWELL-LABEL: test_haddpd:
278; HASWELL:       # %bb.0:
279; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
280; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
281; HASWELL-NEXT:    retq # sched: [7:1.00]
282;
283; BROADWELL-SSE-LABEL: test_haddpd:
284; BROADWELL-SSE:       # %bb.0:
285; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
286; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
287; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
288;
289; BROADWELL-LABEL: test_haddpd:
290; BROADWELL:       # %bb.0:
291; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
292; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
293; BROADWELL-NEXT:    retq # sched: [7:1.00]
294;
295; SKYLAKE-SSE-LABEL: test_haddpd:
296; SKYLAKE-SSE:       # %bb.0:
297; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
298; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
299; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
300;
301; SKYLAKE-LABEL: test_haddpd:
302; SKYLAKE:       # %bb.0:
303; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
304; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
305; SKYLAKE-NEXT:    retq # sched: [7:1.00]
306;
307; SKX-SSE-LABEL: test_haddpd:
308; SKX-SSE:       # %bb.0:
309; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
310; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
311; SKX-SSE-NEXT:    retq # sched: [7:1.00]
312;
313; SKX-LABEL: test_haddpd:
314; SKX:       # %bb.0:
315; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
316; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
317; SKX-NEXT:    retq # sched: [7:1.00]
318;
319; BTVER2-SSE-LABEL: test_haddpd:
320; BTVER2-SSE:       # %bb.0:
321; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
322; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [8:1.00]
323; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
324;
325; BTVER2-LABEL: test_haddpd:
326; BTVER2:       # %bb.0:
327; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
328; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
329; BTVER2-NEXT:    retq # sched: [4:1.00]
330;
331; ZNVER1-SSE-LABEL: test_haddpd:
332; ZNVER1-SSE:       # %bb.0:
333; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:0.25]
334; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:0.25]
335; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
336;
337; ZNVER1-LABEL: test_haddpd:
338; ZNVER1:       # %bb.0:
339; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
340; ZNVER1-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
341; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
342  %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
343  %2 = load <2 x double>, <2 x double> *%a2, align 16
344  %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
345  ret <2 x double> %3
346}
; Intrinsic exercised by test_haddpd; declared nounwind readnone.
347declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
348
; Scheduling-annotation test for the SSE3 HADDPS instruction.
; The body calls llvm.x86.sse3.hadd.ps twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
349define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
350; GENERIC-LABEL: test_haddps:
351; GENERIC:       # %bb.0:
352; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
353; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
354; GENERIC-NEXT:    retq # sched: [1:1.00]
355;
356; ATOM-LABEL: test_haddps:
357; ATOM:       # %bb.0:
358; ATOM-NEXT:    haddps %xmm1, %xmm0 # sched: [8:4.00]
359; ATOM-NEXT:    haddps (%rdi), %xmm0 # sched: [9:4.50]
360; ATOM-NEXT:    retq # sched: [79:39.50]
361;
362; SLM-LABEL: test_haddps:
363; SLM:       # %bb.0:
364; SLM-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
365; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
366; SLM-NEXT:    retq # sched: [4:1.00]
367;
368; SANDY-SSE-LABEL: test_haddps:
369; SANDY-SSE:       # %bb.0:
370; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
371; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
372; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
373;
374; SANDY-LABEL: test_haddps:
375; SANDY:       # %bb.0:
376; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
377; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
378; SANDY-NEXT:    retq # sched: [1:1.00]
379;
380; HASWELL-SSE-LABEL: test_haddps:
381; HASWELL-SSE:       # %bb.0:
382; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
383; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
384; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
385;
386; HASWELL-LABEL: test_haddps:
387; HASWELL:       # %bb.0:
388; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
389; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
390; HASWELL-NEXT:    retq # sched: [7:1.00]
391;
392; BROADWELL-SSE-LABEL: test_haddps:
393; BROADWELL-SSE:       # %bb.0:
394; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
395; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
396; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
397;
398; BROADWELL-LABEL: test_haddps:
399; BROADWELL:       # %bb.0:
400; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
401; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
402; BROADWELL-NEXT:    retq # sched: [7:1.00]
403;
404; SKYLAKE-SSE-LABEL: test_haddps:
405; SKYLAKE-SSE:       # %bb.0:
406; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
407; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
408; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
409;
410; SKYLAKE-LABEL: test_haddps:
411; SKYLAKE:       # %bb.0:
412; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
413; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
414; SKYLAKE-NEXT:    retq # sched: [7:1.00]
415;
416; SKX-SSE-LABEL: test_haddps:
417; SKX-SSE:       # %bb.0:
418; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
419; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
420; SKX-SSE-NEXT:    retq # sched: [7:1.00]
421;
422; SKX-LABEL: test_haddps:
423; SKX:       # %bb.0:
424; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
425; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
426; SKX-NEXT:    retq # sched: [7:1.00]
427;
428; BTVER2-SSE-LABEL: test_haddps:
429; BTVER2-SSE:       # %bb.0:
430; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
431; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [8:1.00]
432; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
433;
434; BTVER2-LABEL: test_haddps:
435; BTVER2:       # %bb.0:
436; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
437; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
438; BTVER2-NEXT:    retq # sched: [4:1.00]
439;
440; ZNVER1-SSE-LABEL: test_haddps:
441; ZNVER1-SSE:       # %bb.0:
442; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:0.25]
443; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:0.25]
444; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
445;
446; ZNVER1-LABEL: test_haddps:
447; ZNVER1:       # %bb.0:
448; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
449; ZNVER1-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
450; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
451  %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
452  %2 = load <4 x float>, <4 x float> *%a2, align 16
453  %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
454  ret <4 x float> %3
455}
; Intrinsic exercised by test_haddps; declared nounwind readnone.
456declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
457
; Scheduling-annotation test for the SSE3 HSUBPD instruction.
; The body calls llvm.x86.sse3.hsub.pd twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
458define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
459; GENERIC-LABEL: test_hsubpd:
460; GENERIC:       # %bb.0:
461; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
462; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
463; GENERIC-NEXT:    retq # sched: [1:1.00]
464;
465; ATOM-LABEL: test_hsubpd:
466; ATOM:       # %bb.0:
467; ATOM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [8:4.00]
468; ATOM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:4.50]
469; ATOM-NEXT:    retq # sched: [79:39.50]
470;
471; SLM-LABEL: test_hsubpd:
472; SLM:       # %bb.0:
473; SLM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
474; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
475; SLM-NEXT:    retq # sched: [4:1.00]
476;
477; SANDY-SSE-LABEL: test_hsubpd:
478; SANDY-SSE:       # %bb.0:
479; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
480; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
481; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
482;
483; SANDY-LABEL: test_hsubpd:
484; SANDY:       # %bb.0:
485; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
486; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
487; SANDY-NEXT:    retq # sched: [1:1.00]
488;
489; HASWELL-SSE-LABEL: test_hsubpd:
490; HASWELL-SSE:       # %bb.0:
491; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
492; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
493; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
494;
495; HASWELL-LABEL: test_hsubpd:
496; HASWELL:       # %bb.0:
497; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
498; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
499; HASWELL-NEXT:    retq # sched: [7:1.00]
500;
501; BROADWELL-SSE-LABEL: test_hsubpd:
502; BROADWELL-SSE:       # %bb.0:
503; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
504; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
505; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
506;
507; BROADWELL-LABEL: test_hsubpd:
508; BROADWELL:       # %bb.0:
509; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
510; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
511; BROADWELL-NEXT:    retq # sched: [7:1.00]
512;
513; SKYLAKE-SSE-LABEL: test_hsubpd:
514; SKYLAKE-SSE:       # %bb.0:
515; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
516; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
517; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
518;
519; SKYLAKE-LABEL: test_hsubpd:
520; SKYLAKE:       # %bb.0:
521; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
522; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
523; SKYLAKE-NEXT:    retq # sched: [7:1.00]
524;
525; SKX-SSE-LABEL: test_hsubpd:
526; SKX-SSE:       # %bb.0:
527; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
528; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
529; SKX-SSE-NEXT:    retq # sched: [7:1.00]
530;
531; SKX-LABEL: test_hsubpd:
532; SKX:       # %bb.0:
533; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
534; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
535; SKX-NEXT:    retq # sched: [7:1.00]
536;
537; BTVER2-SSE-LABEL: test_hsubpd:
538; BTVER2-SSE:       # %bb.0:
539; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
540; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [8:1.00]
541; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
542;
543; BTVER2-LABEL: test_hsubpd:
544; BTVER2:       # %bb.0:
545; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
546; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
547; BTVER2-NEXT:    retq # sched: [4:1.00]
548;
549; ZNVER1-SSE-LABEL: test_hsubpd:
550; ZNVER1-SSE:       # %bb.0:
551; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:0.25]
552; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:0.25]
553; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
554;
555; ZNVER1-LABEL: test_hsubpd:
556; ZNVER1:       # %bb.0:
557; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
558; ZNVER1-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
559; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
560  %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
561  %2 = load <2 x double>, <2 x double> *%a2, align 16
562  %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
563  ret <2 x double> %3
564}
; Intrinsic exercised by test_hsubpd; declared nounwind readnone.
565declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
566
; Scheduling-annotation test for the SSE3 HSUBPS instruction.
; The body calls llvm.x86.sse3.hsub.ps twice: first on the two vector
; arguments, then on that result and a vector loaded from %a2 (align 16). The
; expected output therefore has a register-register instruction followed by
; one with a (%rdi) memory operand, then the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed three-operand mnemonic; every other group expects the legacy
; two-operand mnemonic.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
567define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
568; GENERIC-LABEL: test_hsubps:
569; GENERIC:       # %bb.0:
570; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
571; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
572; GENERIC-NEXT:    retq # sched: [1:1.00]
573;
574; ATOM-LABEL: test_hsubps:
575; ATOM:       # %bb.0:
576; ATOM-NEXT:    hsubps %xmm1, %xmm0 # sched: [8:4.00]
577; ATOM-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:4.50]
578; ATOM-NEXT:    retq # sched: [79:39.50]
579;
580; SLM-LABEL: test_hsubps:
581; SLM:       # %bb.0:
582; SLM-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
583; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
584; SLM-NEXT:    retq # sched: [4:1.00]
585;
586; SANDY-SSE-LABEL: test_hsubps:
587; SANDY-SSE:       # %bb.0:
588; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
589; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
590; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
591;
592; SANDY-LABEL: test_hsubps:
593; SANDY:       # %bb.0:
594; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
595; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
596; SANDY-NEXT:    retq # sched: [1:1.00]
597;
598; HASWELL-SSE-LABEL: test_hsubps:
599; HASWELL-SSE:       # %bb.0:
600; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
601; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
602; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
603;
604; HASWELL-LABEL: test_hsubps:
605; HASWELL:       # %bb.0:
606; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
607; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
608; HASWELL-NEXT:    retq # sched: [7:1.00]
609;
610; BROADWELL-SSE-LABEL: test_hsubps:
611; BROADWELL-SSE:       # %bb.0:
612; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
613; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
614; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
615;
616; BROADWELL-LABEL: test_hsubps:
617; BROADWELL:       # %bb.0:
618; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
619; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
620; BROADWELL-NEXT:    retq # sched: [7:1.00]
621;
622; SKYLAKE-SSE-LABEL: test_hsubps:
623; SKYLAKE-SSE:       # %bb.0:
624; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
625; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
626; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
627;
628; SKYLAKE-LABEL: test_hsubps:
629; SKYLAKE:       # %bb.0:
630; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
631; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
632; SKYLAKE-NEXT:    retq # sched: [7:1.00]
633;
634; SKX-SSE-LABEL: test_hsubps:
635; SKX-SSE:       # %bb.0:
636; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
637; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
638; SKX-SSE-NEXT:    retq # sched: [7:1.00]
639;
640; SKX-LABEL: test_hsubps:
641; SKX:       # %bb.0:
642; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
643; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
644; SKX-NEXT:    retq # sched: [7:1.00]
645;
646; BTVER2-SSE-LABEL: test_hsubps:
647; BTVER2-SSE:       # %bb.0:
648; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
649; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [8:1.00]
650; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
651;
652; BTVER2-LABEL: test_hsubps:
653; BTVER2:       # %bb.0:
654; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
655; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
656; BTVER2-NEXT:    retq # sched: [4:1.00]
657;
658; ZNVER1-SSE-LABEL: test_hsubps:
659; ZNVER1-SSE:       # %bb.0:
660; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:0.25]
661; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:0.25]
662; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
663;
664; ZNVER1-LABEL: test_hsubps:
665; ZNVER1:       # %bb.0:
666; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
667; ZNVER1-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
668; ZNVER1-NEXT:    retq # sched: [1:0.50]
; First use: both operands come straight from the vector arguments.
669  %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
; Second use: the loaded value is the second operand, which the expected
; output shows as the (%rdi) memory-operand form of the instruction.
670  %2 = load <4 x float>, <4 x float> *%a2, align 16
671  %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
672  ret <4 x float> %3
673}
; Intrinsic exercised by test_hsubps; declared nounwind readnone.
674declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
675
; Scheduling-annotation test for the SSE3 LDDQU instruction.
; The body makes a single call to llvm.x86.sse3.ldu.dq on the pointer
; argument and returns the resulting vector, so each group expects one load
; from (%rdi) into %xmm0 followed by the return. The ATOM group additionally
; expects two nop instructions between the load and the return.
; Every expected instruction ends in a "# sched" annotation, which the llc
; invocations at the top of the file request via -print-schedule; the bracketed
; pair is presumably latency and reciprocal throughput (TODO confirm).
; The SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 groups expect
; the v-prefixed mnemonic (vlddqu); every other group expects plain lddqu.
; These assertion lines are autogenerated (see the first line of the test), so
; regenerate them with the update script rather than editing them by hand.
676define <16 x i8> @test_lddqu(i8* %a0) {
677; GENERIC-LABEL: test_lddqu:
678; GENERIC:       # %bb.0:
679; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
680; GENERIC-NEXT:    retq # sched: [1:1.00]
681;
682; ATOM-LABEL: test_lddqu:
683; ATOM:       # %bb.0:
684; ATOM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.50]
685; ATOM-NEXT:    nop # sched: [1:0.50]
686; ATOM-NEXT:    nop # sched: [1:0.50]
687; ATOM-NEXT:    retq # sched: [79:39.50]
688;
689; SLM-LABEL: test_lddqu:
690; SLM:       # %bb.0:
691; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
692; SLM-NEXT:    retq # sched: [4:1.00]
693;
694; SANDY-SSE-LABEL: test_lddqu:
695; SANDY-SSE:       # %bb.0:
696; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
697; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
698;
699; SANDY-LABEL: test_lddqu:
700; SANDY:       # %bb.0:
701; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
702; SANDY-NEXT:    retq # sched: [1:1.00]
703;
704; HASWELL-SSE-LABEL: test_lddqu:
705; HASWELL-SSE:       # %bb.0:
706; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
707; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
708;
709; HASWELL-LABEL: test_lddqu:
710; HASWELL:       # %bb.0:
711; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
712; HASWELL-NEXT:    retq # sched: [7:1.00]
713;
714; BROADWELL-SSE-LABEL: test_lddqu:
715; BROADWELL-SSE:       # %bb.0:
716; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
717; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
718;
719; BROADWELL-LABEL: test_lddqu:
720; BROADWELL:       # %bb.0:
721; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
722; BROADWELL-NEXT:    retq # sched: [7:1.00]
723;
724; SKYLAKE-SSE-LABEL: test_lddqu:
725; SKYLAKE-SSE:       # %bb.0:
726; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
727; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
728;
729; SKYLAKE-LABEL: test_lddqu:
730; SKYLAKE:       # %bb.0:
731; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
732; SKYLAKE-NEXT:    retq # sched: [7:1.00]
733;
734; SKX-SSE-LABEL: test_lddqu:
735; SKX-SSE:       # %bb.0:
736; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
737; SKX-SSE-NEXT:    retq # sched: [7:1.00]
738;
739; SKX-LABEL: test_lddqu:
740; SKX:       # %bb.0:
741; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
742; SKX-NEXT:    retq # sched: [7:1.00]
743;
744; BTVER2-SSE-LABEL: test_lddqu:
745; BTVER2-SSE:       # %bb.0:
746; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
747; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
748;
749; BTVER2-LABEL: test_lddqu:
750; BTVER2:       # %bb.0:
751; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
752; BTVER2-NEXT:    retq # sched: [4:1.00]
753;
754; ZNVER1-SSE-LABEL: test_lddqu:
755; ZNVER1-SSE:       # %bb.0:
756; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
757; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
758;
759; ZNVER1-LABEL: test_lddqu:
760; ZNVER1:       # %bb.0:
761; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
762; ZNVER1-NEXT:    retq # sched: [1:0.50]
; The only operation: load a 16 x i8 vector through the pointer via the intrinsic.
763  %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
764  ret <16 x i8> %1
765}
; Intrinsic exercised by test_lddqu; declared nounwind readonly.
766declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
767
; test_monitor: tail-calls @llvm.x86.sse3.monitor with (%a0, %a1, %a2) and
; returns void.
; Every run expects the same four-instruction sequence: leaq (%rdi), %rax,
; then movl %esi, %ecx, then monitor, then retq. Only the sched annotations
; differ between runs.
; No instruction for the third argument appears in the expected output
; (presumably it already arrives in the register monitor reads - confirm).
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them instead of editing the sched annotations by hand.
768define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
769; GENERIC-LABEL: test_monitor:
770; GENERIC:       # %bb.0:
771; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
772; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
773; GENERIC-NEXT:    monitor # sched: [100:0.33]
774; GENERIC-NEXT:    retq # sched: [1:1.00]
775;
776; ATOM-LABEL: test_monitor:
777; ATOM:       # %bb.0:
778; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
779; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
780; ATOM-NEXT:    monitor # sched: [45:22.50]
781; ATOM-NEXT:    retq # sched: [79:39.50]
782;
783; SLM-LABEL: test_monitor:
784; SLM:       # %bb.0:
785; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
786; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
787; SLM-NEXT:    monitor # sched: [100:1.00]
788; SLM-NEXT:    retq # sched: [4:1.00]
789;
790; SANDY-SSE-LABEL: test_monitor:
791; SANDY-SSE:       # %bb.0:
792; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
793; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
794; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
795; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
796;
797; SANDY-LABEL: test_monitor:
798; SANDY:       # %bb.0:
799; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
800; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
801; SANDY-NEXT:    monitor # sched: [100:0.33]
802; SANDY-NEXT:    retq # sched: [1:1.00]
803;
804; HASWELL-SSE-LABEL: test_monitor:
805; HASWELL-SSE:       # %bb.0:
806; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
807; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
808; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
809; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
810;
811; HASWELL-LABEL: test_monitor:
812; HASWELL:       # %bb.0:
813; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
814; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
815; HASWELL-NEXT:    monitor # sched: [100:0.25]
816; HASWELL-NEXT:    retq # sched: [7:1.00]
817;
818; BROADWELL-SSE-LABEL: test_monitor:
819; BROADWELL-SSE:       # %bb.0:
820; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
821; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
822; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
823; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
824;
825; BROADWELL-LABEL: test_monitor:
826; BROADWELL:       # %bb.0:
827; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
828; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
829; BROADWELL-NEXT:    monitor # sched: [100:0.25]
830; BROADWELL-NEXT:    retq # sched: [7:1.00]
831;
832; SKYLAKE-SSE-LABEL: test_monitor:
833; SKYLAKE-SSE:       # %bb.0:
834; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
835; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
836; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
837; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
838;
839; SKYLAKE-LABEL: test_monitor:
840; SKYLAKE:       # %bb.0:
841; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
842; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
843; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
844; SKYLAKE-NEXT:    retq # sched: [7:1.00]
845;
846; SKX-SSE-LABEL: test_monitor:
847; SKX-SSE:       # %bb.0:
848; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
849; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
850; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
851; SKX-SSE-NEXT:    retq # sched: [7:1.00]
852;
853; SKX-LABEL: test_monitor:
854; SKX:       # %bb.0:
855; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
856; SKX-NEXT:    movl %esi, %ecx # sched: [1:0.25]
857; SKX-NEXT:    monitor # sched: [100:0.25]
858; SKX-NEXT:    retq # sched: [7:1.00]
859;
860; BTVER2-SSE-LABEL: test_monitor:
861; BTVER2-SSE:       # %bb.0:
862; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
863; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
864; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
865; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
866;
867; BTVER2-LABEL: test_monitor:
868; BTVER2:       # %bb.0:
869; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
870; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
871; BTVER2-NEXT:    monitor # sched: [100:0.50]
872; BTVER2-NEXT:    retq # sched: [4:1.00]
873;
874; ZNVER1-SSE-LABEL: test_monitor:
875; ZNVER1-SSE:       # %bb.0:
876; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
877; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
878; ZNVER1-SSE-NEXT:    monitor # sched: [100:0.25]
879; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
880;
881; ZNVER1-LABEL: test_monitor:
882; ZNVER1:       # %bb.0:
883; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
884; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
885; ZNVER1-NEXT:    monitor # sched: [100:0.25]
886; ZNVER1-NEXT:    retq # sched: [1:0.50]
887  tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
888  ret void
889}
890declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
891
; test_movddup: splats element 0 of %a0 and element 0 of the <2 x double>
; loaded from %a1 (both shuffles use a zeroinitializer mask), then returns
; the loaded splat minus the register splat.
; Each run expects one register-source and one memory-source movddup (or the
; v-prefixed vmovddup) feeding a subpd / vsubpd. The order of the two
; duplicates and the register assignment vary between runs, and the
; Silvermont expectations include an extra movapd to place the result in
; %xmm0.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them instead of editing the sched annotations by hand.
892define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
893; GENERIC-LABEL: test_movddup:
894; GENERIC:       # %bb.0:
895; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
896; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
897; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
898; GENERIC-NEXT:    retq # sched: [1:1.00]
899;
900; ATOM-LABEL: test_movddup:
901; ATOM:       # %bb.0:
902; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
903; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
904; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
905; ATOM-NEXT:    retq # sched: [79:39.50]
906;
907; SLM-LABEL: test_movddup:
908; SLM:       # %bb.0:
909; SLM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
910; SLM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
911; SLM-NEXT:    subpd %xmm0, %xmm1 # sched: [3:1.00]
912; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
913; SLM-NEXT:    retq # sched: [4:1.00]
914;
915; SANDY-SSE-LABEL: test_movddup:
916; SANDY-SSE:       # %bb.0:
917; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
918; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
919; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
920; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
921;
922; SANDY-LABEL: test_movddup:
923; SANDY:       # %bb.0:
924; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
925; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
926; SANDY-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
927; SANDY-NEXT:    retq # sched: [1:1.00]
928;
929; HASWELL-SSE-LABEL: test_movddup:
930; HASWELL-SSE:       # %bb.0:
931; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
932; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
933; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
934; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
935;
936; HASWELL-LABEL: test_movddup:
937; HASWELL:       # %bb.0:
938; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
939; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
940; HASWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
941; HASWELL-NEXT:    retq # sched: [7:1.00]
942;
943; BROADWELL-SSE-LABEL: test_movddup:
944; BROADWELL-SSE:       # %bb.0:
945; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
946; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
947; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
948; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
949;
950; BROADWELL-LABEL: test_movddup:
951; BROADWELL:       # %bb.0:
952; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
953; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
954; BROADWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
955; BROADWELL-NEXT:    retq # sched: [7:1.00]
956;
957; SKYLAKE-SSE-LABEL: test_movddup:
958; SKYLAKE-SSE:       # %bb.0:
959; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
960; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
961; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
962; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
963;
964; SKYLAKE-LABEL: test_movddup:
965; SKYLAKE:       # %bb.0:
966; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
967; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
968; SKYLAKE-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
969; SKYLAKE-NEXT:    retq # sched: [7:1.00]
970;
971; SKX-SSE-LABEL: test_movddup:
972; SKX-SSE:       # %bb.0:
973; SKX-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
974; SKX-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
975; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
976; SKX-SSE-NEXT:    retq # sched: [7:1.00]
977;
978; SKX-LABEL: test_movddup:
979; SKX:       # %bb.0:
980; SKX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
981; SKX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
982; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
983; SKX-NEXT:    retq # sched: [7:1.00]
984;
985; BTVER2-SSE-LABEL: test_movddup:
986; BTVER2-SSE:       # %bb.0:
987; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
988; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
989; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
990; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
991;
992; BTVER2-LABEL: test_movddup:
993; BTVER2:       # %bb.0:
994; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
995; BTVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
996; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
997; BTVER2-NEXT:    retq # sched: [4:1.00]
998;
999; ZNVER1-SSE-LABEL: test_movddup:
1000; ZNVER1-SSE:       # %bb.0:
1001; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
1002; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
1003; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
1004; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1005;
1006; ZNVER1-LABEL: test_movddup:
1007; ZNVER1:       # %bb.0:
1008; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
1009; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
1010; ZNVER1-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1011; ZNVER1-NEXT:    retq # sched: [1:0.50]
1012  %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
1013  %2 = load <2 x double>, <2 x double> *%a1, align 16
1014  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
1015  %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
1016  ret <2 x double> %4
1017}
1018
; test_movshdup: shuffles %a0 and the <4 x float> loaded from %a1 with the
; mask <1, 1, 3, 3> (duplicating the odd-indexed elements) and returns the
; fadd of the two shuffled vectors.
; Each run expects one register-source and one memory-source movshdup (or the
; v-prefixed vmovshdup) feeding an addps / vaddps. The order of the two
; duplicates varies between runs, and the Silvermont expectations include an
; extra movaps to place the result in %xmm0.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them instead of editing the sched annotations by hand.
1019define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
1020; GENERIC-LABEL: test_movshdup:
1021; GENERIC:       # %bb.0:
1022; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1023; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1024; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1025; GENERIC-NEXT:    retq # sched: [1:1.00]
1026;
1027; ATOM-LABEL: test_movshdup:
1028; ATOM:       # %bb.0:
1029; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1030; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
1031; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
1032; ATOM-NEXT:    retq # sched: [79:39.50]
1033;
1034; SLM-LABEL: test_movshdup:
1035; SLM:       # %bb.0:
1036; SLM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
1037; SLM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1038; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
1039; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
1040; SLM-NEXT:    retq # sched: [4:1.00]
1041;
1042; SANDY-SSE-LABEL: test_movshdup:
1043; SANDY-SSE:       # %bb.0:
1044; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1045; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1046; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1047; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1048;
1049; SANDY-LABEL: test_movshdup:
1050; SANDY:       # %bb.0:
1051; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1052; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1053; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1054; SANDY-NEXT:    retq # sched: [1:1.00]
1055;
1056; HASWELL-SSE-LABEL: test_movshdup:
1057; HASWELL-SSE:       # %bb.0:
1058; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1059; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1060; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1061; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1062;
1063; HASWELL-LABEL: test_movshdup:
1064; HASWELL:       # %bb.0:
1065; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1066; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1067; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1068; HASWELL-NEXT:    retq # sched: [7:1.00]
1069;
1070; BROADWELL-SSE-LABEL: test_movshdup:
1071; BROADWELL-SSE:       # %bb.0:
1072; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1073; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
1074; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1075; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1076;
1077; BROADWELL-LABEL: test_movshdup:
1078; BROADWELL:       # %bb.0:
1079; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1080; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
1081; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1082; BROADWELL-NEXT:    retq # sched: [7:1.00]
1083;
1084; SKYLAKE-SSE-LABEL: test_movshdup:
1085; SKYLAKE-SSE:       # %bb.0:
1086; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1087; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1088; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1089; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1090;
1091; SKYLAKE-LABEL: test_movshdup:
1092; SKYLAKE:       # %bb.0:
1093; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1094; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1095; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1096; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1097;
1098; SKX-SSE-LABEL: test_movshdup:
1099; SKX-SSE:       # %bb.0:
1100; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
1101; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
1102; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1103; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1104;
1105; SKX-LABEL: test_movshdup:
1106; SKX:       # %bb.0:
1107; SKX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
1108; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
1109; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1110; SKX-NEXT:    retq # sched: [7:1.00]
1111;
1112; BTVER2-SSE-LABEL: test_movshdup:
1113; BTVER2-SSE:       # %bb.0:
1114; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1115; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
1116; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1117; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1118;
1119; BTVER2-LABEL: test_movshdup:
1120; BTVER2:       # %bb.0:
1121; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
1122; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1123; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1124; BTVER2-NEXT:    retq # sched: [4:1.00]
1125;
1126; ZNVER1-SSE-LABEL: test_movshdup:
1127; ZNVER1-SSE:       # %bb.0:
1128; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
1129; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
1130; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1131; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1132;
1133; ZNVER1-LABEL: test_movshdup:
1134; ZNVER1:       # %bb.0:
1135; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
1136; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
1137; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1138; ZNVER1-NEXT:    retq # sched: [1:0.50]
1139  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1140  %2 = load <4 x float>, <4 x float> *%a1, align 16
1141  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
1142  %4 = fadd <4 x float> %1, %3
1143  ret <4 x float> %4
1144}
1145
; test_movsldup: shuffles %a0 and the <4 x float> loaded from %a1 with the
; mask <0, 0, 2, 2> (duplicating the even-indexed elements) and returns the
; fadd of the two shuffled vectors.
; Each run expects one register-source and one memory-source movsldup (or the
; v-prefixed vmovsldup) feeding an addps / vaddps. The order of the two
; duplicates varies between runs, and the Silvermont expectations include an
; extra movaps to place the result in %xmm0.
; NOTE(review): in the znver1 run that uses the non-v-prefixed mnemonics, both
; movsldup lines carry a sched annotation of [100:0.25], unlike the
; [8:0.50] / [1:0.50] expected for vmovsldup in the other znver1 run.
; Presumably this mirrors what the scheduler model reported when the
; assertions were generated - confirm against the znver1 model; do not
; hand-edit the expectation.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them instead of editing the sched annotations by hand.
1146define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
1147; GENERIC-LABEL: test_movsldup:
1148; GENERIC:       # %bb.0:
1149; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1150; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1151; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1152; GENERIC-NEXT:    retq # sched: [1:1.00]
1153;
1154; ATOM-LABEL: test_movsldup:
1155; ATOM:       # %bb.0:
1156; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1157; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
1158; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
1159; ATOM-NEXT:    retq # sched: [79:39.50]
1160;
1161; SLM-LABEL: test_movsldup:
1162; SLM:       # %bb.0:
1163; SLM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
1164; SLM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1165; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
1166; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
1167; SLM-NEXT:    retq # sched: [4:1.00]
1168;
1169; SANDY-SSE-LABEL: test_movsldup:
1170; SANDY-SSE:       # %bb.0:
1171; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1172; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1173; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1174; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1175;
1176; SANDY-LABEL: test_movsldup:
1177; SANDY:       # %bb.0:
1178; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1179; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1180; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1181; SANDY-NEXT:    retq # sched: [1:1.00]
1182;
1183; HASWELL-SSE-LABEL: test_movsldup:
1184; HASWELL-SSE:       # %bb.0:
1185; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1186; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1187; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1188; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1189;
1190; HASWELL-LABEL: test_movsldup:
1191; HASWELL:       # %bb.0:
1192; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1193; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1194; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1195; HASWELL-NEXT:    retq # sched: [7:1.00]
1196;
1197; BROADWELL-SSE-LABEL: test_movsldup:
1198; BROADWELL-SSE:       # %bb.0:
1199; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1200; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
1201; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1202; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1203;
1204; BROADWELL-LABEL: test_movsldup:
1205; BROADWELL:       # %bb.0:
1206; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1207; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
1208; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1209; BROADWELL-NEXT:    retq # sched: [7:1.00]
1210;
1211; SKYLAKE-SSE-LABEL: test_movsldup:
1212; SKYLAKE-SSE:       # %bb.0:
1213; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1214; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1215; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1216; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1217;
1218; SKYLAKE-LABEL: test_movsldup:
1219; SKYLAKE:       # %bb.0:
1220; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1221; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1222; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1223; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1224;
1225; SKX-SSE-LABEL: test_movsldup:
1226; SKX-SSE:       # %bb.0:
1227; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
1228; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
1229; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
1230; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1231;
1232; SKX-LABEL: test_movsldup:
1233; SKX:       # %bb.0:
1234; SKX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
1235; SKX-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
1236; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1237; SKX-NEXT:    retq # sched: [7:1.00]
1238;
1239; BTVER2-SSE-LABEL: test_movsldup:
1240; BTVER2-SSE:       # %bb.0:
1241; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
1242; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
1243; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1244; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1245;
1246; BTVER2-LABEL: test_movsldup:
1247; BTVER2:       # %bb.0:
1248; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
1249; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1250; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1251; BTVER2-NEXT:    retq # sched: [4:1.00]
1252;
1253; ZNVER1-SSE-LABEL: test_movsldup:
1254; ZNVER1-SSE:       # %bb.0:
1255; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
1256; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
1257; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
1258; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1259;
1260; ZNVER1-LABEL: test_movsldup:
1261; ZNVER1:       # %bb.0:
1262; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
1263; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
1264; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1265; ZNVER1-NEXT:    retq # sched: [1:0.50]
1266  %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1267  %2 = load <4 x float>, <4 x float> *%a1, align 16
1268  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
1269  %4 = fadd <4 x float> %1, %3
1270  ret <4 x float> %4
1271}
1272
; test_mwait: tail-calls @llvm.x86.sse3.mwait with (%a0, %a1) and returns
; void.
; Every run expects the same four-instruction sequence: movl %edi, %ecx, then
; movl %esi, %eax, then mwait, then retq. Only the sched annotations differ
; between runs.
; The assertions are autogenerated (see the note on the first line of this
; file); regenerate them instead of editing the sched annotations by hand.
1273define void @test_mwait(i32 %a0, i32 %a1) {
1274; GENERIC-LABEL: test_mwait:
1275; GENERIC:       # %bb.0:
1276; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1277; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
1278; GENERIC-NEXT:    mwait # sched: [100:0.33]
1279; GENERIC-NEXT:    retq # sched: [1:1.00]
1280;
1281; ATOM-LABEL: test_mwait:
1282; ATOM:       # %bb.0:
1283; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1284; ATOM-NEXT:    movl %esi, %eax # sched: [1:0.50]
1285; ATOM-NEXT:    mwait # sched: [46:23.00]
1286; ATOM-NEXT:    retq # sched: [79:39.50]
1287;
1288; SLM-LABEL: test_mwait:
1289; SLM:       # %bb.0:
1290; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1291; SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
1292; SLM-NEXT:    mwait # sched: [100:1.00]
1293; SLM-NEXT:    retq # sched: [4:1.00]
1294;
1295; SANDY-SSE-LABEL: test_mwait:
1296; SANDY-SSE:       # %bb.0:
1297; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1298; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
1299; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
1300; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
1301;
1302; SANDY-LABEL: test_mwait:
1303; SANDY:       # %bb.0:
1304; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
1305; SANDY-NEXT:    movl %esi, %eax # sched: [1:0.33]
1306; SANDY-NEXT:    mwait # sched: [100:0.33]
1307; SANDY-NEXT:    retq # sched: [1:1.00]
1308;
1309; HASWELL-SSE-LABEL: test_mwait:
1310; HASWELL-SSE:       # %bb.0:
1311; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1312; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1313; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
1314; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
1315;
1316; HASWELL-LABEL: test_mwait:
1317; HASWELL:       # %bb.0:
1318; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1319; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
1320; HASWELL-NEXT:    mwait # sched: [20:2.50]
1321; HASWELL-NEXT:    retq # sched: [7:1.00]
1322;
1323; BROADWELL-SSE-LABEL: test_mwait:
1324; BROADWELL-SSE:       # %bb.0:
1325; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1326; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1327; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
1328; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
1329;
1330; BROADWELL-LABEL: test_mwait:
1331; BROADWELL:       # %bb.0:
1332; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1333; BROADWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
1334; BROADWELL-NEXT:    mwait # sched: [100:0.25]
1335; BROADWELL-NEXT:    retq # sched: [7:1.00]
1336;
1337; SKYLAKE-SSE-LABEL: test_mwait:
1338; SKYLAKE-SSE:       # %bb.0:
1339; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1340; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1341; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
1342; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
1343;
1344; SKYLAKE-LABEL: test_mwait:
1345; SKYLAKE:       # %bb.0:
1346; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1347; SKYLAKE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1348; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
1349; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1350;
1351; SKX-SSE-LABEL: test_mwait:
1352; SKX-SSE:       # %bb.0:
1353; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1354; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1355; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
1356; SKX-SSE-NEXT:    retq # sched: [7:1.00]
1357;
1358; SKX-LABEL: test_mwait:
1359; SKX:       # %bb.0:
1360; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1361; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
1362; SKX-NEXT:    mwait # sched: [20:2.50]
1363; SKX-NEXT:    retq # sched: [7:1.00]
1364;
1365; BTVER2-SSE-LABEL: test_mwait:
1366; BTVER2-SSE:       # %bb.0:
1367; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1368; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
1369; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
1370; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
1371;
1372; BTVER2-LABEL: test_mwait:
1373; BTVER2:       # %bb.0:
1374; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
1375; BTVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
1376; BTVER2-NEXT:    mwait # sched: [100:0.50]
1377; BTVER2-NEXT:    retq # sched: [4:1.00]
1378;
1379; ZNVER1-SSE-LABEL: test_mwait:
1380; ZNVER1-SSE:       # %bb.0:
1381; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1382; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
1383; ZNVER1-SSE-NEXT:    mwait # sched: [100:0.25]
1384; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
1385;
1386; ZNVER1-LABEL: test_mwait:
1387; ZNVER1:       # %bb.0:
1388; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
1389; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
1390; ZNVER1-NEXT:    mwait # sched: [100:0.25]
1391; ZNVER1-NEXT:    retq # sched: [1:0.50]
1392  tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
1393  ret void
1394}
1395declare void @llvm.x86.sse3.mwait(i32, i32)
1396