• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
13
; test_cvtpd2pi: calls @llvm.x86.sse.cvtpd2pi once on the register argument %a0
; and once on a <2 x double> loaded from %a1, ORs the two x86_mmx results with
; @llvm.x86.mmx.por and returns the combined value bitcast to i64.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
; NOTE(review): the declaration of @llvm.x86.mmx.por is not visible in this part
; of the file - confirm it is declared elsewhere.
14define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
15; GENERIC-LABEL: test_cvtpd2pi:
16; GENERIC:       # %bb.0:
17; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
18; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
19; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
20; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
21; GENERIC-NEXT:    retq # sched: [1:1.00]
22;
23; ATOM-LABEL: test_cvtpd2pi:
24; ATOM:       # %bb.0:
25; ATOM-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [8:4.00]
26; ATOM-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [7:3.50]
27; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
28; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
29; ATOM-NEXT:    retq # sched: [79:39.50]
30;
31; SLM-LABEL: test_cvtpd2pi:
32; SLM:       # %bb.0:
33; SLM-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [7:1.00]
34; SLM-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:0.50]
35; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
36; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
37; SLM-NEXT:    retq # sched: [4:1.00]
38;
39; SANDY-LABEL: test_cvtpd2pi:
40; SANDY:       # %bb.0:
41; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
42; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
43; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
44; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
45; SANDY-NEXT:    retq # sched: [1:1.00]
46;
47; HASWELL-LABEL: test_cvtpd2pi:
48; HASWELL:       # %bb.0:
49; HASWELL-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
50; HASWELL-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
51; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
52; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
53; HASWELL-NEXT:    retq # sched: [7:1.00]
54;
55; BROADWELL-LABEL: test_cvtpd2pi:
56; BROADWELL:       # %bb.0:
57; BROADWELL-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
58; BROADWELL-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [9:1.00]
59; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
60; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
61; BROADWELL-NEXT:    retq # sched: [7:1.00]
62;
63; SKYLAKE-LABEL: test_cvtpd2pi:
64; SKYLAKE:       # %bb.0:
65; SKYLAKE-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
66; SKYLAKE-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
67; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
68; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
69; SKYLAKE-NEXT:    retq # sched: [7:1.00]
70;
71; SKX-LABEL: test_cvtpd2pi:
72; SKX:       # %bb.0:
73; SKX-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
74; SKX-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
75; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
76; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
77; SKX-NEXT:    retq # sched: [7:1.00]
78;
79; BTVER2-LABEL: test_cvtpd2pi:
80; BTVER2:       # %bb.0:
81; BTVER2-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
82; BTVER2-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
83; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
84; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
85; BTVER2-NEXT:    retq # sched: [4:1.00]
86;
87; ZNVER1-LABEL: test_cvtpd2pi:
88; ZNVER1:       # %bb.0:
89; ZNVER1-NEXT:    cvtpd2pi (%rdi), %mm1 # sched: [12:1.00]
90; ZNVER1-NEXT:    cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
91; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
92; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
93; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one conversion of the argument value, one of a loaded value.
94  %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0)
95  %2 = load <2 x double>, <2 x double> *%a1, align 16
96  %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2)
; Combine both results so neither conversion is dead.
97  %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
98  %5 = bitcast x86_mmx %4 to i64
99  ret i64 %5
100}
101declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
102
; test_cvtpi2pd: calls @llvm.x86.sse.cvtpi2pd once on the x86_mmx argument %a0
; and once on an x86_mmx value loaded from %a1, then returns the fadd of the two
; <2 x double> results.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
103define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize {
104; GENERIC-LABEL: test_cvtpi2pd:
105; GENERIC:       # %bb.0:
106; GENERIC-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
107; GENERIC-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00]
108; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
109; GENERIC-NEXT:    retq # sched: [1:1.00]
110;
111; ATOM-LABEL: test_cvtpi2pd:
112; ATOM:       # %bb.0:
113; ATOM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00]
114; ATOM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [7:3.50]
115; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
116; ATOM-NEXT:    retq # sched: [79:39.50]
117;
118; SLM-LABEL: test_cvtpi2pd:
119; SLM:       # %bb.0:
120; SLM-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00]
121; SLM-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:0.50]
122; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
123; SLM-NEXT:    retq # sched: [4:1.00]
124;
125; SANDY-LABEL: test_cvtpi2pd:
126; SANDY:       # %bb.0:
127; SANDY-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
128; SANDY-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
129; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
130; SANDY-NEXT:    retq # sched: [1:1.00]
131;
132; HASWELL-LABEL: test_cvtpi2pd:
133; HASWELL:       # %bb.0:
134; HASWELL-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
135; HASWELL-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00]
136; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
137; HASWELL-NEXT:    retq # sched: [7:1.00]
138;
139; BROADWELL-LABEL: test_cvtpi2pd:
140; BROADWELL:       # %bb.0:
141; BROADWELL-NEXT:    cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00]
142; BROADWELL-NEXT:    cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
143; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
144; BROADWELL-NEXT:    retq # sched: [7:1.00]
145;
146; SKYLAKE-LABEL: test_cvtpi2pd:
147; SKYLAKE:       # %bb.0:
148; SKYLAKE-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
149; SKYLAKE-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
150; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
151; SKYLAKE-NEXT:    retq # sched: [7:1.00]
152;
153; SKX-LABEL: test_cvtpi2pd:
154; SKX:       # %bb.0:
155; SKX-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
156; SKX-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
157; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
158; SKX-NEXT:    retq # sched: [7:1.00]
159;
160; BTVER2-LABEL: test_cvtpi2pd:
161; BTVER2:       # %bb.0:
162; BTVER2-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
163; BTVER2-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
164; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
165; BTVER2-NEXT:    retq # sched: [4:1.00]
166;
167; ZNVER1-LABEL: test_cvtpi2pd:
168; ZNVER1:       # %bb.0:
169; ZNVER1-NEXT:    cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00]
170; ZNVER1-NEXT:    cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
171; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
172; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one conversion of the argument value, one of a loaded value.
173  %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0)
174  %2 = load x86_mmx, x86_mmx *%a1, align 8
175  %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2)
; Add both results so neither conversion is dead.
176  %4 = fadd <2 x double> %1, %3
177  ret <2 x double> %4
178}
179declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
180
; test_cvtpi2ps: calls the two-operand intrinsic @llvm.x86.sse.cvtpi2ps twice,
; first with (%a2, %a0) and then with (%a3, x86_mmx loaded from %a1), and
; returns the fadd of the two <4 x float> results.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
181define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize {
182; GENERIC-LABEL: test_cvtpi2ps:
183; GENERIC:       # %bb.0:
184; GENERIC-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
185; GENERIC-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
186; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
187; GENERIC-NEXT:    retq # sched: [1:1.00]
188;
189; ATOM-LABEL: test_cvtpi2ps:
190; ATOM:       # %bb.0:
191; ATOM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:5.00]
192; ATOM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00]
193; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
194; ATOM-NEXT:    retq # sched: [79:39.50]
195;
196; SLM-LABEL: test_cvtpi2ps:
197; SLM:       # %bb.0:
198; SLM-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00]
199; SLM-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [4:0.50]
200; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
201; SLM-NEXT:    retq # sched: [4:1.00]
202;
203; SANDY-LABEL: test_cvtpi2ps:
204; SANDY:       # %bb.0:
205; SANDY-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
206; SANDY-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
207; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
208; SANDY-NEXT:    retq # sched: [1:1.00]
209;
210; HASWELL-LABEL: test_cvtpi2ps:
211; HASWELL:       # %bb.0:
212; HASWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
213; HASWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
214; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
215; HASWELL-NEXT:    retq # sched: [7:1.00]
216;
217; BROADWELL-LABEL: test_cvtpi2ps:
218; BROADWELL:       # %bb.0:
219; BROADWELL-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
220; BROADWELL-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
221; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
222; BROADWELL-NEXT:    retq # sched: [7:1.00]
223;
224; SKYLAKE-LABEL: test_cvtpi2ps:
225; SKYLAKE:       # %bb.0:
226; SKYLAKE-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
227; SKYLAKE-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
228; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
229; SKYLAKE-NEXT:    retq # sched: [7:1.00]
230;
231; SKX-LABEL: test_cvtpi2ps:
232; SKX:       # %bb.0:
233; SKX-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
234; SKX-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
235; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
236; SKX-NEXT:    retq # sched: [7:1.00]
237;
238; BTVER2-LABEL: test_cvtpi2ps:
239; BTVER2:       # %bb.0:
240; BTVER2-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
241; BTVER2-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
242; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
243; BTVER2-NEXT:    retq # sched: [4:1.00]
244;
245; ZNVER1-LABEL: test_cvtpi2ps:
246; ZNVER1:       # %bb.0:
247; ZNVER1-NEXT:    cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00]
248; ZNVER1-NEXT:    cvtpi2ps %mm0, %xmm0 # sched: [5:1.00]
249; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
250; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: each call takes a <4 x float> pass-through operand plus the
; x86_mmx source (argument value first, loaded value second).
251  %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0)
252  %2 = load x86_mmx, x86_mmx *%a1, align 8
253  %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2)
; Add both results so neither conversion is dead.
254  %4 = fadd <4 x float> %1, %3
255  ret <4 x float> %4
256}
257declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
258
; test_cvtps2pi: calls @llvm.x86.sse.cvtps2pi once on the register argument %a0
; and once on a <4 x float> loaded from %a1, ORs the two x86_mmx results with
; @llvm.x86.mmx.por and returns the combined value bitcast to i64.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
; NOTE(review): the declaration of @llvm.x86.mmx.por is not visible in this part
; of the file - confirm it is declared elsewhere.
259define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
260; GENERIC-LABEL: test_cvtps2pi:
261; GENERIC:       # %bb.0:
262; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
263; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
264; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
265; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
266; GENERIC-NEXT:    retq # sched: [1:1.00]
267;
268; ATOM-LABEL: test_cvtps2pi:
269; ATOM:       # %bb.0:
270; ATOM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:5.00]
271; ATOM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [5:5.00]
272; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
273; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
274; ATOM-NEXT:    retq # sched: [79:39.50]
275;
276; SLM-LABEL: test_cvtps2pi:
277; SLM:       # %bb.0:
278; SLM-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [7:1.00]
279; SLM-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:0.50]
280; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
281; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
282; SLM-NEXT:    retq # sched: [4:1.00]
283;
284; SANDY-LABEL: test_cvtps2pi:
285; SANDY:       # %bb.0:
286; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
287; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
288; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
289; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
290; SANDY-NEXT:    retq # sched: [1:1.00]
291;
292; HASWELL-LABEL: test_cvtps2pi:
293; HASWELL:       # %bb.0:
294; HASWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
295; HASWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
296; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
297; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
298; HASWELL-NEXT:    retq # sched: [7:1.00]
299;
300; BROADWELL-LABEL: test_cvtps2pi:
301; BROADWELL:       # %bb.0:
302; BROADWELL-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
303; BROADWELL-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
304; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
305; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
306; BROADWELL-NEXT:    retq # sched: [7:1.00]
307;
308; SKYLAKE-LABEL: test_cvtps2pi:
309; SKYLAKE:       # %bb.0:
310; SKYLAKE-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
311; SKYLAKE-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
312; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
313; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
314; SKYLAKE-NEXT:    retq # sched: [7:1.00]
315;
316; SKX-LABEL: test_cvtps2pi:
317; SKX:       # %bb.0:
318; SKX-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
319; SKX-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
320; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
321; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
322; SKX-NEXT:    retq # sched: [7:1.00]
323;
324; BTVER2-LABEL: test_cvtps2pi:
325; BTVER2:       # %bb.0:
326; BTVER2-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
327; BTVER2-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
328; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
329; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
330; BTVER2-NEXT:    retq # sched: [4:1.00]
331;
332; ZNVER1-LABEL: test_cvtps2pi:
333; ZNVER1:       # %bb.0:
334; ZNVER1-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [12:1.00]
335; ZNVER1-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
336; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
337; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
338; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one conversion of the argument value, one of a loaded value.
339  %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
340  %2 = load <4 x float>, <4 x float> *%a1, align 16
341  %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2)
; Combine both results so neither conversion is dead.
342  %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
343  %5 = bitcast x86_mmx %4 to i64
344  ret i64 %5
345}
346declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
347
; test_cvttpd2pi: calls @llvm.x86.sse.cvttpd2pi once on the register argument
; %a0 and once on a <2 x double> loaded from %a1, ORs the two x86_mmx results
; with @llvm.x86.mmx.por and returns the combined value bitcast to i64.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
; NOTE(review): the declaration of @llvm.x86.mmx.por is not visible in this part
; of the file - confirm it is declared elsewhere.
348define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
349; GENERIC-LABEL: test_cvttpd2pi:
350; GENERIC:       # %bb.0:
351; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
352; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
353; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
354; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
355; GENERIC-NEXT:    retq # sched: [1:1.00]
356;
357; ATOM-LABEL: test_cvttpd2pi:
358; ATOM:       # %bb.0:
359; ATOM-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [8:4.00]
360; ATOM-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [7:3.50]
361; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
362; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
363; ATOM-NEXT:    retq # sched: [79:39.50]
364;
365; SLM-LABEL: test_cvttpd2pi:
366; SLM:       # %bb.0:
367; SLM-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [7:1.00]
368; SLM-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:0.50]
369; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
370; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
371; SLM-NEXT:    retq # sched: [4:1.00]
372;
373; SANDY-LABEL: test_cvttpd2pi:
374; SANDY:       # %bb.0:
375; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
376; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
377; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
378; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
379; SANDY-NEXT:    retq # sched: [1:1.00]
380;
381; HASWELL-LABEL: test_cvttpd2pi:
382; HASWELL:       # %bb.0:
383; HASWELL-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
384; HASWELL-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
385; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
386; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
387; HASWELL-NEXT:    retq # sched: [7:1.00]
388;
389; BROADWELL-LABEL: test_cvttpd2pi:
390; BROADWELL:       # %bb.0:
391; BROADWELL-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
392; BROADWELL-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [9:1.00]
393; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
394; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
395; BROADWELL-NEXT:    retq # sched: [7:1.00]
396;
397; SKYLAKE-LABEL: test_cvttpd2pi:
398; SKYLAKE:       # %bb.0:
399; SKYLAKE-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
400; SKYLAKE-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
401; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
402; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
403; SKYLAKE-NEXT:    retq # sched: [7:1.00]
404;
405; SKX-LABEL: test_cvttpd2pi:
406; SKX:       # %bb.0:
407; SKX-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
408; SKX-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
409; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
410; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
411; SKX-NEXT:    retq # sched: [7:1.00]
412;
413; BTVER2-LABEL: test_cvttpd2pi:
414; BTVER2:       # %bb.0:
415; BTVER2-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
416; BTVER2-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
417; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
418; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
419; BTVER2-NEXT:    retq # sched: [4:1.00]
420;
421; ZNVER1-LABEL: test_cvttpd2pi:
422; ZNVER1:       # %bb.0:
423; ZNVER1-NEXT:    cvttpd2pi (%rdi), %mm1 # sched: [12:1.00]
424; ZNVER1-NEXT:    cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
425; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
426; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
427; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one conversion of the argument value, one of a loaded value.
428  %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0)
429  %2 = load <2 x double>, <2 x double> *%a1, align 16
430  %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2)
; Combine both results so neither conversion is dead.
431  %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
432  %5 = bitcast x86_mmx %4 to i64
433  ret i64 %5
434}
435declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
436
; test_cvttps2pi: calls @llvm.x86.sse.cvttps2pi once on the register argument
; %a0 and once on a <4 x float> loaded from %a1, ORs the two x86_mmx results
; with @llvm.x86.mmx.por and returns the combined value bitcast to i64.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions and their "# sched: [..]" annotations. They
; are autogenerated (see the note at the top of the file), so regenerate them
; with the script rather than editing them by hand.
; NOTE(review): the declaration of @llvm.x86.mmx.por is not visible in this part
; of the file - confirm it is declared elsewhere.
437define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
438; GENERIC-LABEL: test_cvttps2pi:
439; GENERIC:       # %bb.0:
440; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
441; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
442; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
443; GENERIC-NEXT:    movq %mm1, %rax # sched: [2:1.00]
444; GENERIC-NEXT:    retq # sched: [1:1.00]
445;
446; ATOM-LABEL: test_cvttps2pi:
447; ATOM:       # %bb.0:
448; ATOM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:5.00]
449; ATOM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [5:5.00]
450; ATOM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
451; ATOM-NEXT:    movq %mm1, %rax # sched: [3:3.00]
452; ATOM-NEXT:    retq # sched: [79:39.50]
453;
454; SLM-LABEL: test_cvttps2pi:
455; SLM:       # %bb.0:
456; SLM-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [7:1.00]
457; SLM-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:0.50]
458; SLM-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
459; SLM-NEXT:    movq %mm1, %rax # sched: [1:0.50]
460; SLM-NEXT:    retq # sched: [4:1.00]
461;
462; SANDY-LABEL: test_cvttps2pi:
463; SANDY:       # %bb.0:
464; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
465; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
466; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
467; SANDY-NEXT:    movq %mm1, %rax # sched: [2:1.00]
468; SANDY-NEXT:    retq # sched: [1:1.00]
469;
470; HASWELL-LABEL: test_cvttps2pi:
471; HASWELL:       # %bb.0:
472; HASWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
473; HASWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
474; HASWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
475; HASWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
476; HASWELL-NEXT:    retq # sched: [7:1.00]
477;
478; BROADWELL-LABEL: test_cvttps2pi:
479; BROADWELL:       # %bb.0:
480; BROADWELL-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
481; BROADWELL-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
482; BROADWELL-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
483; BROADWELL-NEXT:    movq %mm1, %rax # sched: [1:1.00]
484; BROADWELL-NEXT:    retq # sched: [7:1.00]
485;
486; SKYLAKE-LABEL: test_cvttps2pi:
487; SKYLAKE:       # %bb.0:
488; SKYLAKE-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
489; SKYLAKE-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
490; SKYLAKE-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
491; SKYLAKE-NEXT:    movq %mm1, %rax # sched: [2:1.00]
492; SKYLAKE-NEXT:    retq # sched: [7:1.00]
493;
494; SKX-LABEL: test_cvttps2pi:
495; SKX:       # %bb.0:
496; SKX-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
497; SKX-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
498; SKX-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
499; SKX-NEXT:    movq %mm1, %rax # sched: [2:1.00]
500; SKX-NEXT:    retq # sched: [7:1.00]
501;
502; BTVER2-LABEL: test_cvttps2pi:
503; BTVER2:       # %bb.0:
504; BTVER2-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
505; BTVER2-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
506; BTVER2-NEXT:    por %mm0, %mm1 # sched: [1:0.50]
507; BTVER2-NEXT:    movq %mm1, %rax # sched: [4:1.00]
508; BTVER2-NEXT:    retq # sched: [4:1.00]
509;
510; ZNVER1-LABEL: test_cvttps2pi:
511; ZNVER1:       # %bb.0:
512; ZNVER1-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [12:1.00]
513; ZNVER1-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
514; ZNVER1-NEXT:    por %mm0, %mm1 # sched: [1:0.25]
515; ZNVER1-NEXT:    movq %mm1, %rax # sched: [2:1.00]
516; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: one conversion of the argument value, one of a loaded value.
517  %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0)
518  %2 = load <4 x float>, <4 x float> *%a1, align 16
519  %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
; Combine both results so neither conversion is dead.
520  %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
521  %5 = bitcast x86_mmx %4 to i64
522  ret i64 %5
523}
524declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
525
; test_emms: calls @llvm.x86.mmx.emms (no arguments, no result) and returns void.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected "emms" / "retq" pair and their "# sched: [..]"
; annotations. They are autogenerated (see the note at the top of the file), so
; regenerate them with the script rather than editing them by hand.
526define void @test_emms() optsize {
527; GENERIC-LABEL: test_emms:
528; GENERIC:       # %bb.0:
529; GENERIC-NEXT:    emms # sched: [31:10.33]
530; GENERIC-NEXT:    retq # sched: [1:1.00]
531;
532; ATOM-LABEL: test_emms:
533; ATOM:       # %bb.0:
534; ATOM-NEXT:    emms # sched: [5:2.50]
535; ATOM-NEXT:    retq # sched: [79:39.50]
536;
537; SLM-LABEL: test_emms:
538; SLM:       # %bb.0:
539; SLM-NEXT:    emms # sched: [10:5.00]
540; SLM-NEXT:    retq # sched: [4:1.00]
541;
542; SANDY-LABEL: test_emms:
543; SANDY:       # %bb.0:
544; SANDY-NEXT:    emms # sched: [31:10.33]
545; SANDY-NEXT:    retq # sched: [1:1.00]
546;
547; HASWELL-LABEL: test_emms:
548; HASWELL:       # %bb.0:
549; HASWELL-NEXT:    emms # sched: [31:10.00]
550; HASWELL-NEXT:    retq # sched: [7:1.00]
551;
552; BROADWELL-LABEL: test_emms:
553; BROADWELL:       # %bb.0:
554; BROADWELL-NEXT:    emms # sched: [31:10.00]
555; BROADWELL-NEXT:    retq # sched: [7:1.00]
556;
557; SKYLAKE-LABEL: test_emms:
558; SKYLAKE:       # %bb.0:
559; SKYLAKE-NEXT:    emms # sched: [10:4.50]
560; SKYLAKE-NEXT:    retq # sched: [7:1.00]
561;
562; SKX-LABEL: test_emms:
563; SKX:       # %bb.0:
564; SKX-NEXT:    emms # sched: [10:4.50]
565; SKX-NEXT:    retq # sched: [7:1.00]
566;
567; BTVER2-LABEL: test_emms:
568; BTVER2:       # %bb.0:
569; BTVER2-NEXT:    emms # sched: [2:0.50]
570; BTVER2-NEXT:    retq # sched: [4:1.00]
571;
572; ZNVER1-LABEL: test_emms:
573; ZNVER1:       # %bb.0:
574; ZNVER1-NEXT:    emms # sched: [2:0.25]
575; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: the intrinsic call is the only instruction besides the return.
576  call void @llvm.x86.mmx.emms()
577  ret void
578}
579declare void @llvm.x86.mmx.emms()
580
; test_maskmovq: passes its three arguments (two x86_mmx values and an i8*)
; straight to @llvm.x86.mmx.maskmovq and returns void.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected "maskmovq %mm1, %mm0" / "retq" pair and their
; "# sched: [..]" annotations. The i8* argument does not appear as an explicit
; operand in the expected assembly. The assertions are autogenerated (see the
; note at the top of the file), so regenerate them with the script rather than
; editing them by hand.
581define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize {
582; GENERIC-LABEL: test_maskmovq:
583; GENERIC:       # %bb.0:
584; GENERIC-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
585; GENERIC-NEXT:    retq # sched: [1:1.00]
586;
587; ATOM-LABEL: test_maskmovq:
588; ATOM:       # %bb.0:
589; ATOM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
590; ATOM-NEXT:    retq # sched: [79:39.50]
591;
592; SLM-LABEL: test_maskmovq:
593; SLM:       # %bb.0:
594; SLM-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
595; SLM-NEXT:    retq # sched: [4:1.00]
596;
597; SANDY-LABEL: test_maskmovq:
598; SANDY:       # %bb.0:
599; SANDY-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
600; SANDY-NEXT:    retq # sched: [1:1.00]
601;
602; HASWELL-LABEL: test_maskmovq:
603; HASWELL:       # %bb.0:
604; HASWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
605; HASWELL-NEXT:    retq # sched: [7:1.00]
606;
607; BROADWELL-LABEL: test_maskmovq:
608; BROADWELL:       # %bb.0:
609; BROADWELL-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
610; BROADWELL-NEXT:    retq # sched: [7:1.00]
611;
612; SKYLAKE-LABEL: test_maskmovq:
613; SKYLAKE:       # %bb.0:
614; SKYLAKE-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
615; SKYLAKE-NEXT:    retq # sched: [7:1.00]
616;
617; SKX-LABEL: test_maskmovq:
618; SKX:       # %bb.0:
619; SKX-NEXT:    maskmovq %mm1, %mm0 # sched: [1:1.00]
620; SKX-NEXT:    retq # sched: [7:1.00]
621;
622; BTVER2-LABEL: test_maskmovq:
623; BTVER2:       # %bb.0:
624; BTVER2-NEXT:    maskmovq %mm1, %mm0 # sched: [1:0.50]
625; BTVER2-NEXT:    retq # sched: [4:1.00]
626;
627; ZNVER1-LABEL: test_maskmovq:
628; ZNVER1:       # %bb.0:
629; ZNVER1-NEXT:    maskmovq %mm1, %mm0 # sched: [100:0.25]
630; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: the intrinsic call is the only instruction besides the return.
631  call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2)
632  ret void
633}
634declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
635
; test_movd: builds two x86_mmx values from 32-bit integers - one from the
; argument %a1 and one from an i32 loaded through %a2 - by inserting each into
; lane 0 of an undef <2 x i32> and bitcasting. It adds them with
; @llvm.x86.mmx.padd.d, adds that sum to %a0, stores lane 0 of the first sum
; back through %a2 and returns lane 0 of the second sum.
; The assertions below pin, for every -mcpu configuration listed in the file
; header, the expected instructions (movd in both directions, from a register
; and from memory) and their "# sched: [..]" annotations. They are autogenerated
; (see the note at the top of the file), so regenerate them with the script
; rather than editing them by hand.
; NOTE(review): unlike the neighbouring tests this function carries no optsize
; attribute - confirm whether that is intentional.
; NOTE(review): the declaration of @llvm.x86.mmx.padd.d is not visible in this
; part of the file - confirm it is declared elsewhere.
; NOTE(review): in the btver2 expectations the register-source movd is annotated
; [8:0.50] while the memory-source movd is [5:1.00]; that looks inverted compared
; with the other CPUs - confirm against the scheduling model.
636define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
637; GENERIC-LABEL: test_movd:
638; GENERIC:       # %bb.0:
639; GENERIC-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
640; GENERIC-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
641; GENERIC-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
642; GENERIC-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
643; GENERIC-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
644; GENERIC-NEXT:    movd %mm0, %eax # sched: [2:1.00]
645; GENERIC-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
646; GENERIC-NEXT:    retq # sched: [1:1.00]
647;
648; ATOM-LABEL: test_movd:
649; ATOM:       # %bb.0:
650; ATOM-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
651; ATOM-NEXT:    movd (%rsi), %mm2 # sched: [1:1.00]
652; ATOM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
653; ATOM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
654; ATOM-NEXT:    movd %mm2, %ecx # sched: [3:3.00]
655; ATOM-NEXT:    movd %mm0, %eax # sched: [3:3.00]
656; ATOM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
657; ATOM-NEXT:    retq # sched: [79:39.50]
658;
659; SLM-LABEL: test_movd:
660; SLM:       # %bb.0:
661; SLM-NEXT:    movd (%rsi), %mm2 # sched: [3:1.00]
662; SLM-NEXT:    movd %edi, %mm1 # sched: [1:0.50]
663; SLM-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
664; SLM-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
665; SLM-NEXT:    movd %mm2, %ecx # sched: [1:0.50]
666; SLM-NEXT:    movd %mm0, %eax # sched: [1:0.50]
667; SLM-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
668; SLM-NEXT:    retq # sched: [4:1.00]
669;
670; SANDY-LABEL: test_movd:
671; SANDY:       # %bb.0:
672; SANDY-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
673; SANDY-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
674; SANDY-NEXT:    paddd %mm1, %mm2 # sched: [3:1.00]
675; SANDY-NEXT:    paddd %mm2, %mm0 # sched: [3:1.00]
676; SANDY-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
677; SANDY-NEXT:    movd %mm0, %eax # sched: [2:1.00]
678; SANDY-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
679; SANDY-NEXT:    retq # sched: [1:1.00]
680;
681; HASWELL-LABEL: test_movd:
682; HASWELL:       # %bb.0:
683; HASWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
684; HASWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
685; HASWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
686; HASWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
687; HASWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
688; HASWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
689; HASWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
690; HASWELL-NEXT:    retq # sched: [7:1.00]
691;
692; BROADWELL-LABEL: test_movd:
693; BROADWELL:       # %bb.0:
694; BROADWELL-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
695; BROADWELL-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
696; BROADWELL-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
697; BROADWELL-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
698; BROADWELL-NEXT:    movd %mm2, %ecx # sched: [1:1.00]
699; BROADWELL-NEXT:    movd %mm0, %eax # sched: [1:1.00]
700; BROADWELL-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
701; BROADWELL-NEXT:    retq # sched: [7:1.00]
702;
703; SKYLAKE-LABEL: test_movd:
704; SKYLAKE:       # %bb.0:
705; SKYLAKE-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
706; SKYLAKE-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
707; SKYLAKE-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
708; SKYLAKE-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
709; SKYLAKE-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
710; SKYLAKE-NEXT:    movd %mm0, %eax # sched: [2:1.00]
711; SKYLAKE-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
712; SKYLAKE-NEXT:    retq # sched: [7:1.00]
713;
714; SKX-LABEL: test_movd:
715; SKX:       # %bb.0:
716; SKX-NEXT:    movd %edi, %mm1 # sched: [1:1.00]
717; SKX-NEXT:    movd (%rsi), %mm2 # sched: [5:0.50]
718; SKX-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
719; SKX-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
720; SKX-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
721; SKX-NEXT:    movd %mm0, %eax # sched: [2:1.00]
722; SKX-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
723; SKX-NEXT:    retq # sched: [7:1.00]
724;
725; BTVER2-LABEL: test_movd:
726; BTVER2:       # %bb.0:
727; BTVER2-NEXT:    movd %edi, %mm1 # sched: [8:0.50]
728; BTVER2-NEXT:    movd (%rsi), %mm2 # sched: [5:1.00]
729; BTVER2-NEXT:    paddd %mm1, %mm2 # sched: [1:0.50]
730; BTVER2-NEXT:    paddd %mm2, %mm0 # sched: [1:0.50]
731; BTVER2-NEXT:    movd %mm2, %ecx # sched: [4:1.00]
732; BTVER2-NEXT:    movd %mm0, %eax # sched: [4:1.00]
733; BTVER2-NEXT:    movl %ecx, (%rsi) # sched: [1:1.00]
734; BTVER2-NEXT:    retq # sched: [4:1.00]
735;
736; ZNVER1-LABEL: test_movd:
737; ZNVER1:       # %bb.0:
738; ZNVER1-NEXT:    movd (%rsi), %mm2 # sched: [8:0.50]
739; ZNVER1-NEXT:    movd %edi, %mm1 # sched: [3:1.00]
740; ZNVER1-NEXT:    paddd %mm1, %mm2 # sched: [1:0.25]
741; ZNVER1-NEXT:    paddd %mm2, %mm0 # sched: [1:0.25]
742; ZNVER1-NEXT:    movd %mm2, %ecx # sched: [2:1.00]
743; ZNVER1-NEXT:    movd %mm0, %eax # sched: [2:1.00]
744; ZNVER1-NEXT:    movl %ecx, (%rsi) # sched: [1:0.50]
745; ZNVER1-NEXT:    retq # sched: [1:0.50]
; Build an x86_mmx value from the i32 argument (lane 0, other lane undef).
746  %1  = insertelement <2 x i32> undef, i32 %a1, i32 0
747  %2  = bitcast <2 x i32> %1 to x86_mmx
; Build a second x86_mmx value from an i32 loaded through %a2.
748  %3  = load i32, i32 *%a2
749  %4  = insertelement <2 x i32> undef, i32 %3, i32 0
750  %5  = bitcast <2 x i32> %4 to x86_mmx
; %6 = argument-derived + loaded value; %7 = %a0 + %6.
751  %6  = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5)
752  %7  = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6)
; Move lane 0 of each sum back to a 32-bit integer.
753  %8  = bitcast x86_mmx %6 to <2 x i32>
754  %9  = bitcast x86_mmx %7 to <2 x i32>
755  %10 = extractelement <2 x i32> %8, i32 0
756  %11 = extractelement <2 x i32> %9, i32 0
; The first sum goes to memory, the second is the return value.
757  store i32 %10, i32* %a2
758  ret i32 %11
759}
760
; Scheduling test for MOVDQ2Q (xmm -> mm register move).
; The IR takes lane 0 of the <2 x i64> argument, reinterprets it as x86_mmx,
; adds it to itself with the MMX padd.d intrinsic and returns the sum as i64.
; Each per-CPU group of assertions below pins the emitted instruction sequence
; together with the "# sched" annotation printed under -print-schedule for the
; corresponding -mcpu (see the RUN lines at the top of the file).
761define i64 @test_movdq2q(<2 x i64> %a0) optsize {
762; GENERIC-LABEL: test_movdq2q:
763; GENERIC:       # %bb.0:
764; GENERIC-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
765; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
766; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
767; GENERIC-NEXT:    retq # sched: [1:1.00]
768;
769; ATOM-LABEL: test_movdq2q:
770; ATOM:       # %bb.0:
771; ATOM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
772; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
773; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
774; ATOM-NEXT:    retq # sched: [79:39.50]
775;
776; SLM-LABEL: test_movdq2q:
777; SLM:       # %bb.0:
778; SLM-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
779; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
780; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
781; SLM-NEXT:    retq # sched: [4:1.00]
782;
783; SANDY-LABEL: test_movdq2q:
784; SANDY:       # %bb.0:
785; SANDY-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
786; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
787; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
788; SANDY-NEXT:    retq # sched: [1:1.00]
789;
790; HASWELL-LABEL: test_movdq2q:
791; HASWELL:       # %bb.0:
792; HASWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
793; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
794; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
795; HASWELL-NEXT:    retq # sched: [7:1.00]
796;
797; BROADWELL-LABEL: test_movdq2q:
798; BROADWELL:       # %bb.0:
799; BROADWELL-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:0.67]
800; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
801; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
802; BROADWELL-NEXT:    retq # sched: [7:1.00]
803;
804; SKYLAKE-LABEL: test_movdq2q:
805; SKYLAKE:       # %bb.0:
806; SKYLAKE-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
807; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
808; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
809; SKYLAKE-NEXT:    retq # sched: [7:1.00]
810;
811; SKX-LABEL: test_movdq2q:
812; SKX:       # %bb.0:
813; SKX-NEXT:    movdq2q %xmm0, %mm0 # sched: [2:1.00]
814; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
815; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
816; SKX-NEXT:    retq # sched: [7:1.00]
817;
818; BTVER2-LABEL: test_movdq2q:
819; BTVER2:       # %bb.0:
820; BTVER2-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.50]
821; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
822; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
823; BTVER2-NEXT:    retq # sched: [4:1.00]
824;
825; ZNVER1-LABEL: test_movdq2q:
826; ZNVER1:       # %bb.0:
827; ZNVER1-NEXT:    movdq2q %xmm0, %mm0 # sched: [1:0.25]
828; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
829; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
830; ZNVER1-NEXT:    retq # sched: [1:0.50]
; The padd.d of the value with itself gives the converted MMX value a use, and
; the final bitcast moves the result back to a general-purpose register.
831  %1 = extractelement <2 x i64> %a0, i32 0
832  %2 = bitcast i64 %1 to x86_mmx
833  %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
834  %4 = bitcast x86_mmx %3 to i64
835  ret i64 %4
836}
837
; Scheduling test for MOVNTQ.
; The IR consists solely of a call to the llvm.x86.mmx.movnt.dq intrinsic with
; the destination pointer %a0 and the MMX value %a1. Every per-CPU group below
; expects a single movntq store to (%rdi) followed by retq, each with the
; "# sched" annotation printed under -print-schedule.
838define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize {
839; GENERIC-LABEL: test_movntq:
840; GENERIC:       # %bb.0:
841; GENERIC-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
842; GENERIC-NEXT:    retq # sched: [1:1.00]
843;
844; ATOM-LABEL: test_movntq:
845; ATOM:       # %bb.0:
846; ATOM-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
847; ATOM-NEXT:    retq # sched: [79:39.50]
848;
849; SLM-LABEL: test_movntq:
850; SLM:       # %bb.0:
851; SLM-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
852; SLM-NEXT:    retq # sched: [4:1.00]
853;
854; SANDY-LABEL: test_movntq:
855; SANDY:       # %bb.0:
856; SANDY-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
857; SANDY-NEXT:    retq # sched: [1:1.00]
858;
859; HASWELL-LABEL: test_movntq:
860; HASWELL:       # %bb.0:
861; HASWELL-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
862; HASWELL-NEXT:    retq # sched: [7:1.00]
863;
864; BROADWELL-LABEL: test_movntq:
865; BROADWELL:       # %bb.0:
866; BROADWELL-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
867; BROADWELL-NEXT:    retq # sched: [7:1.00]
868;
869; SKYLAKE-LABEL: test_movntq:
870; SKYLAKE:       # %bb.0:
871; SKYLAKE-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
872; SKYLAKE-NEXT:    retq # sched: [7:1.00]
873;
874; SKX-LABEL: test_movntq:
875; SKX:       # %bb.0:
876; SKX-NEXT:    movntq %mm0, (%rdi) # sched: [1:1.00]
877; SKX-NEXT:    retq # sched: [7:1.00]
878;
879; BTVER2-LABEL: test_movntq:
880; BTVER2:       # %bb.0:
881; BTVER2-NEXT:    movntq %mm0, (%rdi) # sched: [2:1.00]
882; BTVER2-NEXT:    retq # sched: [4:1.00]
883;
884; ZNVER1-LABEL: test_movntq:
885; ZNVER1:       # %bb.0:
886; ZNVER1-NEXT:    movntq %mm0, (%rdi) # sched: [1:0.50]
887; ZNVER1-NEXT:    retq # sched: [1:0.50]
888  call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1)
889  ret void
890}
; Declaration of the intrinsic called by test_movntq above.
891declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
892
; Scheduling test for the MMX MOVQ load and store forms.
; The IR loads an i64 from %a0, reinterprets it as x86_mmx, adds it to itself
; with the MMX padd.d intrinsic and stores the result back to %a0. The
; assertions expect a movq load, the paddd, and a movq store for every CPU.
; NOTE(review): unlike the neighbouring tests this function is not marked
; optsize, and the Atom assertions here contain two nops before retq -
; presumably related; confirm before touching the attribute, since the
; assertions are autogenerated and would need to be regenerated.
893define void @test_movq(i64 *%a0) {
894; GENERIC-LABEL: test_movq:
895; GENERIC:       # %bb.0:
896; GENERIC-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
897; GENERIC-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
898; GENERIC-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
899; GENERIC-NEXT:    retq # sched: [1:1.00]
900;
901; ATOM-LABEL: test_movq:
902; ATOM:       # %bb.0:
903; ATOM-NEXT:    movq (%rdi), %mm0 # sched: [1:1.00]
904; ATOM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
905; ATOM-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
906; ATOM-NEXT:    nop # sched: [1:0.50]
907; ATOM-NEXT:    nop # sched: [1:0.50]
908; ATOM-NEXT:    retq # sched: [79:39.50]
909;
910; SLM-LABEL: test_movq:
911; SLM:       # %bb.0:
912; SLM-NEXT:    movq (%rdi), %mm0 # sched: [3:1.00]
913; SLM-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
914; SLM-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
915; SLM-NEXT:    retq # sched: [4:1.00]
916;
917; SANDY-LABEL: test_movq:
918; SANDY:       # %bb.0:
919; SANDY-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
920; SANDY-NEXT:    paddd %mm0, %mm0 # sched: [3:1.00]
921; SANDY-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
922; SANDY-NEXT:    retq # sched: [1:1.00]
923;
924; HASWELL-LABEL: test_movq:
925; HASWELL:       # %bb.0:
926; HASWELL-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
927; HASWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
928; HASWELL-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
929; HASWELL-NEXT:    retq # sched: [7:1.00]
930;
931; BROADWELL-LABEL: test_movq:
932; BROADWELL:       # %bb.0:
933; BROADWELL-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
934; BROADWELL-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
935; BROADWELL-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
936; BROADWELL-NEXT:    retq # sched: [7:1.00]
937;
938; SKYLAKE-LABEL: test_movq:
939; SKYLAKE:       # %bb.0:
940; SKYLAKE-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
941; SKYLAKE-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
942; SKYLAKE-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
943; SKYLAKE-NEXT:    retq # sched: [7:1.00]
944;
945; SKX-LABEL: test_movq:
946; SKX:       # %bb.0:
947; SKX-NEXT:    movq (%rdi), %mm0 # sched: [5:0.50]
948; SKX-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
949; SKX-NEXT:    movq %mm0, (%rdi) # sched: [1:1.00]
950; SKX-NEXT:    retq # sched: [7:1.00]
951;
952; BTVER2-LABEL: test_movq:
953; BTVER2:       # %bb.0:
954; BTVER2-NEXT:    movq (%rdi), %mm0 # sched: [5:1.00]
955; BTVER2-NEXT:    paddd %mm0, %mm0 # sched: [1:0.50]
956; BTVER2-NEXT:    movq %mm0, (%rdi) # sched: [2:1.00]
957; BTVER2-NEXT:    retq # sched: [4:1.00]
958;
959; ZNVER1-LABEL: test_movq:
960; ZNVER1:       # %bb.0:
961; ZNVER1-NEXT:    movq (%rdi), %mm0 # sched: [8:0.50]
962; ZNVER1-NEXT:    paddd %mm0, %mm0 # sched: [1:0.25]
963; ZNVER1-NEXT:    movq %mm0, (%rdi) # sched: [1:0.50]
964; ZNVER1-NEXT:    retq # sched: [1:0.50]
; Load and store use the same pointer, so the value round-trips through an MMX
; register with the padd.d in between.
965  %1 = load i64, i64* %a0, align 8
966  %2 = bitcast i64 %1 to x86_mmx
967  %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
968  %4 = bitcast x86_mmx %3 to i64
969  store i64 %4, i64* %a0, align 8
970  ret void
971}
972
; Scheduling test for MOVQ2DQ (mm -> xmm register move).
; The IR reinterprets the x86_mmx argument as i64 and inserts it into lane 0 of
; an otherwise undef <2 x i64>, which is returned. Every per-CPU group below
; expects a single movq2dq followed by retq, each with the "# sched"
; annotation printed under -print-schedule.
973define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize {
974; GENERIC-LABEL: test_movq2dq:
975; GENERIC:       # %bb.0:
976; GENERIC-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.33]
977; GENERIC-NEXT:    retq # sched: [1:1.00]
978;
979; ATOM-LABEL: test_movq2dq:
980; ATOM:       # %bb.0:
981; ATOM-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
982; ATOM-NEXT:    retq # sched: [79:39.50]
983;
984; SLM-LABEL: test_movq2dq:
985; SLM:       # %bb.0:
986; SLM-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
987; SLM-NEXT:    retq # sched: [4:1.00]
988;
989; SANDY-LABEL: test_movq2dq:
990; SANDY:       # %bb.0:
991; SANDY-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.33]
992; SANDY-NEXT:    retq # sched: [1:1.00]
993;
994; HASWELL-LABEL: test_movq2dq:
995; HASWELL:       # %bb.0:
996; HASWELL-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:1.00]
997; HASWELL-NEXT:    retq # sched: [7:1.00]
998;
999; BROADWELL-LABEL: test_movq2dq:
1000; BROADWELL:       # %bb.0:
1001; BROADWELL-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:1.00]
1002; BROADWELL-NEXT:    retq # sched: [7:1.00]
1003;
1004; SKYLAKE-LABEL: test_movq2dq:
1005; SKYLAKE:       # %bb.0:
1006; SKYLAKE-NEXT:    movq2dq %mm0, %xmm0 # sched: [2:2.00]
1007; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1008;
1009; SKX-LABEL: test_movq2dq:
1010; SKX:       # %bb.0:
1011; SKX-NEXT:    movq2dq %mm0, %xmm0 # sched: [2:2.00]
1012; SKX-NEXT:    retq # sched: [7:1.00]
1013;
1014; BTVER2-LABEL: test_movq2dq:
1015; BTVER2:       # %bb.0:
1016; BTVER2-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.50]
1017; BTVER2-NEXT:    retq # sched: [4:1.00]
1018;
1019; ZNVER1-LABEL: test_movq2dq:
1020; ZNVER1:       # %bb.0:
1021; ZNVER1-NEXT:    movq2dq %mm0, %xmm0 # sched: [1:0.25]
1022; ZNVER1-NEXT:    retq # sched: [1:0.50]
1023  %1 = bitcast x86_mmx %a0 to i64
1024  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
1025  ret <2 x i64> %2
1026}
1027
; Scheduling test for the MMX form of PABSB.
; The IR loads an x86_mmx value from %a0 and applies the llvm.x86.ssse3.pabs.b
; intrinsic twice, returning the result as i64. Per the assertions below, the
; first application is emitted with a memory operand and the second with a
; register operand, so both forms get a "# sched" annotation for each CPU.
1028define i64 @test_pabsb(x86_mmx *%a0) optsize {
1029; GENERIC-LABEL: test_pabsb:
1030; GENERIC:       # %bb.0:
1031; GENERIC-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1032; GENERIC-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1033; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1034; GENERIC-NEXT:    retq # sched: [1:1.00]
1035;
1036; ATOM-LABEL: test_pabsb:
1037; ATOM:       # %bb.0:
1038; ATOM-NEXT:    pabsb (%rdi), %mm0 # sched: [1:1.00]
1039; ATOM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1040; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1041; ATOM-NEXT:    retq # sched: [79:39.50]
1042;
1043; SLM-LABEL: test_pabsb:
1044; SLM:       # %bb.0:
1045; SLM-NEXT:    pabsb (%rdi), %mm0 # sched: [4:1.00]
1046; SLM-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1047; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1048; SLM-NEXT:    retq # sched: [4:1.00]
1049;
1050; SANDY-LABEL: test_pabsb:
1051; SANDY:       # %bb.0:
1052; SANDY-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1053; SANDY-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1054; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1055; SANDY-NEXT:    retq # sched: [1:1.00]
1056;
1057; HASWELL-LABEL: test_pabsb:
1058; HASWELL:       # %bb.0:
1059; HASWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1060; HASWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1061; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1062; HASWELL-NEXT:    retq # sched: [7:1.00]
1063;
1064; BROADWELL-LABEL: test_pabsb:
1065; BROADWELL:       # %bb.0:
1066; BROADWELL-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1067; BROADWELL-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1068; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1069; BROADWELL-NEXT:    retq # sched: [7:1.00]
1070;
1071; SKYLAKE-LABEL: test_pabsb:
1072; SKYLAKE:       # %bb.0:
1073; SKYLAKE-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1074; SKYLAKE-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1075; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1076; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1077;
1078; SKX-LABEL: test_pabsb:
1079; SKX:       # %bb.0:
1080; SKX-NEXT:    pabsb (%rdi), %mm0 # sched: [6:0.50]
1081; SKX-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1082; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1083; SKX-NEXT:    retq # sched: [7:1.00]
1084;
1085; BTVER2-LABEL: test_pabsb:
1086; BTVER2:       # %bb.0:
1087; BTVER2-NEXT:    pabsb (%rdi), %mm0 # sched: [6:1.00]
1088; BTVER2-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.50]
1089; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1090; BTVER2-NEXT:    retq # sched: [4:1.00]
1091;
1092; ZNVER1-LABEL: test_pabsb:
1093; ZNVER1:       # %bb.0:
1094; ZNVER1-NEXT:    pabsb (%rdi), %mm0 # sched: [8:0.50]
1095; ZNVER1-NEXT:    pabsb %mm0, %mm0 # sched: [1:0.25]
1096; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1097; ZNVER1-NEXT:    retq # sched: [1:0.50]
1098  %1 = load x86_mmx, x86_mmx *%a0, align 8
1099  %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1)
1100  %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2)
1101  %4 = bitcast x86_mmx %3 to i64
1102  ret i64 %4
1103}
; Declaration of the intrinsic called by test_pabsb above.
1104declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
1105
; Scheduling test for the MMX form of PABSD.
; The IR loads an x86_mmx value from %a0 and applies the llvm.x86.ssse3.pabs.d
; intrinsic twice, returning the result as i64. Per the assertions below, the
; first application is emitted with a memory operand and the second with a
; register operand, so both forms get a "# sched" annotation for each CPU.
1106define i64 @test_pabsd(x86_mmx *%a0) optsize {
1107; GENERIC-LABEL: test_pabsd:
1108; GENERIC:       # %bb.0:
1109; GENERIC-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1110; GENERIC-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1111; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1112; GENERIC-NEXT:    retq # sched: [1:1.00]
1113;
1114; ATOM-LABEL: test_pabsd:
1115; ATOM:       # %bb.0:
1116; ATOM-NEXT:    pabsd (%rdi), %mm0 # sched: [1:1.00]
1117; ATOM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1118; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1119; ATOM-NEXT:    retq # sched: [79:39.50]
1120;
1121; SLM-LABEL: test_pabsd:
1122; SLM:       # %bb.0:
1123; SLM-NEXT:    pabsd (%rdi), %mm0 # sched: [4:1.00]
1124; SLM-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1125; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1126; SLM-NEXT:    retq # sched: [4:1.00]
1127;
1128; SANDY-LABEL: test_pabsd:
1129; SANDY:       # %bb.0:
1130; SANDY-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1131; SANDY-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1132; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1133; SANDY-NEXT:    retq # sched: [1:1.00]
1134;
1135; HASWELL-LABEL: test_pabsd:
1136; HASWELL:       # %bb.0:
1137; HASWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1138; HASWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1139; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1140; HASWELL-NEXT:    retq # sched: [7:1.00]
1141;
1142; BROADWELL-LABEL: test_pabsd:
1143; BROADWELL:       # %bb.0:
1144; BROADWELL-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1145; BROADWELL-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1146; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1147; BROADWELL-NEXT:    retq # sched: [7:1.00]
1148;
1149; SKYLAKE-LABEL: test_pabsd:
1150; SKYLAKE:       # %bb.0:
1151; SKYLAKE-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1152; SKYLAKE-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1153; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1154; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1155;
1156; SKX-LABEL: test_pabsd:
1157; SKX:       # %bb.0:
1158; SKX-NEXT:    pabsd (%rdi), %mm0 # sched: [6:0.50]
1159; SKX-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1160; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1161; SKX-NEXT:    retq # sched: [7:1.00]
1162;
1163; BTVER2-LABEL: test_pabsd:
1164; BTVER2:       # %bb.0:
1165; BTVER2-NEXT:    pabsd (%rdi), %mm0 # sched: [6:1.00]
1166; BTVER2-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.50]
1167; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1168; BTVER2-NEXT:    retq # sched: [4:1.00]
1169;
1170; ZNVER1-LABEL: test_pabsd:
1171; ZNVER1:       # %bb.0:
1172; ZNVER1-NEXT:    pabsd (%rdi), %mm0 # sched: [8:0.50]
1173; ZNVER1-NEXT:    pabsd %mm0, %mm0 # sched: [1:0.25]
1174; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1175; ZNVER1-NEXT:    retq # sched: [1:0.50]
1176  %1 = load x86_mmx, x86_mmx *%a0, align 8
1177  %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1)
1178  %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2)
1179  %4 = bitcast x86_mmx %3 to i64
1180  ret i64 %4
1181}
; Declaration of the intrinsic called by test_pabsd above.
1182declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
1183
; Scheduling test for the MMX form of PABSW.
; The IR loads an x86_mmx value from %a0 and applies the llvm.x86.ssse3.pabs.w
; intrinsic twice, returning the result as i64. Per the assertions below, the
; first application is emitted with a memory operand and the second with a
; register operand, so both forms get a "# sched" annotation for each CPU.
1184define i64 @test_pabsw(x86_mmx *%a0) optsize {
1185; GENERIC-LABEL: test_pabsw:
1186; GENERIC:       # %bb.0:
1187; GENERIC-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1188; GENERIC-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1189; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1190; GENERIC-NEXT:    retq # sched: [1:1.00]
1191;
1192; ATOM-LABEL: test_pabsw:
1193; ATOM:       # %bb.0:
1194; ATOM-NEXT:    pabsw (%rdi), %mm0 # sched: [1:1.00]
1195; ATOM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1196; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1197; ATOM-NEXT:    retq # sched: [79:39.50]
1198;
1199; SLM-LABEL: test_pabsw:
1200; SLM:       # %bb.0:
1201; SLM-NEXT:    pabsw (%rdi), %mm0 # sched: [4:1.00]
1202; SLM-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1203; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1204; SLM-NEXT:    retq # sched: [4:1.00]
1205;
1206; SANDY-LABEL: test_pabsw:
1207; SANDY:       # %bb.0:
1208; SANDY-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1209; SANDY-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1210; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1211; SANDY-NEXT:    retq # sched: [1:1.00]
1212;
1213; HASWELL-LABEL: test_pabsw:
1214; HASWELL:       # %bb.0:
1215; HASWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1216; HASWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1217; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1218; HASWELL-NEXT:    retq # sched: [7:1.00]
1219;
1220; BROADWELL-LABEL: test_pabsw:
1221; BROADWELL:       # %bb.0:
1222; BROADWELL-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1223; BROADWELL-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1224; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1225; BROADWELL-NEXT:    retq # sched: [7:1.00]
1226;
1227; SKYLAKE-LABEL: test_pabsw:
1228; SKYLAKE:       # %bb.0:
1229; SKYLAKE-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1230; SKYLAKE-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1231; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1232; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1233;
1234; SKX-LABEL: test_pabsw:
1235; SKX:       # %bb.0:
1236; SKX-NEXT:    pabsw (%rdi), %mm0 # sched: [6:0.50]
1237; SKX-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1238; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1239; SKX-NEXT:    retq # sched: [7:1.00]
1240;
1241; BTVER2-LABEL: test_pabsw:
1242; BTVER2:       # %bb.0:
1243; BTVER2-NEXT:    pabsw (%rdi), %mm0 # sched: [6:1.00]
1244; BTVER2-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.50]
1245; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1246; BTVER2-NEXT:    retq # sched: [4:1.00]
1247;
1248; ZNVER1-LABEL: test_pabsw:
1249; ZNVER1:       # %bb.0:
1250; ZNVER1-NEXT:    pabsw (%rdi), %mm0 # sched: [8:0.50]
1251; ZNVER1-NEXT:    pabsw %mm0, %mm0 # sched: [1:0.25]
1252; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1253; ZNVER1-NEXT:    retq # sched: [1:0.50]
1254  %1 = load x86_mmx, x86_mmx *%a0, align 8
1255  %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1)
1256  %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2)
1257  %4 = bitcast x86_mmx %3 to i64
1258  ret i64 %4
1259}
; Declaration of the intrinsic called by test_pabsw above.
1260declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
1261
; Scheduling test for the MMX form of PACKSSDW.
; The IR applies llvm.x86.mmx.packssdw to the two register arguments, then
; applies it again to that result and a value loaded from %a2, returning the
; final value as i64. Per the assertions below this yields the register form
; followed by the memory form, each with its "# sched" annotation per CPU.
1262define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
1263; GENERIC-LABEL: test_packssdw:
1264; GENERIC:       # %bb.0:
1265; GENERIC-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
1266; GENERIC-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
1267; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1268; GENERIC-NEXT:    retq # sched: [1:1.00]
1269;
1270; ATOM-LABEL: test_packssdw:
1271; ATOM:       # %bb.0:
1272; ATOM-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
1273; ATOM-NEXT:    packssdw (%rdi), %mm0 # sched: [1:1.00]
1274; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1275; ATOM-NEXT:    retq # sched: [79:39.50]
1276;
1277; SLM-LABEL: test_packssdw:
1278; SLM:       # %bb.0:
1279; SLM-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
1280; SLM-NEXT:    packssdw (%rdi), %mm0 # sched: [4:1.00]
1281; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1282; SLM-NEXT:    retq # sched: [4:1.00]
1283;
1284; SANDY-LABEL: test_packssdw:
1285; SANDY:       # %bb.0:
1286; SANDY-NEXT:    packssdw %mm1, %mm0 # sched: [1:1.00]
1287; SANDY-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
1288; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1289; SANDY-NEXT:    retq # sched: [1:1.00]
1290;
1291; HASWELL-LABEL: test_packssdw:
1292; HASWELL:       # %bb.0:
1293; HASWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
1294; HASWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
1295; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1296; HASWELL-NEXT:    retq # sched: [7:1.00]
1297;
1298; BROADWELL-LABEL: test_packssdw:
1299; BROADWELL:       # %bb.0:
1300; BROADWELL-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
1301; BROADWELL-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
1302; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1303; BROADWELL-NEXT:    retq # sched: [7:1.00]
1304;
1305; SKYLAKE-LABEL: test_packssdw:
1306; SKYLAKE:       # %bb.0:
1307; SKYLAKE-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
1308; SKYLAKE-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
1309; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1310; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1311;
1312; SKX-LABEL: test_packssdw:
1313; SKX:       # %bb.0:
1314; SKX-NEXT:    packssdw %mm1, %mm0 # sched: [3:2.00]
1315; SKX-NEXT:    packssdw (%rdi), %mm0 # sched: [7:2.00]
1316; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1317; SKX-NEXT:    retq # sched: [7:1.00]
1318;
1319; BTVER2-LABEL: test_packssdw:
1320; BTVER2:       # %bb.0:
1321; BTVER2-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
1322; BTVER2-NEXT:    packssdw (%rdi), %mm0 # sched: [6:1.00]
1323; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1324; BTVER2-NEXT:    retq # sched: [4:1.00]
1325;
1326; ZNVER1-LABEL: test_packssdw:
1327; ZNVER1:       # %bb.0:
1328; ZNVER1-NEXT:    packssdw %mm1, %mm0 # sched: [1:0.50]
1329; ZNVER1-NEXT:    packssdw (%rdi), %mm0 # sched: [1:0.50]
1330; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1331; ZNVER1-NEXT:    retq # sched: [1:0.50]
1332  %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1)
1333  %2 = load x86_mmx, x86_mmx *%a2, align 8
1334  %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2)
1335  %4 = bitcast x86_mmx %3 to i64
1336  ret i64 %4
1337}
; Declaration of the intrinsic called by test_packssdw above.
1338declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
1339
; Scheduling test for the MMX form of PACKSSWB.
; The IR applies llvm.x86.mmx.packsswb to the two register arguments, then
; applies it again to that result and a value loaded from %a2, returning the
; final value as i64. Per the assertions below this yields the register form
; followed by the memory form, each with its "# sched" annotation per CPU.
1340define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
1341; GENERIC-LABEL: test_packsswb:
1342; GENERIC:       # %bb.0:
1343; GENERIC-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
1344; GENERIC-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
1345; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1346; GENERIC-NEXT:    retq # sched: [1:1.00]
1347;
1348; ATOM-LABEL: test_packsswb:
1349; ATOM:       # %bb.0:
1350; ATOM-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
1351; ATOM-NEXT:    packsswb (%rdi), %mm0 # sched: [1:1.00]
1352; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1353; ATOM-NEXT:    retq # sched: [79:39.50]
1354;
1355; SLM-LABEL: test_packsswb:
1356; SLM:       # %bb.0:
1357; SLM-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
1358; SLM-NEXT:    packsswb (%rdi), %mm0 # sched: [4:1.00]
1359; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1360; SLM-NEXT:    retq # sched: [4:1.00]
1361;
1362; SANDY-LABEL: test_packsswb:
1363; SANDY:       # %bb.0:
1364; SANDY-NEXT:    packsswb %mm1, %mm0 # sched: [1:1.00]
1365; SANDY-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
1366; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1367; SANDY-NEXT:    retq # sched: [1:1.00]
1368;
1369; HASWELL-LABEL: test_packsswb:
1370; HASWELL:       # %bb.0:
1371; HASWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
1372; HASWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
1373; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1374; HASWELL-NEXT:    retq # sched: [7:1.00]
1375;
1376; BROADWELL-LABEL: test_packsswb:
1377; BROADWELL:       # %bb.0:
1378; BROADWELL-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
1379; BROADWELL-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
1380; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1381; BROADWELL-NEXT:    retq # sched: [7:1.00]
1382;
1383; SKYLAKE-LABEL: test_packsswb:
1384; SKYLAKE:       # %bb.0:
1385; SKYLAKE-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
1386; SKYLAKE-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
1387; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1388; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1389;
1390; SKX-LABEL: test_packsswb:
1391; SKX:       # %bb.0:
1392; SKX-NEXT:    packsswb %mm1, %mm0 # sched: [3:2.00]
1393; SKX-NEXT:    packsswb (%rdi), %mm0 # sched: [7:2.00]
1394; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1395; SKX-NEXT:    retq # sched: [7:1.00]
1396;
1397; BTVER2-LABEL: test_packsswb:
1398; BTVER2:       # %bb.0:
1399; BTVER2-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
1400; BTVER2-NEXT:    packsswb (%rdi), %mm0 # sched: [6:1.00]
1401; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1402; BTVER2-NEXT:    retq # sched: [4:1.00]
1403;
1404; ZNVER1-LABEL: test_packsswb:
1405; ZNVER1:       # %bb.0:
1406; ZNVER1-NEXT:    packsswb %mm1, %mm0 # sched: [1:0.50]
1407; ZNVER1-NEXT:    packsswb (%rdi), %mm0 # sched: [1:0.50]
1408; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1409; ZNVER1-NEXT:    retq # sched: [1:0.50]
1410  %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1)
1411  %2 = load x86_mmx, x86_mmx *%a2, align 8
1412  %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2)
1413  %4 = bitcast x86_mmx %3 to i64
1414  ret i64 %4
1415}
; Declaration of the intrinsic called by test_packsswb above.
1416declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
1417
; Scheduling test for the MMX form of PACKUSWB.
; The IR applies llvm.x86.mmx.packuswb to the two register arguments, then
; applies it again to that result and a value loaded from %a2, returning the
; final value as i64. Per the assertions below this yields the register form
; followed by the memory form, each with its "# sched" annotation per CPU.
1418define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
1419; GENERIC-LABEL: test_packuswb:
1420; GENERIC:       # %bb.0:
1421; GENERIC-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
1422; GENERIC-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
1423; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1424; GENERIC-NEXT:    retq # sched: [1:1.00]
1425;
1426; ATOM-LABEL: test_packuswb:
1427; ATOM:       # %bb.0:
1428; ATOM-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
1429; ATOM-NEXT:    packuswb (%rdi), %mm0 # sched: [1:1.00]
1430; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1431; ATOM-NEXT:    retq # sched: [79:39.50]
1432;
1433; SLM-LABEL: test_packuswb:
1434; SLM:       # %bb.0:
1435; SLM-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
1436; SLM-NEXT:    packuswb (%rdi), %mm0 # sched: [4:1.00]
1437; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1438; SLM-NEXT:    retq # sched: [4:1.00]
1439;
1440; SANDY-LABEL: test_packuswb:
1441; SANDY:       # %bb.0:
1442; SANDY-NEXT:    packuswb %mm1, %mm0 # sched: [1:1.00]
1443; SANDY-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
1444; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1445; SANDY-NEXT:    retq # sched: [1:1.00]
1446;
1447; HASWELL-LABEL: test_packuswb:
1448; HASWELL:       # %bb.0:
1449; HASWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
1450; HASWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
1451; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1452; HASWELL-NEXT:    retq # sched: [7:1.00]
1453;
1454; BROADWELL-LABEL: test_packuswb:
1455; BROADWELL:       # %bb.0:
1456; BROADWELL-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
1457; BROADWELL-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
1458; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1459; BROADWELL-NEXT:    retq # sched: [7:1.00]
1460;
1461; SKYLAKE-LABEL: test_packuswb:
1462; SKYLAKE:       # %bb.0:
1463; SKYLAKE-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
1464; SKYLAKE-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
1465; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1466; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1467;
1468; SKX-LABEL: test_packuswb:
1469; SKX:       # %bb.0:
1470; SKX-NEXT:    packuswb %mm1, %mm0 # sched: [3:2.00]
1471; SKX-NEXT:    packuswb (%rdi), %mm0 # sched: [7:2.00]
1472; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1473; SKX-NEXT:    retq # sched: [7:1.00]
1474;
1475; BTVER2-LABEL: test_packuswb:
1476; BTVER2:       # %bb.0:
1477; BTVER2-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
1478; BTVER2-NEXT:    packuswb (%rdi), %mm0 # sched: [6:1.00]
1479; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1480; BTVER2-NEXT:    retq # sched: [4:1.00]
1481;
1482; ZNVER1-LABEL: test_packuswb:
1483; ZNVER1:       # %bb.0:
1484; ZNVER1-NEXT:    packuswb %mm1, %mm0 # sched: [1:0.50]
1485; ZNVER1-NEXT:    packuswb (%rdi), %mm0 # sched: [1:0.50]
1486; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1487; ZNVER1-NEXT:    retq # sched: [1:0.50]
1488  %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1)
1489  %2 = load x86_mmx, x86_mmx *%a2, align 8
1490  %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2)
1491  %4 = bitcast x86_mmx %3 to i64
1492  ret i64 %4
1493}
; Declaration of the intrinsic called by test_packuswb above.
1494declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
1495
; Scheduling test for the MMX form of PADDB.
; The IR applies llvm.x86.mmx.padd.b to the two register arguments, then
; applies it again to that result and a value loaded from %a2, returning the
; final value as i64. Per the assertions below this yields the register form
; followed by the memory form, each with its "# sched" annotation per CPU.
1496define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
1497; GENERIC-LABEL: test_paddb:
1498; GENERIC:       # %bb.0:
1499; GENERIC-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
1500; GENERIC-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
1501; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1502; GENERIC-NEXT:    retq # sched: [1:1.00]
1503;
1504; ATOM-LABEL: test_paddb:
1505; ATOM:       # %bb.0:
1506; ATOM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1507; ATOM-NEXT:    paddb (%rdi), %mm0 # sched: [1:1.00]
1508; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
1509; ATOM-NEXT:    retq # sched: [79:39.50]
1510;
1511; SLM-LABEL: test_paddb:
1512; SLM:       # %bb.0:
1513; SLM-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1514; SLM-NEXT:    paddb (%rdi), %mm0 # sched: [4:1.00]
1515; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
1516; SLM-NEXT:    retq # sched: [4:1.00]
1517;
1518; SANDY-LABEL: test_paddb:
1519; SANDY:       # %bb.0:
1520; SANDY-NEXT:    paddb %mm1, %mm0 # sched: [3:1.00]
1521; SANDY-NEXT:    paddb (%rdi), %mm0 # sched: [8:1.00]
1522; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1523; SANDY-NEXT:    retq # sched: [1:1.00]
1524;
1525; HASWELL-LABEL: test_paddb:
1526; HASWELL:       # %bb.0:
1527; HASWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1528; HASWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
1529; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1530; HASWELL-NEXT:    retq # sched: [7:1.00]
1531;
1532; BROADWELL-LABEL: test_paddb:
1533; BROADWELL:       # %bb.0:
1534; BROADWELL-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1535; BROADWELL-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
1536; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
1537; BROADWELL-NEXT:    retq # sched: [7:1.00]
1538;
1539; SKYLAKE-LABEL: test_paddb:
1540; SKYLAKE:       # %bb.0:
1541; SKYLAKE-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1542; SKYLAKE-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
1543; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1544; SKYLAKE-NEXT:    retq # sched: [7:1.00]
1545;
1546; SKX-LABEL: test_paddb:
1547; SKX:       # %bb.0:
1548; SKX-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1549; SKX-NEXT:    paddb (%rdi), %mm0 # sched: [6:0.50]
1550; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1551; SKX-NEXT:    retq # sched: [7:1.00]
1552;
1553; BTVER2-LABEL: test_paddb:
1554; BTVER2:       # %bb.0:
1555; BTVER2-NEXT:    paddb %mm1, %mm0 # sched: [1:0.50]
1556; BTVER2-NEXT:    paddb (%rdi), %mm0 # sched: [6:1.00]
1557; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
1558; BTVER2-NEXT:    retq # sched: [4:1.00]
1559;
1560; ZNVER1-LABEL: test_paddb:
1561; ZNVER1:       # %bb.0:
1562; ZNVER1-NEXT:    paddb %mm1, %mm0 # sched: [1:0.25]
1563; ZNVER1-NEXT:    paddb (%rdi), %mm0 # sched: [8:0.50]
1564; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
1565; ZNVER1-NEXT:    retq # sched: [1:0.50]
1566  %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1)
1567  %2 = load x86_mmx, x86_mmx *%a2, align 8
1568  %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2)
1569  %4 = bitcast x86_mmx %3 to i64
1570  ret i64 %4
1571}
; Declaration of the intrinsic called by test_paddb above.
1572declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
1573
; Schedule-annotation coverage for paddd (llvm.x86.mmx.padd.d).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddd (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddd:
; SLM:       # %bb.0:
; SLM-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddd (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddd %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddd (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddd:
; SKX:       # %bb.0:
; SKX-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT:    paddd (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddd %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddd (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddd %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddd (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
1651
; Schedule-annotation coverage for paddq (llvm.x86.mmx.padd.q).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddq:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; GENERIC-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddq:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddq %mm1, %mm0 # sched: [2:1.00]
; ATOM-NEXT:    paddq (%rdi), %mm0 # sched: [3:1.50]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddq:
; SLM:       # %bb.0:
; SLM-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddq (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddq:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; SANDY-NEXT:    paddq (%rdi), %mm0 # sched: [7:0.50]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddq:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddq:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddq:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddq:
; SKX:       # %bb.0:
; SKX-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT:    paddq (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddq:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddq %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddq (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddq:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddq %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddq (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
1729
; Schedule-annotation coverage for paddsb (llvm.x86.mmx.padds.b).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddsb:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddsb:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddsb (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddsb:
; SLM:       # %bb.0:
; SLM-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddsb (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddsb:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddsb %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddsb (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsb:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddsb:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddsb (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddsb:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddsb:
; SKX:       # %bb.0:
; SKX-NEXT:    paddsb %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddsb:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddsb (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddsb:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddsb %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddsb (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
1807
; Schedule-annotation coverage for paddsw (llvm.x86.mmx.padds.w).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddsw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddsw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddsw (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddsw:
; SLM:       # %bb.0:
; SLM-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddsw (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddsw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddsw %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddsw (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddsw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddsw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddsw (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddsw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddsw:
; SKX:       # %bb.0:
; SKX-NEXT:    paddsw %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddsw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddsw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddsw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddsw %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddsw (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
1885
; Schedule-annotation coverage for paddusb (llvm.x86.mmx.paddus.b).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddusb:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddusb:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddusb (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddusb:
; SLM:       # %bb.0:
; SLM-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddusb (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddusb:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddusb %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddusb (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusb:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddusb:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddusb (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddusb:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddusb:
; SKX:       # %bb.0:
; SKX-NEXT:    paddusb %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddusb:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddusb (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddusb:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddusb %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddusb (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
1963
; Schedule-annotation coverage for paddusw (llvm.x86.mmx.paddus.w).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddusw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddusw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddusw (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddusw:
; SLM:       # %bb.0:
; SLM-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddusw (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddusw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddusw %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddusw (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddusw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddusw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddusw (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddusw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddusw:
; SKX:       # %bb.0:
; SKX-NEXT:    paddusw %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddusw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddusw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddusw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddusw %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddusw (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
2041
; Schedule-annotation coverage for paddw (llvm.x86.mmx.padd.w).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_paddw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
; GENERIC-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_paddw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    paddw (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_paddw:
; SLM:       # %bb.0:
; SLM-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    paddw (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_paddw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    paddw %mm1, %mm0 # sched: [3:1.00]
; SANDY-NEXT:    paddw (%rdi), %mm0 # sched: [8:1.00]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_paddw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; HASWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_paddw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; BROADWELL-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_paddw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_paddw:
; SKX:       # %bb.0:
; SKX-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT:    paddw (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_paddw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    paddw %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    paddw (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_paddw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    paddw %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    paddw (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
2119
; Schedule-annotation coverage for palignr (llvm.x86.mmx.palignr.b).
; The intrinsic is applied with immediate 1 to the two register arguments and
; then, again with immediate 1, to that result plus a value loaded through
; %a2, so the expected output below holds one register-register and one
; register-memory instruction per CPU model.
define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_palignr:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
; GENERIC-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_palignr:
; ATOM:       # %bb.0:
; ATOM-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; ATOM-NEXT:    palignr $1, (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_palignr:
; SLM:       # %bb.0:
; SLM-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; SLM-NEXT:    palignr $1, (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_palignr:
; SANDY:       # %bb.0:
; SANDY-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
; SANDY-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:0.50]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_palignr:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; HASWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_palignr:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; BROADWELL-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_palignr:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_palignr:
; SKX:       # %bb.0:
; SKX-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:1.00]
; SKX-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_palignr:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    palignr $1, (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_palignr:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    palignr $1, %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    palignr $1, (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic, immediate operand 1.
  %reg_res = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %reg_res, x86_mmx %mem_val, i8 1)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
2197
; Schedule-annotation coverage for pand (llvm.x86.mmx.pand).
; The intrinsic is applied to the two register arguments and then to that
; result plus a value loaded through %a2, so the expected output below holds
; one register-register and one register-memory instruction per CPU model.
define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_pand:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pand:
; ATOM:       # %bb.0:
; ATOM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT:    pand (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_pand:
; SLM:       # %bb.0:
; SLM-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT:    pand (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pand:
; SANDY:       # %bb.0:
; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pand:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
; HASWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pand:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
; BROADWELL-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pand:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pand:
; SKX:       # %bb.0:
; SKX-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pand:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    pand %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pand:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    pand %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT:    pand (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register use of the intrinsic.
  %reg_res = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1)
  ; Second operand comes from memory for the register-memory use.
  %mem_val = load x86_mmx, x86_mmx* %a2, align 8
  %mem_res = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %reg_res, x86_mmx %mem_val)
  ; Return the MMX result as a plain 64-bit integer.
  %as_i64 = bitcast x86_mmx %mem_res to i64
  ret i64 %as_i64
}
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
2275
; Exercises the MMX pandn intrinsic (@llvm.x86.mmx.pandn) in two forms: the
; first call combines the register arguments %a0 and %a1, the second combines
; that result with an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below therefore expects a
; register-register pandn, a pandn with a (%rdi) memory operand, a movq into
; %rax and a retq, each carrying the "# sched" annotation that llc prints under
; -print-schedule (see the RUN lines at the top of the file).
; NOTE(review): the two numbers in each annotation are presumably latency and
; reciprocal throughput -- confirm against the llc option before relying on it.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2276define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2277; GENERIC-LABEL: test_pandn:
2278; GENERIC:       # %bb.0:
2279; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
2280; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2281; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2282; GENERIC-NEXT:    retq # sched: [1:1.00]
2283;
2284; ATOM-LABEL: test_pandn:
2285; ATOM:       # %bb.0:
2286; ATOM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
2287; ATOM-NEXT:    pandn (%rdi), %mm0 # sched: [1:1.00]
2288; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2289; ATOM-NEXT:    retq # sched: [79:39.50]
2290;
2291; SLM-LABEL: test_pandn:
2292; SLM:       # %bb.0:
2293; SLM-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
2294; SLM-NEXT:    pandn (%rdi), %mm0 # sched: [4:1.00]
2295; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2296; SLM-NEXT:    retq # sched: [4:1.00]
2297;
2298; SANDY-LABEL: test_pandn:
2299; SANDY:       # %bb.0:
2300; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
2301; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2302; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2303; SANDY-NEXT:    retq # sched: [1:1.00]
2304;
2305; HASWELL-LABEL: test_pandn:
2306; HASWELL:       # %bb.0:
2307; HASWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
2308; HASWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2309; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2310; HASWELL-NEXT:    retq # sched: [7:1.00]
2311;
2312; BROADWELL-LABEL: test_pandn:
2313; BROADWELL:       # %bb.0:
2314; BROADWELL-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
2315; BROADWELL-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2316; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2317; BROADWELL-NEXT:    retq # sched: [7:1.00]
2318;
2319; SKYLAKE-LABEL: test_pandn:
2320; SKYLAKE:       # %bb.0:
2321; SKYLAKE-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
2322; SKYLAKE-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2323; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2324; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2325;
2326; SKX-LABEL: test_pandn:
2327; SKX:       # %bb.0:
2328; SKX-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
2329; SKX-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
2330; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2331; SKX-NEXT:    retq # sched: [7:1.00]
2332;
2333; BTVER2-LABEL: test_pandn:
2334; BTVER2:       # %bb.0:
2335; BTVER2-NEXT:    pandn %mm1, %mm0 # sched: [1:0.50]
2336; BTVER2-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
2337; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2338; BTVER2-NEXT:    retq # sched: [4:1.00]
2339;
2340; ZNVER1-LABEL: test_pandn:
2341; ZNVER1:       # %bb.0:
2342; ZNVER1-NEXT:    pandn %mm1, %mm0 # sched: [1:0.25]
2343; ZNVER1-NEXT:    pandn (%rdi), %mm0 # sched: [8:0.50]
2344; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2345; ZNVER1-NEXT:    retq # sched: [1:0.50]
2346  %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1)
2347  %2 = load x86_mmx, x86_mmx *%a2, align 8
2348  %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2)
2349  %4 = bitcast x86_mmx %3 to i64
2350  ret i64 %4
2351}
2352declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
2353
; Exercises the MMX pavgb intrinsic (@llvm.x86.mmx.pavg.b) in two forms: the
; first call takes the register arguments %a0 and %a1, the second takes that
; result and an x86_mmx value loaded from %a2. The result is bitcast to i64
; and returned. Each per-CPU group of assertions below expects a
; register-register pavgb, a pavgb with a (%rdi) memory operand, a movq into
; %rax and a retq, each with the "# sched" annotation that llc prints under
; -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2354define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2355; GENERIC-LABEL: test_pavgb:
2356; GENERIC:       # %bb.0:
2357; GENERIC-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
2358; GENERIC-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
2359; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2360; GENERIC-NEXT:    retq # sched: [1:1.00]
2361;
2362; ATOM-LABEL: test_pavgb:
2363; ATOM:       # %bb.0:
2364; ATOM-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
2365; ATOM-NEXT:    pavgb (%rdi), %mm0 # sched: [1:1.00]
2366; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2367; ATOM-NEXT:    retq # sched: [79:39.50]
2368;
2369; SLM-LABEL: test_pavgb:
2370; SLM:       # %bb.0:
2371; SLM-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
2372; SLM-NEXT:    pavgb (%rdi), %mm0 # sched: [4:1.00]
2373; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2374; SLM-NEXT:    retq # sched: [4:1.00]
2375;
2376; SANDY-LABEL: test_pavgb:
2377; SANDY:       # %bb.0:
2378; SANDY-NEXT:    pavgb %mm1, %mm0 # sched: [3:1.00]
2379; SANDY-NEXT:    pavgb (%rdi), %mm0 # sched: [8:1.00]
2380; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2381; SANDY-NEXT:    retq # sched: [1:1.00]
2382;
2383; HASWELL-LABEL: test_pavgb:
2384; HASWELL:       # %bb.0:
2385; HASWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
2386; HASWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
2387; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2388; HASWELL-NEXT:    retq # sched: [7:1.00]
2389;
2390; BROADWELL-LABEL: test_pavgb:
2391; BROADWELL:       # %bb.0:
2392; BROADWELL-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
2393; BROADWELL-NEXT:    pavgb (%rdi), %mm0 # sched: [6:0.50]
2394; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2395; BROADWELL-NEXT:    retq # sched: [7:1.00]
2396;
2397; SKYLAKE-LABEL: test_pavgb:
2398; SKYLAKE:       # %bb.0:
2399; SKYLAKE-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
2400; SKYLAKE-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
2401; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2402; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2403;
2404; SKX-LABEL: test_pavgb:
2405; SKX:       # %bb.0:
2406; SKX-NEXT:    pavgb %mm1, %mm0 # sched: [1:1.00]
2407; SKX-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
2408; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2409; SKX-NEXT:    retq # sched: [7:1.00]
2410;
2411; BTVER2-LABEL: test_pavgb:
2412; BTVER2:       # %bb.0:
2413; BTVER2-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.50]
2414; BTVER2-NEXT:    pavgb (%rdi), %mm0 # sched: [6:1.00]
2415; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2416; BTVER2-NEXT:    retq # sched: [4:1.00]
2417;
2418; ZNVER1-LABEL: test_pavgb:
2419; ZNVER1:       # %bb.0:
2420; ZNVER1-NEXT:    pavgb %mm1, %mm0 # sched: [1:0.25]
2421; ZNVER1-NEXT:    pavgb (%rdi), %mm0 # sched: [8:0.50]
2422; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2423; ZNVER1-NEXT:    retq # sched: [1:0.50]
2424  %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1)
2425  %2 = load x86_mmx, x86_mmx *%a2, align 8
2426  %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2)
2427  %4 = bitcast x86_mmx %3 to i64
2428  ret i64 %4
2429}
2430declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
2431
; Exercises the MMX pavgw intrinsic (@llvm.x86.mmx.pavg.w) in two forms: the
; first call takes the register arguments %a0 and %a1, the second takes that
; result and an x86_mmx value loaded from %a2. The result is bitcast to i64
; and returned. Each per-CPU group of assertions below expects a
; register-register pavgw, a pavgw with a (%rdi) memory operand, a movq into
; %rax and a retq, each with the "# sched" annotation that llc prints under
; -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2432define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2433; GENERIC-LABEL: test_pavgw:
2434; GENERIC:       # %bb.0:
2435; GENERIC-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
2436; GENERIC-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
2437; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2438; GENERIC-NEXT:    retq # sched: [1:1.00]
2439;
2440; ATOM-LABEL: test_pavgw:
2441; ATOM:       # %bb.0:
2442; ATOM-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
2443; ATOM-NEXT:    pavgw (%rdi), %mm0 # sched: [1:1.00]
2444; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2445; ATOM-NEXT:    retq # sched: [79:39.50]
2446;
2447; SLM-LABEL: test_pavgw:
2448; SLM:       # %bb.0:
2449; SLM-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
2450; SLM-NEXT:    pavgw (%rdi), %mm0 # sched: [4:1.00]
2451; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2452; SLM-NEXT:    retq # sched: [4:1.00]
2453;
2454; SANDY-LABEL: test_pavgw:
2455; SANDY:       # %bb.0:
2456; SANDY-NEXT:    pavgw %mm1, %mm0 # sched: [3:1.00]
2457; SANDY-NEXT:    pavgw (%rdi), %mm0 # sched: [8:1.00]
2458; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2459; SANDY-NEXT:    retq # sched: [1:1.00]
2460;
2461; HASWELL-LABEL: test_pavgw:
2462; HASWELL:       # %bb.0:
2463; HASWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
2464; HASWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
2465; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2466; HASWELL-NEXT:    retq # sched: [7:1.00]
2467;
2468; BROADWELL-LABEL: test_pavgw:
2469; BROADWELL:       # %bb.0:
2470; BROADWELL-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
2471; BROADWELL-NEXT:    pavgw (%rdi), %mm0 # sched: [6:0.50]
2472; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2473; BROADWELL-NEXT:    retq # sched: [7:1.00]
2474;
2475; SKYLAKE-LABEL: test_pavgw:
2476; SKYLAKE:       # %bb.0:
2477; SKYLAKE-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
2478; SKYLAKE-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
2479; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2480; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2481;
2482; SKX-LABEL: test_pavgw:
2483; SKX:       # %bb.0:
2484; SKX-NEXT:    pavgw %mm1, %mm0 # sched: [1:1.00]
2485; SKX-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
2486; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2487; SKX-NEXT:    retq # sched: [7:1.00]
2488;
2489; BTVER2-LABEL: test_pavgw:
2490; BTVER2:       # %bb.0:
2491; BTVER2-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.50]
2492; BTVER2-NEXT:    pavgw (%rdi), %mm0 # sched: [6:1.00]
2493; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2494; BTVER2-NEXT:    retq # sched: [4:1.00]
2495;
2496; ZNVER1-LABEL: test_pavgw:
2497; ZNVER1:       # %bb.0:
2498; ZNVER1-NEXT:    pavgw %mm1, %mm0 # sched: [1:0.25]
2499; ZNVER1-NEXT:    pavgw (%rdi), %mm0 # sched: [8:0.50]
2500; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2501; ZNVER1-NEXT:    retq # sched: [1:0.50]
2502  %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1)
2503  %2 = load x86_mmx, x86_mmx *%a2, align 8
2504  %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2)
2505  %4 = bitcast x86_mmx %3 to i64
2506  ret i64 %4
2507}
2508declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
2509
; Exercises the MMX pcmpeqb intrinsic (@llvm.x86.mmx.pcmpeq.b) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpeqb, a pcmpeqb with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2510define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2511; GENERIC-LABEL: test_pcmpeqb:
2512; GENERIC:       # %bb.0:
2513; GENERIC-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
2514; GENERIC-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
2515; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2516; GENERIC-NEXT:    retq # sched: [1:1.00]
2517;
2518; ATOM-LABEL: test_pcmpeqb:
2519; ATOM:       # %bb.0:
2520; ATOM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
2521; ATOM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [1:1.00]
2522; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2523; ATOM-NEXT:    retq # sched: [79:39.50]
2524;
2525; SLM-LABEL: test_pcmpeqb:
2526; SLM:       # %bb.0:
2527; SLM-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
2528; SLM-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [4:1.00]
2529; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2530; SLM-NEXT:    retq # sched: [4:1.00]
2531;
2532; SANDY-LABEL: test_pcmpeqb:
2533; SANDY:       # %bb.0:
2534; SANDY-NEXT:    pcmpeqb %mm1, %mm0 # sched: [3:1.00]
2535; SANDY-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
2536; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2537; SANDY-NEXT:    retq # sched: [1:1.00]
2538;
2539; HASWELL-LABEL: test_pcmpeqb:
2540; HASWELL:       # %bb.0:
2541; HASWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
2542; HASWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
2543; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2544; HASWELL-NEXT:    retq # sched: [7:1.00]
2545;
2546; BROADWELL-LABEL: test_pcmpeqb:
2547; BROADWELL:       # %bb.0:
2548; BROADWELL-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
2549; BROADWELL-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
2550; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2551; BROADWELL-NEXT:    retq # sched: [7:1.00]
2552;
2553; SKYLAKE-LABEL: test_pcmpeqb:
2554; SKYLAKE:       # %bb.0:
2555; SKYLAKE-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
2556; SKYLAKE-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
2557; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2558; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2559;
2560; SKX-LABEL: test_pcmpeqb:
2561; SKX:       # %bb.0:
2562; SKX-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:1.00]
2563; SKX-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
2564; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2565; SKX-NEXT:    retq # sched: [7:1.00]
2566;
2567; BTVER2-LABEL: test_pcmpeqb:
2568; BTVER2:       # %bb.0:
2569; BTVER2-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.50]
2570; BTVER2-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
2571; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2572; BTVER2-NEXT:    retq # sched: [4:1.00]
2573;
2574; ZNVER1-LABEL: test_pcmpeqb:
2575; ZNVER1:       # %bb.0:
2576; ZNVER1-NEXT:    pcmpeqb %mm1, %mm0 # sched: [1:0.25]
2577; ZNVER1-NEXT:    pcmpeqb (%rdi), %mm0 # sched: [8:0.50]
2578; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2579; ZNVER1-NEXT:    retq # sched: [1:0.50]
2580  %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1)
2581  %2 = load x86_mmx, x86_mmx *%a2, align 8
2582  %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2)
2583  %4 = bitcast x86_mmx %3 to i64
2584  ret i64 %4
2585}
2586declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
2587
; Exercises the MMX pcmpeqd intrinsic (@llvm.x86.mmx.pcmpeq.d) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpeqd, a pcmpeqd with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2588define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2589; GENERIC-LABEL: test_pcmpeqd:
2590; GENERIC:       # %bb.0:
2591; GENERIC-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
2592; GENERIC-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
2593; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2594; GENERIC-NEXT:    retq # sched: [1:1.00]
2595;
2596; ATOM-LABEL: test_pcmpeqd:
2597; ATOM:       # %bb.0:
2598; ATOM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
2599; ATOM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [1:1.00]
2600; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2601; ATOM-NEXT:    retq # sched: [79:39.50]
2602;
2603; SLM-LABEL: test_pcmpeqd:
2604; SLM:       # %bb.0:
2605; SLM-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
2606; SLM-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [4:1.00]
2607; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2608; SLM-NEXT:    retq # sched: [4:1.00]
2609;
2610; SANDY-LABEL: test_pcmpeqd:
2611; SANDY:       # %bb.0:
2612; SANDY-NEXT:    pcmpeqd %mm1, %mm0 # sched: [3:1.00]
2613; SANDY-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
2614; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2615; SANDY-NEXT:    retq # sched: [1:1.00]
2616;
2617; HASWELL-LABEL: test_pcmpeqd:
2618; HASWELL:       # %bb.0:
2619; HASWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
2620; HASWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
2621; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2622; HASWELL-NEXT:    retq # sched: [7:1.00]
2623;
2624; BROADWELL-LABEL: test_pcmpeqd:
2625; BROADWELL:       # %bb.0:
2626; BROADWELL-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
2627; BROADWELL-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
2628; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2629; BROADWELL-NEXT:    retq # sched: [7:1.00]
2630;
2631; SKYLAKE-LABEL: test_pcmpeqd:
2632; SKYLAKE:       # %bb.0:
2633; SKYLAKE-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
2634; SKYLAKE-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
2635; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2636; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2637;
2638; SKX-LABEL: test_pcmpeqd:
2639; SKX:       # %bb.0:
2640; SKX-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:1.00]
2641; SKX-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
2642; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2643; SKX-NEXT:    retq # sched: [7:1.00]
2644;
2645; BTVER2-LABEL: test_pcmpeqd:
2646; BTVER2:       # %bb.0:
2647; BTVER2-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.50]
2648; BTVER2-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
2649; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2650; BTVER2-NEXT:    retq # sched: [4:1.00]
2651;
2652; ZNVER1-LABEL: test_pcmpeqd:
2653; ZNVER1:       # %bb.0:
2654; ZNVER1-NEXT:    pcmpeqd %mm1, %mm0 # sched: [1:0.25]
2655; ZNVER1-NEXT:    pcmpeqd (%rdi), %mm0 # sched: [8:0.50]
2656; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2657; ZNVER1-NEXT:    retq # sched: [1:0.50]
2658  %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1)
2659  %2 = load x86_mmx, x86_mmx *%a2, align 8
2660  %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2)
2661  %4 = bitcast x86_mmx %3 to i64
2662  ret i64 %4
2663}
2664declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
2665
; Exercises the MMX pcmpeqw intrinsic (@llvm.x86.mmx.pcmpeq.w) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpeqw, a pcmpeqw with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2666define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2667; GENERIC-LABEL: test_pcmpeqw:
2668; GENERIC:       # %bb.0:
2669; GENERIC-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
2670; GENERIC-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
2671; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2672; GENERIC-NEXT:    retq # sched: [1:1.00]
2673;
2674; ATOM-LABEL: test_pcmpeqw:
2675; ATOM:       # %bb.0:
2676; ATOM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
2677; ATOM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [1:1.00]
2678; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2679; ATOM-NEXT:    retq # sched: [79:39.50]
2680;
2681; SLM-LABEL: test_pcmpeqw:
2682; SLM:       # %bb.0:
2683; SLM-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
2684; SLM-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [4:1.00]
2685; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2686; SLM-NEXT:    retq # sched: [4:1.00]
2687;
2688; SANDY-LABEL: test_pcmpeqw:
2689; SANDY:       # %bb.0:
2690; SANDY-NEXT:    pcmpeqw %mm1, %mm0 # sched: [3:1.00]
2691; SANDY-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
2692; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2693; SANDY-NEXT:    retq # sched: [1:1.00]
2694;
2695; HASWELL-LABEL: test_pcmpeqw:
2696; HASWELL:       # %bb.0:
2697; HASWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
2698; HASWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
2699; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2700; HASWELL-NEXT:    retq # sched: [7:1.00]
2701;
2702; BROADWELL-LABEL: test_pcmpeqw:
2703; BROADWELL:       # %bb.0:
2704; BROADWELL-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
2705; BROADWELL-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
2706; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2707; BROADWELL-NEXT:    retq # sched: [7:1.00]
2708;
2709; SKYLAKE-LABEL: test_pcmpeqw:
2710; SKYLAKE:       # %bb.0:
2711; SKYLAKE-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
2712; SKYLAKE-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
2713; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2714; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2715;
2716; SKX-LABEL: test_pcmpeqw:
2717; SKX:       # %bb.0:
2718; SKX-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:1.00]
2719; SKX-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
2720; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2721; SKX-NEXT:    retq # sched: [7:1.00]
2722;
2723; BTVER2-LABEL: test_pcmpeqw:
2724; BTVER2:       # %bb.0:
2725; BTVER2-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.50]
2726; BTVER2-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
2727; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2728; BTVER2-NEXT:    retq # sched: [4:1.00]
2729;
2730; ZNVER1-LABEL: test_pcmpeqw:
2731; ZNVER1:       # %bb.0:
2732; ZNVER1-NEXT:    pcmpeqw %mm1, %mm0 # sched: [1:0.25]
2733; ZNVER1-NEXT:    pcmpeqw (%rdi), %mm0 # sched: [8:0.50]
2734; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2735; ZNVER1-NEXT:    retq # sched: [1:0.50]
2736  %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1)
2737  %2 = load x86_mmx, x86_mmx *%a2, align 8
2738  %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2)
2739  %4 = bitcast x86_mmx %3 to i64
2740  ret i64 %4
2741}
2742declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
2743
; Exercises the MMX pcmpgtb intrinsic (@llvm.x86.mmx.pcmpgt.b) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpgtb, a pcmpgtb with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2744define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2745; GENERIC-LABEL: test_pcmpgtb:
2746; GENERIC:       # %bb.0:
2747; GENERIC-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
2748; GENERIC-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
2749; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2750; GENERIC-NEXT:    retq # sched: [1:1.00]
2751;
2752; ATOM-LABEL: test_pcmpgtb:
2753; ATOM:       # %bb.0:
2754; ATOM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
2755; ATOM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [1:1.00]
2756; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2757; ATOM-NEXT:    retq # sched: [79:39.50]
2758;
2759; SLM-LABEL: test_pcmpgtb:
2760; SLM:       # %bb.0:
2761; SLM-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
2762; SLM-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [4:1.00]
2763; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2764; SLM-NEXT:    retq # sched: [4:1.00]
2765;
2766; SANDY-LABEL: test_pcmpgtb:
2767; SANDY:       # %bb.0:
2768; SANDY-NEXT:    pcmpgtb %mm1, %mm0 # sched: [3:1.00]
2769; SANDY-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
2770; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2771; SANDY-NEXT:    retq # sched: [1:1.00]
2772;
2773; HASWELL-LABEL: test_pcmpgtb:
2774; HASWELL:       # %bb.0:
2775; HASWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
2776; HASWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
2777; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2778; HASWELL-NEXT:    retq # sched: [7:1.00]
2779;
2780; BROADWELL-LABEL: test_pcmpgtb:
2781; BROADWELL:       # %bb.0:
2782; BROADWELL-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
2783; BROADWELL-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
2784; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2785; BROADWELL-NEXT:    retq # sched: [7:1.00]
2786;
2787; SKYLAKE-LABEL: test_pcmpgtb:
2788; SKYLAKE:       # %bb.0:
2789; SKYLAKE-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
2790; SKYLAKE-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
2791; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2792; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2793;
2794; SKX-LABEL: test_pcmpgtb:
2795; SKX:       # %bb.0:
2796; SKX-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:1.00]
2797; SKX-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
2798; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2799; SKX-NEXT:    retq # sched: [7:1.00]
2800;
2801; BTVER2-LABEL: test_pcmpgtb:
2802; BTVER2:       # %bb.0:
2803; BTVER2-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.50]
2804; BTVER2-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
2805; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2806; BTVER2-NEXT:    retq # sched: [4:1.00]
2807;
2808; ZNVER1-LABEL: test_pcmpgtb:
2809; ZNVER1:       # %bb.0:
2810; ZNVER1-NEXT:    pcmpgtb %mm1, %mm0 # sched: [1:0.25]
2811; ZNVER1-NEXT:    pcmpgtb (%rdi), %mm0 # sched: [8:0.50]
2812; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2813; ZNVER1-NEXT:    retq # sched: [1:0.50]
2814  %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1)
2815  %2 = load x86_mmx, x86_mmx *%a2, align 8
2816  %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2)
2817  %4 = bitcast x86_mmx %3 to i64
2818  ret i64 %4
2819}
2820declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
2821
; Exercises the MMX pcmpgtd intrinsic (@llvm.x86.mmx.pcmpgt.d) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpgtd, a pcmpgtd with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2822define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2823; GENERIC-LABEL: test_pcmpgtd:
2824; GENERIC:       # %bb.0:
2825; GENERIC-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
2826; GENERIC-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
2827; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2828; GENERIC-NEXT:    retq # sched: [1:1.00]
2829;
2830; ATOM-LABEL: test_pcmpgtd:
2831; ATOM:       # %bb.0:
2832; ATOM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
2833; ATOM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [1:1.00]
2834; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2835; ATOM-NEXT:    retq # sched: [79:39.50]
2836;
2837; SLM-LABEL: test_pcmpgtd:
2838; SLM:       # %bb.0:
2839; SLM-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
2840; SLM-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [4:1.00]
2841; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2842; SLM-NEXT:    retq # sched: [4:1.00]
2843;
2844; SANDY-LABEL: test_pcmpgtd:
2845; SANDY:       # %bb.0:
2846; SANDY-NEXT:    pcmpgtd %mm1, %mm0 # sched: [3:1.00]
2847; SANDY-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
2848; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2849; SANDY-NEXT:    retq # sched: [1:1.00]
2850;
2851; HASWELL-LABEL: test_pcmpgtd:
2852; HASWELL:       # %bb.0:
2853; HASWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
2854; HASWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
2855; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2856; HASWELL-NEXT:    retq # sched: [7:1.00]
2857;
2858; BROADWELL-LABEL: test_pcmpgtd:
2859; BROADWELL:       # %bb.0:
2860; BROADWELL-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
2861; BROADWELL-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
2862; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2863; BROADWELL-NEXT:    retq # sched: [7:1.00]
2864;
2865; SKYLAKE-LABEL: test_pcmpgtd:
2866; SKYLAKE:       # %bb.0:
2867; SKYLAKE-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
2868; SKYLAKE-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
2869; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2870; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2871;
2872; SKX-LABEL: test_pcmpgtd:
2873; SKX:       # %bb.0:
2874; SKX-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:1.00]
2875; SKX-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
2876; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2877; SKX-NEXT:    retq # sched: [7:1.00]
2878;
2879; BTVER2-LABEL: test_pcmpgtd:
2880; BTVER2:       # %bb.0:
2881; BTVER2-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.50]
2882; BTVER2-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
2883; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2884; BTVER2-NEXT:    retq # sched: [4:1.00]
2885;
2886; ZNVER1-LABEL: test_pcmpgtd:
2887; ZNVER1:       # %bb.0:
2888; ZNVER1-NEXT:    pcmpgtd %mm1, %mm0 # sched: [1:0.25]
2889; ZNVER1-NEXT:    pcmpgtd (%rdi), %mm0 # sched: [8:0.50]
2890; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2891; ZNVER1-NEXT:    retq # sched: [1:0.50]
2892  %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1)
2893  %2 = load x86_mmx, x86_mmx *%a2, align 8
2894  %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2)
2895  %4 = bitcast x86_mmx %3 to i64
2896  ret i64 %4
2897}
2898declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
2899
; Exercises the MMX pcmpgtw intrinsic (@llvm.x86.mmx.pcmpgt.w) in two forms:
; the first call takes the register arguments %a0 and %a1, the second takes
; that result and an x86_mmx value loaded from %a2. The result is bitcast to
; i64 and returned. Each per-CPU group of assertions below expects a
; register-register pcmpgtw, a pcmpgtw with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2900define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
2901; GENERIC-LABEL: test_pcmpgtw:
2902; GENERIC:       # %bb.0:
2903; GENERIC-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
2904; GENERIC-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
2905; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2906; GENERIC-NEXT:    retq # sched: [1:1.00]
2907;
2908; ATOM-LABEL: test_pcmpgtw:
2909; ATOM:       # %bb.0:
2910; ATOM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
2911; ATOM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [1:1.00]
2912; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
2913; ATOM-NEXT:    retq # sched: [79:39.50]
2914;
2915; SLM-LABEL: test_pcmpgtw:
2916; SLM:       # %bb.0:
2917; SLM-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
2918; SLM-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [4:1.00]
2919; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
2920; SLM-NEXT:    retq # sched: [4:1.00]
2921;
2922; SANDY-LABEL: test_pcmpgtw:
2923; SANDY:       # %bb.0:
2924; SANDY-NEXT:    pcmpgtw %mm1, %mm0 # sched: [3:1.00]
2925; SANDY-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
2926; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2927; SANDY-NEXT:    retq # sched: [1:1.00]
2928;
2929; HASWELL-LABEL: test_pcmpgtw:
2930; HASWELL:       # %bb.0:
2931; HASWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
2932; HASWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
2933; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2934; HASWELL-NEXT:    retq # sched: [7:1.00]
2935;
2936; BROADWELL-LABEL: test_pcmpgtw:
2937; BROADWELL:       # %bb.0:
2938; BROADWELL-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
2939; BROADWELL-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
2940; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
2941; BROADWELL-NEXT:    retq # sched: [7:1.00]
2942;
2943; SKYLAKE-LABEL: test_pcmpgtw:
2944; SKYLAKE:       # %bb.0:
2945; SKYLAKE-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
2946; SKYLAKE-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
2947; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2948; SKYLAKE-NEXT:    retq # sched: [7:1.00]
2949;
2950; SKX-LABEL: test_pcmpgtw:
2951; SKX:       # %bb.0:
2952; SKX-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:1.00]
2953; SKX-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
2954; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2955; SKX-NEXT:    retq # sched: [7:1.00]
2956;
2957; BTVER2-LABEL: test_pcmpgtw:
2958; BTVER2:       # %bb.0:
2959; BTVER2-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.50]
2960; BTVER2-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
2961; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
2962; BTVER2-NEXT:    retq # sched: [4:1.00]
2963;
2964; ZNVER1-LABEL: test_pcmpgtw:
2965; ZNVER1:       # %bb.0:
2966; ZNVER1-NEXT:    pcmpgtw %mm1, %mm0 # sched: [1:0.25]
2967; ZNVER1-NEXT:    pcmpgtw (%rdi), %mm0 # sched: [8:0.50]
2968; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
2969; ZNVER1-NEXT:    retq # sched: [1:0.50]
2970  %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1)
2971  %2 = load x86_mmx, x86_mmx *%a2, align 8
2972  %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2)
2973  %4 = bitcast x86_mmx %3 to i64
2974  ret i64 %4
2975}
2976declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
2977
; Exercises the MMX pextrw intrinsic (@llvm.x86.mmx.pextr.w) with the constant
; index 0 on the single register argument %a0, returning the i32 result
; directly. Unlike the neighbouring two-operand tests there is no memory
; operand and no movq: each per-CPU group of assertions below expects only
; "pextrw $0, %mm0, %eax" followed by retq, each with the "# sched" annotation
; that llc prints under -print-schedule (see the RUN lines at the top of the
; file).
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
2978define i32 @test_pextrw(x86_mmx %a0) optsize {
2979; GENERIC-LABEL: test_pextrw:
2980; GENERIC:       # %bb.0:
2981; GENERIC-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
2982; GENERIC-NEXT:    retq # sched: [1:1.00]
2983;
2984; ATOM-LABEL: test_pextrw:
2985; ATOM:       # %bb.0:
2986; ATOM-NEXT:    pextrw $0, %mm0, %eax # sched: [4:2.00]
2987; ATOM-NEXT:    retq # sched: [79:39.50]
2988;
2989; SLM-LABEL: test_pextrw:
2990; SLM:       # %bb.0:
2991; SLM-NEXT:    pextrw $0, %mm0, %eax # sched: [1:1.00]
2992; SLM-NEXT:    retq # sched: [4:1.00]
2993;
2994; SANDY-LABEL: test_pextrw:
2995; SANDY:       # %bb.0:
2996; SANDY-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
2997; SANDY-NEXT:    retq # sched: [1:1.00]
2998;
2999; HASWELL-LABEL: test_pextrw:
3000; HASWELL:       # %bb.0:
3001; HASWELL-NEXT:    pextrw $0, %mm0, %eax # sched: [2:1.00]
3002; HASWELL-NEXT:    retq # sched: [7:1.00]
3003;
3004; BROADWELL-LABEL: test_pextrw:
3005; BROADWELL:       # %bb.0:
3006; BROADWELL-NEXT:    pextrw $0, %mm0, %eax # sched: [2:1.00]
3007; BROADWELL-NEXT:    retq # sched: [7:1.00]
3008;
3009; SKYLAKE-LABEL: test_pextrw:
3010; SKYLAKE:       # %bb.0:
3011; SKYLAKE-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
3012; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3013;
3014; SKX-LABEL: test_pextrw:
3015; SKX:       # %bb.0:
3016; SKX-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
3017; SKX-NEXT:    retq # sched: [7:1.00]
3018;
3019; BTVER2-LABEL: test_pextrw:
3020; BTVER2:       # %bb.0:
3021; BTVER2-NEXT:    pextrw $0, %mm0, %eax # sched: [3:1.00]
3022; BTVER2-NEXT:    retq # sched: [4:1.00]
3023;
3024; ZNVER1-LABEL: test_pextrw:
3025; ZNVER1:       # %bb.0:
3026; ZNVER1-NEXT:    pextrw $0, %mm0, %eax # sched: [2:2.00]
3027; ZNVER1-NEXT:    retq # sched: [1:0.50]
3028  %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0)
3029  ret i32 %1
3030}
3031declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone
3032
; Exercises the phaddd intrinsic on MMX operands (@llvm.x86.ssse3.phadd.d) in
; two forms: the first call takes the register arguments %a0 and %a1, the
; second takes that result and an x86_mmx value loaded from %a2. The result is
; bitcast to i64 and returned. Each per-CPU group of assertions below expects
; a register-register phaddd, a phaddd with a (%rdi) memory operand, a movq
; into %rax and a retq, each with the "# sched" annotation that llc prints
; under -print-schedule (see the RUN lines at the top of the file).
; This intrinsic lives in the ssse3 namespace rather than mmx; the generic
; x86-64 RUN line enables it explicitly with -mattr=+ssse3.
; The assertions are autogenerated (see the note at the top of the file), so
; regenerate them with the script instead of editing them by hand.
3033define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3034; GENERIC-LABEL: test_phaddd:
3035; GENERIC:       # %bb.0:
3036; GENERIC-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
3037; GENERIC-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
3038; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3039; GENERIC-NEXT:    retq # sched: [1:1.00]
3040;
3041; ATOM-LABEL: test_phaddd:
3042; ATOM:       # %bb.0:
3043; ATOM-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
3044; ATOM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:2.00]
3045; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3046; ATOM-NEXT:    retq # sched: [79:39.50]
3047;
3048; SLM-LABEL: test_phaddd:
3049; SLM:       # %bb.0:
3050; SLM-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
3051; SLM-NEXT:    phaddd (%rdi), %mm0 # sched: [4:1.00]
3052; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3053; SLM-NEXT:    retq # sched: [4:1.00]
3054;
3055; SANDY-LABEL: test_phaddd:
3056; SANDY:       # %bb.0:
3057; SANDY-NEXT:    phaddd %mm1, %mm0 # sched: [3:1.50]
3058; SANDY-NEXT:    phaddd (%rdi), %mm0 # sched: [8:1.50]
3059; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3060; SANDY-NEXT:    retq # sched: [1:1.00]
3061;
3062; HASWELL-LABEL: test_phaddd:
3063; HASWELL:       # %bb.0:
3064; HASWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
3065; HASWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
3066; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3067; HASWELL-NEXT:    retq # sched: [7:1.00]
3068;
3069; BROADWELL-LABEL: test_phaddd:
3070; BROADWELL:       # %bb.0:
3071; BROADWELL-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
3072; BROADWELL-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
3073; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3074; BROADWELL-NEXT:    retq # sched: [7:1.00]
3075;
3076; SKYLAKE-LABEL: test_phaddd:
3077; SKYLAKE:       # %bb.0:
3078; SKYLAKE-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
3079; SKYLAKE-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
3080; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3081; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3082;
3083; SKX-LABEL: test_phaddd:
3084; SKX:       # %bb.0:
3085; SKX-NEXT:    phaddd %mm1, %mm0 # sched: [3:2.00]
3086; SKX-NEXT:    phaddd (%rdi), %mm0 # sched: [8:2.00]
3087; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3088; SKX-NEXT:    retq # sched: [7:1.00]
3089;
3090; BTVER2-LABEL: test_phaddd:
3091; BTVER2:       # %bb.0:
3092; BTVER2-NEXT:    phaddd %mm1, %mm0 # sched: [1:0.50]
3093; BTVER2-NEXT:    phaddd (%rdi), %mm0 # sched: [6:1.00]
3094; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3095; BTVER2-NEXT:    retq # sched: [4:1.00]
3096;
3097; ZNVER1-LABEL: test_phaddd:
3098; ZNVER1:       # %bb.0:
3099; ZNVER1-NEXT:    phaddd %mm1, %mm0 # sched: [100:0.25]
3100; ZNVER1-NEXT:    phaddd (%rdi), %mm0 # sched: [100:0.25]
3101; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3102; ZNVER1-NEXT:    retq # sched: [1:0.50]
3103  %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1)
3104  %2 = load x86_mmx, x86_mmx *%a2, align 8
3105  %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2)
3106  %4 = bitcast x86_mmx %3 to i64
3107  ret i64 %4
3108}
3109declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
3110
; Schedule test for the MMX form of PHADDSW (packed horizontal add of words with
; signed saturation), reached through the llvm.x86.ssse3.phadd.sw intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
; NOTE(review): the ZNVER1 group expects [100:0.25] for both phaddsw forms -
; presumably a placeholder in that scheduling model; confirm.
3111define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3112; GENERIC-LABEL: test_phaddsw:
3113; GENERIC:       # %bb.0:
3114; GENERIC-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
3115; GENERIC-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
3116; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3117; GENERIC-NEXT:    retq # sched: [1:1.00]
3118;
3119; ATOM-LABEL: test_phaddsw:
3120; ATOM:       # %bb.0:
3121; ATOM-NEXT:    phaddsw %mm1, %mm0 # sched: [5:2.50]
3122; ATOM-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:3.00]
3123; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3124; ATOM-NEXT:    retq # sched: [79:39.50]
3125;
3126; SLM-LABEL: test_phaddsw:
3127; SLM:       # %bb.0:
3128; SLM-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
3129; SLM-NEXT:    phaddsw (%rdi), %mm0 # sched: [4:1.00]
3130; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3131; SLM-NEXT:    retq # sched: [4:1.00]
3132;
3133; SANDY-LABEL: test_phaddsw:
3134; SANDY:       # %bb.0:
3135; SANDY-NEXT:    phaddsw %mm1, %mm0 # sched: [3:1.50]
3136; SANDY-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:1.50]
3137; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3138; SANDY-NEXT:    retq # sched: [1:1.00]
3139;
3140; HASWELL-LABEL: test_phaddsw:
3141; HASWELL:       # %bb.0:
3142; HASWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
3143; HASWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
3144; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3145; HASWELL-NEXT:    retq # sched: [7:1.00]
3146;
3147; BROADWELL-LABEL: test_phaddsw:
3148; BROADWELL:       # %bb.0:
3149; BROADWELL-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
3150; BROADWELL-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
3151; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3152; BROADWELL-NEXT:    retq # sched: [7:1.00]
3153;
3154; SKYLAKE-LABEL: test_phaddsw:
3155; SKYLAKE:       # %bb.0:
3156; SKYLAKE-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
3157; SKYLAKE-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
3158; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3159; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3160;
3161; SKX-LABEL: test_phaddsw:
3162; SKX:       # %bb.0:
3163; SKX-NEXT:    phaddsw %mm1, %mm0 # sched: [3:2.00]
3164; SKX-NEXT:    phaddsw (%rdi), %mm0 # sched: [8:2.00]
3165; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3166; SKX-NEXT:    retq # sched: [7:1.00]
3167;
3168; BTVER2-LABEL: test_phaddsw:
3169; BTVER2:       # %bb.0:
3170; BTVER2-NEXT:    phaddsw %mm1, %mm0 # sched: [1:0.50]
3171; BTVER2-NEXT:    phaddsw (%rdi), %mm0 # sched: [6:1.00]
3172; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3173; BTVER2-NEXT:    retq # sched: [4:1.00]
3174;
3175; ZNVER1-LABEL: test_phaddsw:
3176; ZNVER1:       # %bb.0:
3177; ZNVER1-NEXT:    phaddsw %mm1, %mm0 # sched: [100:0.25]
3178; ZNVER1-NEXT:    phaddsw (%rdi), %mm0 # sched: [100:0.25]
3179; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3180; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3181  %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3182  %2 = load x86_mmx, x86_mmx *%a2, align 8
3183  %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3184  %4 = bitcast x86_mmx %3 to i64
3185  ret i64 %4
3186}
3187declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
3188
; Schedule test for the MMX form of PHADDW (packed horizontal add, words),
; reached through the llvm.x86.ssse3.phadd.w intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
; NOTE(review): the ZNVER1 group expects [100:0.25] for both phaddw forms -
; presumably a placeholder in that scheduling model; confirm.
3189define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3190; GENERIC-LABEL: test_phaddw:
3191; GENERIC:       # %bb.0:
3192; GENERIC-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
3193; GENERIC-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
3194; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3195; GENERIC-NEXT:    retq # sched: [1:1.00]
3196;
3197; ATOM-LABEL: test_phaddw:
3198; ATOM:       # %bb.0:
3199; ATOM-NEXT:    phaddw %mm1, %mm0 # sched: [5:2.50]
3200; ATOM-NEXT:    phaddw (%rdi), %mm0 # sched: [6:3.00]
3201; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3202; ATOM-NEXT:    retq # sched: [79:39.50]
3203;
3204; SLM-LABEL: test_phaddw:
3205; SLM:       # %bb.0:
3206; SLM-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
3207; SLM-NEXT:    phaddw (%rdi), %mm0 # sched: [4:1.00]
3208; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3209; SLM-NEXT:    retq # sched: [4:1.00]
3210;
3211; SANDY-LABEL: test_phaddw:
3212; SANDY:       # %bb.0:
3213; SANDY-NEXT:    phaddw %mm1, %mm0 # sched: [3:1.50]
3214; SANDY-NEXT:    phaddw (%rdi), %mm0 # sched: [8:1.50]
3215; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3216; SANDY-NEXT:    retq # sched: [1:1.00]
3217;
3218; HASWELL-LABEL: test_phaddw:
3219; HASWELL:       # %bb.0:
3220; HASWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
3221; HASWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
3222; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3223; HASWELL-NEXT:    retq # sched: [7:1.00]
3224;
3225; BROADWELL-LABEL: test_phaddw:
3226; BROADWELL:       # %bb.0:
3227; BROADWELL-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
3228; BROADWELL-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
3229; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3230; BROADWELL-NEXT:    retq # sched: [7:1.00]
3231;
3232; SKYLAKE-LABEL: test_phaddw:
3233; SKYLAKE:       # %bb.0:
3234; SKYLAKE-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
3235; SKYLAKE-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
3236; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3237; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3238;
3239; SKX-LABEL: test_phaddw:
3240; SKX:       # %bb.0:
3241; SKX-NEXT:    phaddw %mm1, %mm0 # sched: [3:2.00]
3242; SKX-NEXT:    phaddw (%rdi), %mm0 # sched: [8:2.00]
3243; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3244; SKX-NEXT:    retq # sched: [7:1.00]
3245;
3246; BTVER2-LABEL: test_phaddw:
3247; BTVER2:       # %bb.0:
3248; BTVER2-NEXT:    phaddw %mm1, %mm0 # sched: [1:0.50]
3249; BTVER2-NEXT:    phaddw (%rdi), %mm0 # sched: [6:1.00]
3250; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3251; BTVER2-NEXT:    retq # sched: [4:1.00]
3252;
3253; ZNVER1-LABEL: test_phaddw:
3254; ZNVER1:       # %bb.0:
3255; ZNVER1-NEXT:    phaddw %mm1, %mm0 # sched: [100:0.25]
3256; ZNVER1-NEXT:    phaddw (%rdi), %mm0 # sched: [100:0.25]
3257; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3258; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3259  %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3260  %2 = load x86_mmx, x86_mmx *%a2, align 8
3261  %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3262  %4 = bitcast x86_mmx %3 to i64
3263  ret i64 %4
3264}
3265declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
3266
; Schedule test for the MMX form of PHSUBD (packed horizontal subtract,
; doublewords), reached through the llvm.x86.ssse3.phsub.d intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
; NOTE(review): the ZNVER1 group expects [100:0.25] for both phsubd forms -
; presumably a placeholder in that scheduling model; confirm.
3267define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3268; GENERIC-LABEL: test_phsubd:
3269; GENERIC:       # %bb.0:
3270; GENERIC-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
3271; GENERIC-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
3272; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3273; GENERIC-NEXT:    retq # sched: [1:1.00]
3274;
3275; ATOM-LABEL: test_phsubd:
3276; ATOM:       # %bb.0:
3277; ATOM-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
3278; ATOM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:2.00]
3279; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3280; ATOM-NEXT:    retq # sched: [79:39.50]
3281;
3282; SLM-LABEL: test_phsubd:
3283; SLM:       # %bb.0:
3284; SLM-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
3285; SLM-NEXT:    phsubd (%rdi), %mm0 # sched: [4:1.00]
3286; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3287; SLM-NEXT:    retq # sched: [4:1.00]
3288;
3289; SANDY-LABEL: test_phsubd:
3290; SANDY:       # %bb.0:
3291; SANDY-NEXT:    phsubd %mm1, %mm0 # sched: [3:1.50]
3292; SANDY-NEXT:    phsubd (%rdi), %mm0 # sched: [8:1.50]
3293; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3294; SANDY-NEXT:    retq # sched: [1:1.00]
3295;
3296; HASWELL-LABEL: test_phsubd:
3297; HASWELL:       # %bb.0:
3298; HASWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
3299; HASWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
3300; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3301; HASWELL-NEXT:    retq # sched: [7:1.00]
3302;
3303; BROADWELL-LABEL: test_phsubd:
3304; BROADWELL:       # %bb.0:
3305; BROADWELL-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
3306; BROADWELL-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
3307; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3308; BROADWELL-NEXT:    retq # sched: [7:1.00]
3309;
3310; SKYLAKE-LABEL: test_phsubd:
3311; SKYLAKE:       # %bb.0:
3312; SKYLAKE-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
3313; SKYLAKE-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
3314; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3315; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3316;
3317; SKX-LABEL: test_phsubd:
3318; SKX:       # %bb.0:
3319; SKX-NEXT:    phsubd %mm1, %mm0 # sched: [3:2.00]
3320; SKX-NEXT:    phsubd (%rdi), %mm0 # sched: [8:2.00]
3321; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3322; SKX-NEXT:    retq # sched: [7:1.00]
3323;
3324; BTVER2-LABEL: test_phsubd:
3325; BTVER2:       # %bb.0:
3326; BTVER2-NEXT:    phsubd %mm1, %mm0 # sched: [1:0.50]
3327; BTVER2-NEXT:    phsubd (%rdi), %mm0 # sched: [6:1.00]
3328; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3329; BTVER2-NEXT:    retq # sched: [4:1.00]
3330;
3331; ZNVER1-LABEL: test_phsubd:
3332; ZNVER1:       # %bb.0:
3333; ZNVER1-NEXT:    phsubd %mm1, %mm0 # sched: [100:0.25]
3334; ZNVER1-NEXT:    phsubd (%rdi), %mm0 # sched: [100:0.25]
3335; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3336; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3337  %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3338  %2 = load x86_mmx, x86_mmx *%a2, align 8
3339  %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3340  %4 = bitcast x86_mmx %3 to i64
3341  ret i64 %4
3342}
3343declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
3344
; Schedule test for the MMX form of PHSUBSW (packed horizontal subtract of words
; with signed saturation), reached through the llvm.x86.ssse3.phsub.sw intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
; NOTE(review): the ZNVER1 group expects [100:0.25] for both phsubsw forms -
; presumably a placeholder in that scheduling model; confirm.
3345define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3346; GENERIC-LABEL: test_phsubsw:
3347; GENERIC:       # %bb.0:
3348; GENERIC-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
3349; GENERIC-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
3350; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3351; GENERIC-NEXT:    retq # sched: [1:1.00]
3352;
3353; ATOM-LABEL: test_phsubsw:
3354; ATOM:       # %bb.0:
3355; ATOM-NEXT:    phsubsw %mm1, %mm0 # sched: [5:2.50]
3356; ATOM-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:3.00]
3357; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3358; ATOM-NEXT:    retq # sched: [79:39.50]
3359;
3360; SLM-LABEL: test_phsubsw:
3361; SLM:       # %bb.0:
3362; SLM-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
3363; SLM-NEXT:    phsubsw (%rdi), %mm0 # sched: [4:1.00]
3364; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3365; SLM-NEXT:    retq # sched: [4:1.00]
3366;
3367; SANDY-LABEL: test_phsubsw:
3368; SANDY:       # %bb.0:
3369; SANDY-NEXT:    phsubsw %mm1, %mm0 # sched: [3:1.50]
3370; SANDY-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:1.50]
3371; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3372; SANDY-NEXT:    retq # sched: [1:1.00]
3373;
3374; HASWELL-LABEL: test_phsubsw:
3375; HASWELL:       # %bb.0:
3376; HASWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
3377; HASWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
3378; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3379; HASWELL-NEXT:    retq # sched: [7:1.00]
3380;
3381; BROADWELL-LABEL: test_phsubsw:
3382; BROADWELL:       # %bb.0:
3383; BROADWELL-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
3384; BROADWELL-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
3385; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3386; BROADWELL-NEXT:    retq # sched: [7:1.00]
3387;
3388; SKYLAKE-LABEL: test_phsubsw:
3389; SKYLAKE:       # %bb.0:
3390; SKYLAKE-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
3391; SKYLAKE-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
3392; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3393; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3394;
3395; SKX-LABEL: test_phsubsw:
3396; SKX:       # %bb.0:
3397; SKX-NEXT:    phsubsw %mm1, %mm0 # sched: [3:2.00]
3398; SKX-NEXT:    phsubsw (%rdi), %mm0 # sched: [8:2.00]
3399; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3400; SKX-NEXT:    retq # sched: [7:1.00]
3401;
3402; BTVER2-LABEL: test_phsubsw:
3403; BTVER2:       # %bb.0:
3404; BTVER2-NEXT:    phsubsw %mm1, %mm0 # sched: [1:0.50]
3405; BTVER2-NEXT:    phsubsw (%rdi), %mm0 # sched: [6:1.00]
3406; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3407; BTVER2-NEXT:    retq # sched: [4:1.00]
3408;
3409; ZNVER1-LABEL: test_phsubsw:
3410; ZNVER1:       # %bb.0:
3411; ZNVER1-NEXT:    phsubsw %mm1, %mm0 # sched: [100:0.25]
3412; ZNVER1-NEXT:    phsubsw (%rdi), %mm0 # sched: [100:0.25]
3413; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3414; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3415  %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3416  %2 = load x86_mmx, x86_mmx *%a2, align 8
3417  %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3418  %4 = bitcast x86_mmx %3 to i64
3419  ret i64 %4
3420}
3421declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
3422
; Schedule test for the MMX form of PHSUBW (packed horizontal subtract, words),
; reached through the llvm.x86.ssse3.phsub.w intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
; NOTE(review): the ZNVER1 group expects [100:0.25] for both phsubw forms -
; presumably a placeholder in that scheduling model; confirm.
3423define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3424; GENERIC-LABEL: test_phsubw:
3425; GENERIC:       # %bb.0:
3426; GENERIC-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
3427; GENERIC-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
3428; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3429; GENERIC-NEXT:    retq # sched: [1:1.00]
3430;
3431; ATOM-LABEL: test_phsubw:
3432; ATOM:       # %bb.0:
3433; ATOM-NEXT:    phsubw %mm1, %mm0 # sched: [5:2.50]
3434; ATOM-NEXT:    phsubw (%rdi), %mm0 # sched: [6:3.00]
3435; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3436; ATOM-NEXT:    retq # sched: [79:39.50]
3437;
3438; SLM-LABEL: test_phsubw:
3439; SLM:       # %bb.0:
3440; SLM-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
3441; SLM-NEXT:    phsubw (%rdi), %mm0 # sched: [4:1.00]
3442; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3443; SLM-NEXT:    retq # sched: [4:1.00]
3444;
3445; SANDY-LABEL: test_phsubw:
3446; SANDY:       # %bb.0:
3447; SANDY-NEXT:    phsubw %mm1, %mm0 # sched: [3:1.50]
3448; SANDY-NEXT:    phsubw (%rdi), %mm0 # sched: [8:1.50]
3449; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3450; SANDY-NEXT:    retq # sched: [1:1.00]
3451;
3452; HASWELL-LABEL: test_phsubw:
3453; HASWELL:       # %bb.0:
3454; HASWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
3455; HASWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
3456; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3457; HASWELL-NEXT:    retq # sched: [7:1.00]
3458;
3459; BROADWELL-LABEL: test_phsubw:
3460; BROADWELL:       # %bb.0:
3461; BROADWELL-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
3462; BROADWELL-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
3463; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3464; BROADWELL-NEXT:    retq # sched: [7:1.00]
3465;
3466; SKYLAKE-LABEL: test_phsubw:
3467; SKYLAKE:       # %bb.0:
3468; SKYLAKE-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
3469; SKYLAKE-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
3470; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3471; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3472;
3473; SKX-LABEL: test_phsubw:
3474; SKX:       # %bb.0:
3475; SKX-NEXT:    phsubw %mm1, %mm0 # sched: [3:2.00]
3476; SKX-NEXT:    phsubw (%rdi), %mm0 # sched: [8:2.00]
3477; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3478; SKX-NEXT:    retq # sched: [7:1.00]
3479;
3480; BTVER2-LABEL: test_phsubw:
3481; BTVER2:       # %bb.0:
3482; BTVER2-NEXT:    phsubw %mm1, %mm0 # sched: [1:0.50]
3483; BTVER2-NEXT:    phsubw (%rdi), %mm0 # sched: [6:1.00]
3484; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3485; BTVER2-NEXT:    retq # sched: [4:1.00]
3486;
3487; ZNVER1-LABEL: test_phsubw:
3488; ZNVER1:       # %bb.0:
3489; ZNVER1-NEXT:    phsubw %mm1, %mm0 # sched: [100:0.25]
3490; ZNVER1-NEXT:    phsubw (%rdi), %mm0 # sched: [100:0.25]
3491; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3492; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3493  %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3494  %2 = load x86_mmx, x86_mmx *%a2, align 8
3495  %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3496  %4 = bitcast x86_mmx %3 to i64
3497  ret i64 %4
3498}
3499declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
3500
; Schedule test for the MMX form of PINSRW (insert word), reached through the
; llvm.x86.mmx.pinsr.w intrinsic.
; The first call inserts the i32 argument %a1 at element index 0; the second
; inserts a sign-extended i16 loaded from %a2 at element index 1. In every
; expected output the load stays a separate movswl into %eax rather than being
; a memory operand of pinsrw, so only the register form of pinsrw is covered.
; The SLM and ZNVER1 groups expect the movswl load to be scheduled first, ahead
; of the first pinsrw; all other groups expect the first pinsrw to come first.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
3501define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
3502; GENERIC-LABEL: test_pinsrw:
3503; GENERIC:       # %bb.0:
3504; GENERIC-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
3505; GENERIC-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3506; GENERIC-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
3507; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3508; GENERIC-NEXT:    retq # sched: [1:1.00]
3509;
3510; ATOM-LABEL: test_pinsrw:
3511; ATOM:       # %bb.0:
3512; ATOM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
3513; ATOM-NEXT:    movswl (%rsi), %eax # sched: [1:1.00]
3514; ATOM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
3515; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3516; ATOM-NEXT:    retq # sched: [79:39.50]
3517;
3518; SLM-LABEL: test_pinsrw:
3519; SLM:       # %bb.0:
3520; SLM-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
3521; SLM-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:1.00]
3522; SLM-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:1.00]
3523; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3524; SLM-NEXT:    retq # sched: [4:1.00]
3525;
3526; SANDY-LABEL: test_pinsrw:
3527; SANDY:       # %bb.0:
3528; SANDY-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:1.00]
3529; SANDY-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3530; SANDY-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:1.00]
3531; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3532; SANDY-NEXT:    retq # sched: [1:1.00]
3533;
3534; HASWELL-LABEL: test_pinsrw:
3535; HASWELL:       # %bb.0:
3536; HASWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
3537; HASWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3538; HASWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
3539; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3540; HASWELL-NEXT:    retq # sched: [7:1.00]
3541;
3542; BROADWELL-LABEL: test_pinsrw:
3543; BROADWELL:       # %bb.0:
3544; BROADWELL-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
3545; BROADWELL-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3546; BROADWELL-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
3547; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3548; BROADWELL-NEXT:    retq # sched: [7:1.00]
3549;
3550; SKYLAKE-LABEL: test_pinsrw:
3551; SKYLAKE:       # %bb.0:
3552; SKYLAKE-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
3553; SKYLAKE-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3554; SKYLAKE-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
3555; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3556; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3557;
3558; SKX-LABEL: test_pinsrw:
3559; SKX:       # %bb.0:
3560; SKX-NEXT:    pinsrw $0, %edi, %mm0 # sched: [2:2.00]
3561; SKX-NEXT:    movswl (%rsi), %eax # sched: [5:0.50]
3562; SKX-NEXT:    pinsrw $1, %eax, %mm0 # sched: [2:2.00]
3563; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3564; SKX-NEXT:    retq # sched: [7:1.00]
3565;
3566; BTVER2-LABEL: test_pinsrw:
3567; BTVER2:       # %bb.0:
3568; BTVER2-NEXT:    pinsrw $0, %edi, %mm0 # sched: [7:0.50]
3569; BTVER2-NEXT:    movswl (%rsi), %eax # sched: [4:1.00]
3570; BTVER2-NEXT:    pinsrw $1, %eax, %mm0 # sched: [7:0.50]
3571; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3572; BTVER2-NEXT:    retq # sched: [4:1.00]
3573;
3574; ZNVER1-LABEL: test_pinsrw:
3575; ZNVER1:       # %bb.0:
3576; ZNVER1-NEXT:    movswl (%rsi), %eax # sched: [8:0.50]
3577; ZNVER1-NEXT:    pinsrw $0, %edi, %mm0 # sched: [1:0.25]
3578; ZNVER1-NEXT:    pinsrw $1, %eax, %mm0 # sched: [1:0.25]
3579; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3580; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Insert the register argument %a1 at element index 0.
3581  %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0)
  ; Load a 16-bit value and sign-extend it to the i32 the intrinsic takes
  ; (the movswl in the expected output), then insert it at element index 1.
3582  %2 = load i16, i16 *%a2, align 2
3583  %3 = sext i16 %2 to i32
3584  %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3585  %5 = bitcast x86_mmx %4 to i64
3586  ret i64 %5
3587}
3588declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone
3589
; Schedule test for the MMX form of PMADDWD (multiply packed words and add
; adjacent products into doublewords), reached through the
; llvm.x86.mmx.pmadd.wd intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
3590define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3591; GENERIC-LABEL: test_pmaddwd:
3592; GENERIC:       # %bb.0:
3593; GENERIC-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
3594; GENERIC-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
3595; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3596; GENERIC-NEXT:    retq # sched: [1:1.00]
3597;
3598; ATOM-LABEL: test_pmaddwd:
3599; ATOM:       # %bb.0:
3600; ATOM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:4.00]
3601; ATOM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [4:4.00]
3602; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3603; ATOM-NEXT:    retq # sched: [79:39.50]
3604;
3605; SLM-LABEL: test_pmaddwd:
3606; SLM:       # %bb.0:
3607; SLM-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
3608; SLM-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
3609; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3610; SLM-NEXT:    retq # sched: [4:1.00]
3611;
3612; SANDY-LABEL: test_pmaddwd:
3613; SANDY:       # %bb.0:
3614; SANDY-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
3615; SANDY-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
3616; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3617; SANDY-NEXT:    retq # sched: [1:1.00]
3618;
3619; HASWELL-LABEL: test_pmaddwd:
3620; HASWELL:       # %bb.0:
3621; HASWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
3622; HASWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
3623; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3624; HASWELL-NEXT:    retq # sched: [7:1.00]
3625;
3626; BROADWELL-LABEL: test_pmaddwd:
3627; BROADWELL:       # %bb.0:
3628; BROADWELL-NEXT:    pmaddwd %mm1, %mm0 # sched: [5:1.00]
3629; BROADWELL-NEXT:    pmaddwd (%rdi), %mm0 # sched: [10:1.00]
3630; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3631; BROADWELL-NEXT:    retq # sched: [7:1.00]
3632;
3633; SKYLAKE-LABEL: test_pmaddwd:
3634; SKYLAKE:       # %bb.0:
3635; SKYLAKE-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
3636; SKYLAKE-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
3637; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3638; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3639;
3640; SKX-LABEL: test_pmaddwd:
3641; SKX:       # %bb.0:
3642; SKX-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
3643; SKX-NEXT:    pmaddwd (%rdi), %mm0 # sched: [9:1.00]
3644; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3645; SKX-NEXT:    retq # sched: [7:1.00]
3646;
3647; BTVER2-LABEL: test_pmaddwd:
3648; BTVER2:       # %bb.0:
3649; BTVER2-NEXT:    pmaddwd %mm1, %mm0 # sched: [2:1.00]
3650; BTVER2-NEXT:    pmaddwd (%rdi), %mm0 # sched: [7:1.00]
3651; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3652; BTVER2-NEXT:    retq # sched: [4:1.00]
3653;
3654; ZNVER1-LABEL: test_pmaddwd:
3655; ZNVER1:       # %bb.0:
3656; ZNVER1-NEXT:    pmaddwd %mm1, %mm0 # sched: [4:1.00]
3657; ZNVER1-NEXT:    pmaddwd (%rdi), %mm0 # sched: [11:1.00]
3658; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3659; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3660  %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3661  %2 = load x86_mmx, x86_mmx *%a2, align 8
3662  %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3663  %4 = bitcast x86_mmx %3 to i64
3664  ret i64 %4
3665}
3666declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
3667
; Schedule test for the MMX form of PMADDUBSW (multiply unsigned by signed bytes
; and add adjacent products into saturated words), reached through the
; llvm.x86.ssse3.pmadd.ub.sw intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
3668define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3669; GENERIC-LABEL: test_pmaddubsw:
3670; GENERIC:       # %bb.0:
3671; GENERIC-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
3672; GENERIC-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
3673; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3674; GENERIC-NEXT:    retq # sched: [1:1.00]
3675;
3676; ATOM-LABEL: test_pmaddubsw:
3677; ATOM:       # %bb.0:
3678; ATOM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:4.00]
3679; ATOM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [4:4.00]
3680; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3681; ATOM-NEXT:    retq # sched: [79:39.50]
3682;
3683; SLM-LABEL: test_pmaddubsw:
3684; SLM:       # %bb.0:
3685; SLM-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
3686; SLM-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
3687; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3688; SLM-NEXT:    retq # sched: [4:1.00]
3689;
3690; SANDY-LABEL: test_pmaddubsw:
3691; SANDY:       # %bb.0:
3692; SANDY-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
3693; SANDY-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
3694; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3695; SANDY-NEXT:    retq # sched: [1:1.00]
3696;
3697; HASWELL-LABEL: test_pmaddubsw:
3698; HASWELL:       # %bb.0:
3699; HASWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
3700; HASWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
3701; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3702; HASWELL-NEXT:    retq # sched: [7:1.00]
3703;
3704; BROADWELL-LABEL: test_pmaddubsw:
3705; BROADWELL:       # %bb.0:
3706; BROADWELL-NEXT:    pmaddubsw %mm1, %mm0 # sched: [5:1.00]
3707; BROADWELL-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
3708; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3709; BROADWELL-NEXT:    retq # sched: [7:1.00]
3710;
3711; SKYLAKE-LABEL: test_pmaddubsw:
3712; SKYLAKE:       # %bb.0:
3713; SKYLAKE-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
3714; SKYLAKE-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
3715; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3716; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3717;
3718; SKX-LABEL: test_pmaddubsw:
3719; SKX:       # %bb.0:
3720; SKX-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
3721; SKX-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
3722; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3723; SKX-NEXT:    retq # sched: [7:1.00]
3724;
3725; BTVER2-LABEL: test_pmaddubsw:
3726; BTVER2:       # %bb.0:
3727; BTVER2-NEXT:    pmaddubsw %mm1, %mm0 # sched: [2:1.00]
3728; BTVER2-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
3729; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3730; BTVER2-NEXT:    retq # sched: [4:1.00]
3731;
3732; ZNVER1-LABEL: test_pmaddubsw:
3733; ZNVER1:       # %bb.0:
3734; ZNVER1-NEXT:    pmaddubsw %mm1, %mm0 # sched: [4:1.00]
3735; ZNVER1-NEXT:    pmaddubsw (%rdi), %mm0 # sched: [11:1.00]
3736; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3737; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3738  %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3739  %2 = load x86_mmx, x86_mmx *%a2, align 8
3740  %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3741  %4 = bitcast x86_mmx %3 to i64
3742  ret i64 %4
3743}
3744declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
3745
; Schedule test for the MMX form of PMAXSW (maximum of packed signed words),
; reached through the llvm.x86.mmx.pmaxs.w intrinsic.
; The intrinsic is called once with two register operands and once with an
; operand loaded from %a2, so both instruction forms show up in the output; the
; result is then returned as i64.
; Each per-CPU group pins the assembly plus the "# sched" annotation printed by
; llc -print-schedule (presumably [latency:reciprocal throughput] - TODO confirm).
3746define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3747; GENERIC-LABEL: test_pmaxsw:
3748; GENERIC:       # %bb.0:
3749; GENERIC-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
3750; GENERIC-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
3751; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3752; GENERIC-NEXT:    retq # sched: [1:1.00]
3753;
3754; ATOM-LABEL: test_pmaxsw:
3755; ATOM:       # %bb.0:
3756; ATOM-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
3757; ATOM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [1:1.00]
3758; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3759; ATOM-NEXT:    retq # sched: [79:39.50]
3760;
3761; SLM-LABEL: test_pmaxsw:
3762; SLM:       # %bb.0:
3763; SLM-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
3764; SLM-NEXT:    pmaxsw (%rdi), %mm0 # sched: [4:1.00]
3765; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3766; SLM-NEXT:    retq # sched: [4:1.00]
3767;
3768; SANDY-LABEL: test_pmaxsw:
3769; SANDY:       # %bb.0:
3770; SANDY-NEXT:    pmaxsw %mm1, %mm0 # sched: [3:1.00]
3771; SANDY-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:1.00]
3772; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3773; SANDY-NEXT:    retq # sched: [1:1.00]
3774;
3775; HASWELL-LABEL: test_pmaxsw:
3776; HASWELL:       # %bb.0:
3777; HASWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
3778; HASWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
3779; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3780; HASWELL-NEXT:    retq # sched: [7:1.00]
3781;
3782; BROADWELL-LABEL: test_pmaxsw:
3783; BROADWELL:       # %bb.0:
3784; BROADWELL-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
3785; BROADWELL-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:0.50]
3786; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3787; BROADWELL-NEXT:    retq # sched: [7:1.00]
3788;
3789; SKYLAKE-LABEL: test_pmaxsw:
3790; SKYLAKE:       # %bb.0:
3791; SKYLAKE-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
3792; SKYLAKE-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
3793; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3794; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3795;
3796; SKX-LABEL: test_pmaxsw:
3797; SKX:       # %bb.0:
3798; SKX-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:1.00]
3799; SKX-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
3800; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3801; SKX-NEXT:    retq # sched: [7:1.00]
3802;
3803; BTVER2-LABEL: test_pmaxsw:
3804; BTVER2:       # %bb.0:
3805; BTVER2-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.50]
3806; BTVER2-NEXT:    pmaxsw (%rdi), %mm0 # sched: [6:1.00]
3807; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3808; BTVER2-NEXT:    retq # sched: [4:1.00]
3809;
3810; ZNVER1-LABEL: test_pmaxsw:
3811; ZNVER1:       # %bb.0:
3812; ZNVER1-NEXT:    pmaxsw %mm1, %mm0 # sched: [1:0.25]
3813; ZNVER1-NEXT:    pmaxsw (%rdi), %mm0 # sched: [8:0.50]
3814; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3815; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-register form: both operands are the incoming x86_mmx arguments.
3816  %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1)
  ; Memory form: the loaded value becomes the (%rdi) source operand.
3817  %2 = load x86_mmx, x86_mmx *%a2, align 8
3818  %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2)
  ; Return the MMX value as i64 (the movq %mm0, %rax in the expected output).
3819  %4 = bitcast x86_mmx %3 to i64
3820  ret i64 %4
3821}
3822declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
3823
; test_pmaxub: applies @llvm.x86.mmx.pmaxu.b to (%a0, %a1), then to that result
; and an x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
3824define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3825; GENERIC-LABEL: test_pmaxub:
3826; GENERIC:       # %bb.0:
3827; GENERIC-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
3828; GENERIC-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
3829; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3830; GENERIC-NEXT:    retq # sched: [1:1.00]
3831;
3832; ATOM-LABEL: test_pmaxub:
3833; ATOM:       # %bb.0:
3834; ATOM-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
3835; ATOM-NEXT:    pmaxub (%rdi), %mm0 # sched: [1:1.00]
3836; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3837; ATOM-NEXT:    retq # sched: [79:39.50]
3838;
3839; SLM-LABEL: test_pmaxub:
3840; SLM:       # %bb.0:
3841; SLM-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
3842; SLM-NEXT:    pmaxub (%rdi), %mm0 # sched: [4:1.00]
3843; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3844; SLM-NEXT:    retq # sched: [4:1.00]
3845;
3846; SANDY-LABEL: test_pmaxub:
3847; SANDY:       # %bb.0:
3848; SANDY-NEXT:    pmaxub %mm1, %mm0 # sched: [3:1.00]
3849; SANDY-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:1.00]
3850; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3851; SANDY-NEXT:    retq # sched: [1:1.00]
3852;
3853; HASWELL-LABEL: test_pmaxub:
3854; HASWELL:       # %bb.0:
3855; HASWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
3856; HASWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
3857; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3858; HASWELL-NEXT:    retq # sched: [7:1.00]
3859;
3860; BROADWELL-LABEL: test_pmaxub:
3861; BROADWELL:       # %bb.0:
3862; BROADWELL-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
3863; BROADWELL-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:0.50]
3864; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3865; BROADWELL-NEXT:    retq # sched: [7:1.00]
3866;
3867; SKYLAKE-LABEL: test_pmaxub:
3868; SKYLAKE:       # %bb.0:
3869; SKYLAKE-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
3870; SKYLAKE-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
3871; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3872; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3873;
3874; SKX-LABEL: test_pmaxub:
3875; SKX:       # %bb.0:
3876; SKX-NEXT:    pmaxub %mm1, %mm0 # sched: [1:1.00]
3877; SKX-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
3878; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3879; SKX-NEXT:    retq # sched: [7:1.00]
3880;
3881; BTVER2-LABEL: test_pmaxub:
3882; BTVER2:       # %bb.0:
3883; BTVER2-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.50]
3884; BTVER2-NEXT:    pmaxub (%rdi), %mm0 # sched: [6:1.00]
3885; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3886; BTVER2-NEXT:    retq # sched: [4:1.00]
3887;
3888; ZNVER1-LABEL: test_pmaxub:
3889; ZNVER1:       # %bb.0:
3890; ZNVER1-NEXT:    pmaxub %mm1, %mm0 # sched: [1:0.25]
3891; ZNVER1-NEXT:    pmaxub (%rdi), %mm0 # sched: [8:0.50]
3892; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3893; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmaxub as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
3894  %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1)
3895  %2 = load x86_mmx, x86_mmx *%a2, align 8
3896  %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2)
3897  %4 = bitcast x86_mmx %3 to i64
3898  ret i64 %4
3899}
; Intrinsic exercised by test_pmaxub.
3900declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
3901
; test_pminsw: applies @llvm.x86.mmx.pmins.w to (%a0, %a1), then to that result
; and an x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
3902define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3903; GENERIC-LABEL: test_pminsw:
3904; GENERIC:       # %bb.0:
3905; GENERIC-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
3906; GENERIC-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
3907; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3908; GENERIC-NEXT:    retq # sched: [1:1.00]
3909;
3910; ATOM-LABEL: test_pminsw:
3911; ATOM:       # %bb.0:
3912; ATOM-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
3913; ATOM-NEXT:    pminsw (%rdi), %mm0 # sched: [1:1.00]
3914; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3915; ATOM-NEXT:    retq # sched: [79:39.50]
3916;
3917; SLM-LABEL: test_pminsw:
3918; SLM:       # %bb.0:
3919; SLM-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
3920; SLM-NEXT:    pminsw (%rdi), %mm0 # sched: [4:1.00]
3921; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
3922; SLM-NEXT:    retq # sched: [4:1.00]
3923;
3924; SANDY-LABEL: test_pminsw:
3925; SANDY:       # %bb.0:
3926; SANDY-NEXT:    pminsw %mm1, %mm0 # sched: [3:1.00]
3927; SANDY-NEXT:    pminsw (%rdi), %mm0 # sched: [8:1.00]
3928; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3929; SANDY-NEXT:    retq # sched: [1:1.00]
3930;
3931; HASWELL-LABEL: test_pminsw:
3932; HASWELL:       # %bb.0:
3933; HASWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
3934; HASWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
3935; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3936; HASWELL-NEXT:    retq # sched: [7:1.00]
3937;
3938; BROADWELL-LABEL: test_pminsw:
3939; BROADWELL:       # %bb.0:
3940; BROADWELL-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
3941; BROADWELL-NEXT:    pminsw (%rdi), %mm0 # sched: [6:0.50]
3942; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
3943; BROADWELL-NEXT:    retq # sched: [7:1.00]
3944;
3945; SKYLAKE-LABEL: test_pminsw:
3946; SKYLAKE:       # %bb.0:
3947; SKYLAKE-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
3948; SKYLAKE-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
3949; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3950; SKYLAKE-NEXT:    retq # sched: [7:1.00]
3951;
3952; SKX-LABEL: test_pminsw:
3953; SKX:       # %bb.0:
3954; SKX-NEXT:    pminsw %mm1, %mm0 # sched: [1:1.00]
3955; SKX-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
3956; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3957; SKX-NEXT:    retq # sched: [7:1.00]
3958;
3959; BTVER2-LABEL: test_pminsw:
3960; BTVER2:       # %bb.0:
3961; BTVER2-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.50]
3962; BTVER2-NEXT:    pminsw (%rdi), %mm0 # sched: [6:1.00]
3963; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
3964; BTVER2-NEXT:    retq # sched: [4:1.00]
3965;
3966; ZNVER1-LABEL: test_pminsw:
3967; ZNVER1:       # %bb.0:
3968; ZNVER1-NEXT:    pminsw %mm1, %mm0 # sched: [1:0.25]
3969; ZNVER1-NEXT:    pminsw (%rdi), %mm0 # sched: [8:0.50]
3970; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3971; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pminsw as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
3972  %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1)
3973  %2 = load x86_mmx, x86_mmx *%a2, align 8
3974  %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2)
3975  %4 = bitcast x86_mmx %3 to i64
3976  ret i64 %4
3977}
; Intrinsic exercised by test_pminsw.
3978declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
3979
; test_pminub: applies @llvm.x86.mmx.pminu.b to (%a0, %a1), then to that result
; and an x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
3980define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
3981; GENERIC-LABEL: test_pminub:
3982; GENERIC:       # %bb.0:
3983; GENERIC-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
3984; GENERIC-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
3985; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
3986; GENERIC-NEXT:    retq # sched: [1:1.00]
3987;
3988; ATOM-LABEL: test_pminub:
3989; ATOM:       # %bb.0:
3990; ATOM-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
3991; ATOM-NEXT:    pminub (%rdi), %mm0 # sched: [1:1.00]
3992; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
3993; ATOM-NEXT:    retq # sched: [79:39.50]
3994;
3995; SLM-LABEL: test_pminub:
3996; SLM:       # %bb.0:
3997; SLM-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
3998; SLM-NEXT:    pminub (%rdi), %mm0 # sched: [4:1.00]
3999; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4000; SLM-NEXT:    retq # sched: [4:1.00]
4001;
4002; SANDY-LABEL: test_pminub:
4003; SANDY:       # %bb.0:
4004; SANDY-NEXT:    pminub %mm1, %mm0 # sched: [3:1.00]
4005; SANDY-NEXT:    pminub (%rdi), %mm0 # sched: [8:1.00]
4006; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4007; SANDY-NEXT:    retq # sched: [1:1.00]
4008;
4009; HASWELL-LABEL: test_pminub:
4010; HASWELL:       # %bb.0:
4011; HASWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
4012; HASWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
4013; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4014; HASWELL-NEXT:    retq # sched: [7:1.00]
4015;
4016; BROADWELL-LABEL: test_pminub:
4017; BROADWELL:       # %bb.0:
4018; BROADWELL-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
4019; BROADWELL-NEXT:    pminub (%rdi), %mm0 # sched: [6:0.50]
4020; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4021; BROADWELL-NEXT:    retq # sched: [7:1.00]
4022;
4023; SKYLAKE-LABEL: test_pminub:
4024; SKYLAKE:       # %bb.0:
4025; SKYLAKE-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
4026; SKYLAKE-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
4027; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4028; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4029;
4030; SKX-LABEL: test_pminub:
4031; SKX:       # %bb.0:
4032; SKX-NEXT:    pminub %mm1, %mm0 # sched: [1:1.00]
4033; SKX-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
4034; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4035; SKX-NEXT:    retq # sched: [7:1.00]
4036;
4037; BTVER2-LABEL: test_pminub:
4038; BTVER2:       # %bb.0:
4039; BTVER2-NEXT:    pminub %mm1, %mm0 # sched: [1:0.50]
4040; BTVER2-NEXT:    pminub (%rdi), %mm0 # sched: [6:1.00]
4041; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4042; BTVER2-NEXT:    retq # sched: [4:1.00]
4043;
4044; ZNVER1-LABEL: test_pminub:
4045; ZNVER1:       # %bb.0:
4046; ZNVER1-NEXT:    pminub %mm1, %mm0 # sched: [1:0.25]
4047; ZNVER1-NEXT:    pminub (%rdi), %mm0 # sched: [8:0.50]
4048; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4049; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pminub as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4050  %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1)
4051  %2 = load x86_mmx, x86_mmx *%a2, align 8
4052  %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2)
4053  %4 = bitcast x86_mmx %3 to i64
4054  ret i64 %4
4055}
; Intrinsic exercised by test_pminub.
4056declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
4057
; test_pmovmskb: returns the i32 result of @llvm.x86.mmx.pmovmskb applied to
; %a0. Each per-CPU block of expectations below lists the assembly llc should
; emit, with the "# sched:" annotation produced by -print-schedule on every line.
4058define i32 @test_pmovmskb(x86_mmx %a0) optsize {
4059; GENERIC-LABEL: test_pmovmskb:
4060; GENERIC:       # %bb.0:
4061; GENERIC-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
4062; GENERIC-NEXT:    retq # sched: [1:1.00]
4063;
4064; ATOM-LABEL: test_pmovmskb:
4065; ATOM:       # %bb.0:
4066; ATOM-NEXT:    pmovmskb %mm0, %eax # sched: [3:3.00]
4067; ATOM-NEXT:    retq # sched: [79:39.50]
4068;
4069; SLM-LABEL: test_pmovmskb:
4070; SLM:       # %bb.0:
4071; SLM-NEXT:    pmovmskb %mm0, %eax # sched: [4:1.00]
4072; SLM-NEXT:    retq # sched: [4:1.00]
4073;
4074; SANDY-LABEL: test_pmovmskb:
4075; SANDY:       # %bb.0:
4076; SANDY-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
4077; SANDY-NEXT:    retq # sched: [1:1.00]
4078;
4079; HASWELL-LABEL: test_pmovmskb:
4080; HASWELL:       # %bb.0:
4081; HASWELL-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
4082; HASWELL-NEXT:    retq # sched: [7:1.00]
4083;
4084; BROADWELL-LABEL: test_pmovmskb:
4085; BROADWELL:       # %bb.0:
4086; BROADWELL-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
4087; BROADWELL-NEXT:    retq # sched: [7:1.00]
4088;
4089; SKYLAKE-LABEL: test_pmovmskb:
4090; SKYLAKE:       # %bb.0:
4091; SKYLAKE-NEXT:    pmovmskb %mm0, %eax # sched: [2:1.00]
4092; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4093;
4094; SKX-LABEL: test_pmovmskb:
4095; SKX:       # %bb.0:
4096; SKX-NEXT:    pmovmskb %mm0, %eax # sched: [2:1.00]
4097; SKX-NEXT:    retq # sched: [7:1.00]
4098;
4099; BTVER2-LABEL: test_pmovmskb:
4100; BTVER2:       # %bb.0:
4101; BTVER2-NEXT:    pmovmskb %mm0, %eax # sched: [3:1.00]
4102; BTVER2-NEXT:    retq # sched: [4:1.00]
4103;
4104; ZNVER1-LABEL: test_pmovmskb:
4105; ZNVER1:       # %bb.0:
4106; ZNVER1-NEXT:    pmovmskb %mm0, %eax # sched: [1:1.00]
4107; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test: a single intrinsic call whose result is returned directly; the
; expectations above show it emitted as pmovmskb %mm0, %eax.
4108  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
4109  ret i32 %1
4110}
; Intrinsic exercised by test_pmovmskb.
4111declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
4112
; test_pmulhrsw: applies @llvm.x86.ssse3.pmul.hr.sw to (%a0, %a1), then to that
; result and an x86_mmx value loaded from %a2, and returns the final value as
; i64. Each per-CPU block of expectations below lists the assembly llc should
; emit, with the "# sched:" annotation produced by -print-schedule on every line.
4113define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4114; GENERIC-LABEL: test_pmulhrsw:
4115; GENERIC:       # %bb.0:
4116; GENERIC-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
4117; GENERIC-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
4118; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4119; GENERIC-NEXT:    retq # sched: [1:1.00]
4120;
4121; ATOM-LABEL: test_pmulhrsw:
4122; ATOM:       # %bb.0:
4123; ATOM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:4.00]
4124; ATOM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [4:4.00]
4125; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4126; ATOM-NEXT:    retq # sched: [79:39.50]
4127;
4128; SLM-LABEL: test_pmulhrsw:
4129; SLM:       # %bb.0:
4130; SLM-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
4131; SLM-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
4132; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4133; SLM-NEXT:    retq # sched: [4:1.00]
4134;
4135; SANDY-LABEL: test_pmulhrsw:
4136; SANDY:       # %bb.0:
4137; SANDY-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
4138; SANDY-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
4139; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4140; SANDY-NEXT:    retq # sched: [1:1.00]
4141;
4142; HASWELL-LABEL: test_pmulhrsw:
4143; HASWELL:       # %bb.0:
4144; HASWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
4145; HASWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
4146; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4147; HASWELL-NEXT:    retq # sched: [7:1.00]
4148;
4149; BROADWELL-LABEL: test_pmulhrsw:
4150; BROADWELL:       # %bb.0:
4151; BROADWELL-NEXT:    pmulhrsw %mm1, %mm0 # sched: [5:1.00]
4152; BROADWELL-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
4153; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4154; BROADWELL-NEXT:    retq # sched: [7:1.00]
4155;
4156; SKYLAKE-LABEL: test_pmulhrsw:
4157; SKYLAKE:       # %bb.0:
4158; SKYLAKE-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
4159; SKYLAKE-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
4160; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4161; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4162;
4163; SKX-LABEL: test_pmulhrsw:
4164; SKX:       # %bb.0:
4165; SKX-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
4166; SKX-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
4167; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4168; SKX-NEXT:    retq # sched: [7:1.00]
4169;
4170; BTVER2-LABEL: test_pmulhrsw:
4171; BTVER2:       # %bb.0:
4172; BTVER2-NEXT:    pmulhrsw %mm1, %mm0 # sched: [2:1.00]
4173; BTVER2-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
4174; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4175; BTVER2-NEXT:    retq # sched: [4:1.00]
4176;
4177; ZNVER1-LABEL: test_pmulhrsw:
4178; ZNVER1:       # %bb.0:
4179; ZNVER1-NEXT:    pmulhrsw %mm1, %mm0 # sched: [4:1.00]
4180; ZNVER1-NEXT:    pmulhrsw (%rdi), %mm0 # sched: [11:1.00]
4181; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4182; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmulhrsw as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4183  %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1)
4184  %2 = load x86_mmx, x86_mmx *%a2, align 8
4185  %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2)
4186  %4 = bitcast x86_mmx %3 to i64
4187  ret i64 %4
4188}
; Intrinsic exercised by test_pmulhrsw.
4189declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
4190
; test_pmulhw: applies @llvm.x86.mmx.pmulh.w to (%a0, %a1), then to that result
; and an x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
4191define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4192; GENERIC-LABEL: test_pmulhw:
4193; GENERIC:       # %bb.0:
4194; GENERIC-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
4195; GENERIC-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
4196; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4197; GENERIC-NEXT:    retq # sched: [1:1.00]
4198;
4199; ATOM-LABEL: test_pmulhw:
4200; ATOM:       # %bb.0:
4201; ATOM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:4.00]
4202; ATOM-NEXT:    pmulhw (%rdi), %mm0 # sched: [4:4.00]
4203; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4204; ATOM-NEXT:    retq # sched: [79:39.50]
4205;
4206; SLM-LABEL: test_pmulhw:
4207; SLM:       # %bb.0:
4208; SLM-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
4209; SLM-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
4210; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4211; SLM-NEXT:    retq # sched: [4:1.00]
4212;
4213; SANDY-LABEL: test_pmulhw:
4214; SANDY:       # %bb.0:
4215; SANDY-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
4216; SANDY-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
4217; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4218; SANDY-NEXT:    retq # sched: [1:1.00]
4219;
4220; HASWELL-LABEL: test_pmulhw:
4221; HASWELL:       # %bb.0:
4222; HASWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
4223; HASWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
4224; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4225; HASWELL-NEXT:    retq # sched: [7:1.00]
4226;
4227; BROADWELL-LABEL: test_pmulhw:
4228; BROADWELL:       # %bb.0:
4229; BROADWELL-NEXT:    pmulhw %mm1, %mm0 # sched: [5:1.00]
4230; BROADWELL-NEXT:    pmulhw (%rdi), %mm0 # sched: [10:1.00]
4231; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4232; BROADWELL-NEXT:    retq # sched: [7:1.00]
4233;
4234; SKYLAKE-LABEL: test_pmulhw:
4235; SKYLAKE:       # %bb.0:
4236; SKYLAKE-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
4237; SKYLAKE-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
4238; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4239; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4240;
4241; SKX-LABEL: test_pmulhw:
4242; SKX:       # %bb.0:
4243; SKX-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
4244; SKX-NEXT:    pmulhw (%rdi), %mm0 # sched: [9:1.00]
4245; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4246; SKX-NEXT:    retq # sched: [7:1.00]
4247;
4248; BTVER2-LABEL: test_pmulhw:
4249; BTVER2:       # %bb.0:
4250; BTVER2-NEXT:    pmulhw %mm1, %mm0 # sched: [2:1.00]
4251; BTVER2-NEXT:    pmulhw (%rdi), %mm0 # sched: [7:1.00]
4252; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4253; BTVER2-NEXT:    retq # sched: [4:1.00]
4254;
4255; ZNVER1-LABEL: test_pmulhw:
4256; ZNVER1:       # %bb.0:
4257; ZNVER1-NEXT:    pmulhw %mm1, %mm0 # sched: [4:1.00]
4258; ZNVER1-NEXT:    pmulhw (%rdi), %mm0 # sched: [11:1.00]
4259; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4260; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmulhw as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4261  %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1)
4262  %2 = load x86_mmx, x86_mmx *%a2, align 8
4263  %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2)
4264  %4 = bitcast x86_mmx %3 to i64
4265  ret i64 %4
4266}
; Intrinsic exercised by test_pmulhw.
4267declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
4268
; test_pmulhuw: applies @llvm.x86.mmx.pmulhu.w to (%a0, %a1), then to that
; result and an x86_mmx value loaded from %a2, and returns the final value as
; i64. Each per-CPU block of expectations below lists the assembly llc should
; emit, with the "# sched:" annotation produced by -print-schedule on every line.
4269define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4270; GENERIC-LABEL: test_pmulhuw:
4271; GENERIC:       # %bb.0:
4272; GENERIC-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
4273; GENERIC-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
4274; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4275; GENERIC-NEXT:    retq # sched: [1:1.00]
4276;
4277; ATOM-LABEL: test_pmulhuw:
4278; ATOM:       # %bb.0:
4279; ATOM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:4.00]
4280; ATOM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [4:4.00]
4281; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4282; ATOM-NEXT:    retq # sched: [79:39.50]
4283;
4284; SLM-LABEL: test_pmulhuw:
4285; SLM:       # %bb.0:
4286; SLM-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
4287; SLM-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
4288; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4289; SLM-NEXT:    retq # sched: [4:1.00]
4290;
4291; SANDY-LABEL: test_pmulhuw:
4292; SANDY:       # %bb.0:
4293; SANDY-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
4294; SANDY-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
4295; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4296; SANDY-NEXT:    retq # sched: [1:1.00]
4297;
4298; HASWELL-LABEL: test_pmulhuw:
4299; HASWELL:       # %bb.0:
4300; HASWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
4301; HASWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
4302; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4303; HASWELL-NEXT:    retq # sched: [7:1.00]
4304;
4305; BROADWELL-LABEL: test_pmulhuw:
4306; BROADWELL:       # %bb.0:
4307; BROADWELL-NEXT:    pmulhuw %mm1, %mm0 # sched: [5:1.00]
4308; BROADWELL-NEXT:    pmulhuw (%rdi), %mm0 # sched: [10:1.00]
4309; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4310; BROADWELL-NEXT:    retq # sched: [7:1.00]
4311;
4312; SKYLAKE-LABEL: test_pmulhuw:
4313; SKYLAKE:       # %bb.0:
4314; SKYLAKE-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
4315; SKYLAKE-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
4316; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4317; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4318;
4319; SKX-LABEL: test_pmulhuw:
4320; SKX:       # %bb.0:
4321; SKX-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
4322; SKX-NEXT:    pmulhuw (%rdi), %mm0 # sched: [9:1.00]
4323; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4324; SKX-NEXT:    retq # sched: [7:1.00]
4325;
4326; BTVER2-LABEL: test_pmulhuw:
4327; BTVER2:       # %bb.0:
4328; BTVER2-NEXT:    pmulhuw %mm1, %mm0 # sched: [2:1.00]
4329; BTVER2-NEXT:    pmulhuw (%rdi), %mm0 # sched: [7:1.00]
4330; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4331; BTVER2-NEXT:    retq # sched: [4:1.00]
4332;
4333; ZNVER1-LABEL: test_pmulhuw:
4334; ZNVER1:       # %bb.0:
4335; ZNVER1-NEXT:    pmulhuw %mm1, %mm0 # sched: [4:1.00]
4336; ZNVER1-NEXT:    pmulhuw (%rdi), %mm0 # sched: [11:1.00]
4337; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4338; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmulhuw as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4339  %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1)
4340  %2 = load x86_mmx, x86_mmx *%a2, align 8
4341  %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2)
4342  %4 = bitcast x86_mmx %3 to i64
4343  ret i64 %4
4344}
; Intrinsic exercised by test_pmulhuw.
4345declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
4346
; test_pmullw: applies @llvm.x86.mmx.pmull.w to (%a0, %a1), then to that result
; and an x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
4347define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4348; GENERIC-LABEL: test_pmullw:
4349; GENERIC:       # %bb.0:
4350; GENERIC-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
4351; GENERIC-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
4352; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4353; GENERIC-NEXT:    retq # sched: [1:1.00]
4354;
4355; ATOM-LABEL: test_pmullw:
4356; ATOM:       # %bb.0:
4357; ATOM-NEXT:    pmullw %mm1, %mm0 # sched: [4:4.00]
4358; ATOM-NEXT:    pmullw (%rdi), %mm0 # sched: [4:4.00]
4359; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4360; ATOM-NEXT:    retq # sched: [79:39.50]
4361;
4362; SLM-LABEL: test_pmullw:
4363; SLM:       # %bb.0:
4364; SLM-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
4365; SLM-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
4366; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4367; SLM-NEXT:    retq # sched: [4:1.00]
4368;
4369; SANDY-LABEL: test_pmullw:
4370; SANDY:       # %bb.0:
4371; SANDY-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
4372; SANDY-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
4373; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4374; SANDY-NEXT:    retq # sched: [1:1.00]
4375;
4376; HASWELL-LABEL: test_pmullw:
4377; HASWELL:       # %bb.0:
4378; HASWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
4379; HASWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
4380; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4381; HASWELL-NEXT:    retq # sched: [7:1.00]
4382;
4383; BROADWELL-LABEL: test_pmullw:
4384; BROADWELL:       # %bb.0:
4385; BROADWELL-NEXT:    pmullw %mm1, %mm0 # sched: [5:1.00]
4386; BROADWELL-NEXT:    pmullw (%rdi), %mm0 # sched: [10:1.00]
4387; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4388; BROADWELL-NEXT:    retq # sched: [7:1.00]
4389;
4390; SKYLAKE-LABEL: test_pmullw:
4391; SKYLAKE:       # %bb.0:
4392; SKYLAKE-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
4393; SKYLAKE-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
4394; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4395; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4396;
4397; SKX-LABEL: test_pmullw:
4398; SKX:       # %bb.0:
4399; SKX-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
4400; SKX-NEXT:    pmullw (%rdi), %mm0 # sched: [9:1.00]
4401; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4402; SKX-NEXT:    retq # sched: [7:1.00]
4403;
4404; BTVER2-LABEL: test_pmullw:
4405; BTVER2:       # %bb.0:
4406; BTVER2-NEXT:    pmullw %mm1, %mm0 # sched: [2:1.00]
4407; BTVER2-NEXT:    pmullw (%rdi), %mm0 # sched: [7:1.00]
4408; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4409; BTVER2-NEXT:    retq # sched: [4:1.00]
4410;
4411; ZNVER1-LABEL: test_pmullw:
4412; ZNVER1:       # %bb.0:
4413; ZNVER1-NEXT:    pmullw %mm1, %mm0 # sched: [4:1.00]
4414; ZNVER1-NEXT:    pmullw (%rdi), %mm0 # sched: [11:1.00]
4415; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4416; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmullw as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4417  %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1)
4418  %2 = load x86_mmx, x86_mmx *%a2, align 8
4419  %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2)
4420  %4 = bitcast x86_mmx %3 to i64
4421  ret i64 %4
4422}
; Intrinsic exercised by test_pmullw.
4423declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
4424
; test_pmuludq: applies @llvm.x86.mmx.pmulu.dq to (%a0, %a1), then to that
; result and an x86_mmx value loaded from %a2, and returns the final value as
; i64. Each per-CPU block of expectations below lists the assembly llc should
; emit, with the "# sched:" annotation produced by -print-schedule on every line.
4425define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4426; GENERIC-LABEL: test_pmuludq:
4427; GENERIC:       # %bb.0:
4428; GENERIC-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
4429; GENERIC-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
4430; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4431; GENERIC-NEXT:    retq # sched: [1:1.00]
4432;
4433; ATOM-LABEL: test_pmuludq:
4434; ATOM:       # %bb.0:
4435; ATOM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:4.00]
4436; ATOM-NEXT:    pmuludq (%rdi), %mm0 # sched: [4:4.00]
4437; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4438; ATOM-NEXT:    retq # sched: [79:39.50]
4439;
4440; SLM-LABEL: test_pmuludq:
4441; SLM:       # %bb.0:
4442; SLM-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
4443; SLM-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
4444; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4445; SLM-NEXT:    retq # sched: [4:1.00]
4446;
4447; SANDY-LABEL: test_pmuludq:
4448; SANDY:       # %bb.0:
4449; SANDY-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
4450; SANDY-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
4451; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4452; SANDY-NEXT:    retq # sched: [1:1.00]
4453;
4454; HASWELL-LABEL: test_pmuludq:
4455; HASWELL:       # %bb.0:
4456; HASWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
4457; HASWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
4458; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4459; HASWELL-NEXT:    retq # sched: [7:1.00]
4460;
4461; BROADWELL-LABEL: test_pmuludq:
4462; BROADWELL:       # %bb.0:
4463; BROADWELL-NEXT:    pmuludq %mm1, %mm0 # sched: [5:1.00]
4464; BROADWELL-NEXT:    pmuludq (%rdi), %mm0 # sched: [10:1.00]
4465; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4466; BROADWELL-NEXT:    retq # sched: [7:1.00]
4467;
4468; SKYLAKE-LABEL: test_pmuludq:
4469; SKYLAKE:       # %bb.0:
4470; SKYLAKE-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
4471; SKYLAKE-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
4472; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4473; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4474;
4475; SKX-LABEL: test_pmuludq:
4476; SKX:       # %bb.0:
4477; SKX-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
4478; SKX-NEXT:    pmuludq (%rdi), %mm0 # sched: [9:1.00]
4479; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4480; SKX-NEXT:    retq # sched: [7:1.00]
4481;
4482; BTVER2-LABEL: test_pmuludq:
4483; BTVER2:       # %bb.0:
4484; BTVER2-NEXT:    pmuludq %mm1, %mm0 # sched: [2:1.00]
4485; BTVER2-NEXT:    pmuludq (%rdi), %mm0 # sched: [7:1.00]
4486; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4487; BTVER2-NEXT:    retq # sched: [4:1.00]
4488;
4489; ZNVER1-LABEL: test_pmuludq:
4490; ZNVER1:       # %bb.0:
4491; ZNVER1-NEXT:    pmuludq %mm1, %mm0 # sched: [4:1.00]
4492; ZNVER1-NEXT:    pmuludq (%rdi), %mm0 # sched: [11:1.00]
4493; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4494; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; pmuludq as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4495  %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1)
4496  %2 = load x86_mmx, x86_mmx *%a2, align 8
4497  %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2)
4498  %4 = bitcast x86_mmx %3 to i64
4499  ret i64 %4
4500}
; Intrinsic exercised by test_pmuludq.
4501declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
4502
; test_por: applies @llvm.x86.mmx.por to (%a0, %a1), then to that result and an
; x86_mmx value loaded from %a2, and returns the final value as i64.
; Each per-CPU block of expectations below lists the assembly llc should emit,
; with the "# sched:" annotation produced by -print-schedule on every line.
4503define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4504; GENERIC-LABEL: test_por:
4505; GENERIC:       # %bb.0:
4506; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
4507; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4508; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4509; GENERIC-NEXT:    retq # sched: [1:1.00]
4510;
4511; ATOM-LABEL: test_por:
4512; ATOM:       # %bb.0:
4513; ATOM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
4514; ATOM-NEXT:    por (%rdi), %mm0 # sched: [1:1.00]
4515; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4516; ATOM-NEXT:    retq # sched: [79:39.50]
4517;
4518; SLM-LABEL: test_por:
4519; SLM:       # %bb.0:
4520; SLM-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
4521; SLM-NEXT:    por (%rdi), %mm0 # sched: [4:1.00]
4522; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4523; SLM-NEXT:    retq # sched: [4:1.00]
4524;
4525; SANDY-LABEL: test_por:
4526; SANDY:       # %bb.0:
4527; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
4528; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4529; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4530; SANDY-NEXT:    retq # sched: [1:1.00]
4531;
4532; HASWELL-LABEL: test_por:
4533; HASWELL:       # %bb.0:
4534; HASWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
4535; HASWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4536; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4537; HASWELL-NEXT:    retq # sched: [7:1.00]
4538;
4539; BROADWELL-LABEL: test_por:
4540; BROADWELL:       # %bb.0:
4541; BROADWELL-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
4542; BROADWELL-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4543; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4544; BROADWELL-NEXT:    retq # sched: [7:1.00]
4545;
4546; SKYLAKE-LABEL: test_por:
4547; SKYLAKE:       # %bb.0:
4548; SKYLAKE-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
4549; SKYLAKE-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4550; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4551; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4552;
4553; SKX-LABEL: test_por:
4554; SKX:       # %bb.0:
4555; SKX-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
4556; SKX-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
4557; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4558; SKX-NEXT:    retq # sched: [7:1.00]
4559;
4560; BTVER2-LABEL: test_por:
4561; BTVER2:       # %bb.0:
4562; BTVER2-NEXT:    por %mm1, %mm0 # sched: [1:0.50]
4563; BTVER2-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
4564; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4565; BTVER2-NEXT:    retq # sched: [4:1.00]
4566;
4567; ZNVER1-LABEL: test_por:
4568; ZNVER1:       # %bb.0:
4569; ZNVER1-NEXT:    por %mm1, %mm0 # sched: [1:0.25]
4570; ZNVER1-NEXT:    por (%rdi), %mm0 # sched: [8:0.50]
4571; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4572; ZNVER1-NEXT:    retq # sched: [1:0.50]
; IR under test. The expectations above show the load folded into the second
; por as a (%rdi) memory operand and the bitcast emitted as movq %mm0, %rax.
4573  %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1)
4574  %2 = load x86_mmx, x86_mmx *%a2, align 8
4575  %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2)
4576  %4 = bitcast x86_mmx %3 to i64
4577  ret i64 %4
4578}
; Intrinsic exercised by test_por.
4579declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
4580
; Scheduling-annotation test for MMX psadbw. The intrinsic is applied to the two
; register arguments and then to that result plus a value loaded from %a2, so
; the checked assembly contains both the register form and the memory-operand
; form. The check lines below are autogenerated (see the note at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
4581define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4582; GENERIC-LABEL: test_psadbw:
4583; GENERIC:       # %bb.0:
4584; GENERIC-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
4585; GENERIC-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
4586; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4587; GENERIC-NEXT:    retq # sched: [1:1.00]
4588;
4589; ATOM-LABEL: test_psadbw:
4590; ATOM:       # %bb.0:
4591; ATOM-NEXT:    psadbw %mm1, %mm0 # sched: [4:2.00]
4592; ATOM-NEXT:    psadbw (%rdi), %mm0 # sched: [4:2.00]
4593; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4594; ATOM-NEXT:    retq # sched: [79:39.50]
4595;
4596; SLM-LABEL: test_psadbw:
4597; SLM:       # %bb.0:
4598; SLM-NEXT:    psadbw %mm1, %mm0 # sched: [4:1.00]
4599; SLM-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
4600; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4601; SLM-NEXT:    retq # sched: [4:1.00]
4602;
4603; SANDY-LABEL: test_psadbw:
4604; SANDY:       # %bb.0:
4605; SANDY-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
4606; SANDY-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
4607; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4608; SANDY-NEXT:    retq # sched: [1:1.00]
4609;
4610; HASWELL-LABEL: test_psadbw:
4611; HASWELL:       # %bb.0:
4612; HASWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
4613; HASWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
4614; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4615; HASWELL-NEXT:    retq # sched: [7:1.00]
4616;
4617; BROADWELL-LABEL: test_psadbw:
4618; BROADWELL:       # %bb.0:
4619; BROADWELL-NEXT:    psadbw %mm1, %mm0 # sched: [5:1.00]
4620; BROADWELL-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
4621; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4622; BROADWELL-NEXT:    retq # sched: [7:1.00]
4623;
4624; SKYLAKE-LABEL: test_psadbw:
4625; SKYLAKE:       # %bb.0:
4626; SKYLAKE-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
4627; SKYLAKE-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
4628; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4629; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4630;
4631; SKX-LABEL: test_psadbw:
4632; SKX:       # %bb.0:
4633; SKX-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
4634; SKX-NEXT:    psadbw (%rdi), %mm0 # sched: [8:1.00]
4635; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4636; SKX-NEXT:    retq # sched: [7:1.00]
4637;
4638; BTVER2-LABEL: test_psadbw:
4639; BTVER2:       # %bb.0:
4640; BTVER2-NEXT:    psadbw %mm1, %mm0 # sched: [2:0.50]
4641; BTVER2-NEXT:    psadbw (%rdi), %mm0 # sched: [7:1.00]
4642; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4643; BTVER2-NEXT:    retq # sched: [4:1.00]
4644;
4645; ZNVER1-LABEL: test_psadbw:
4646; ZNVER1:       # %bb.0:
4647; ZNVER1-NEXT:    psadbw %mm1, %mm0 # sched: [3:1.00]
4648; ZNVER1-NEXT:    psadbw (%rdi), %mm0 # sched: [10:1.00]
4649; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4650; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; First call uses two register operands; the second feeds in a loaded value so
  ; the load is folded into the instruction (the "(%rdi)" form checked above).
4651  %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1)
4652  %2 = load x86_mmx, x86_mmx *%a2, align 8
4653  %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
4654  %4 = bitcast x86_mmx %3 to i64
4655  ret i64 %4
4656}
; Intrinsic exercised by @test_psadbw.
4657declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
4658
; Scheduling-annotation test for the MMX form of pshufb. The intrinsic is applied
; to the two register arguments and then to that result plus a value loaded
; from %a2, so the checked assembly contains both the register form and the
; memory-operand form. The check lines below are autogenerated (see the note
; at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
4659define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize {
4660; GENERIC-LABEL: test_pshufb:
4661; GENERIC:       # %bb.0:
4662; GENERIC-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
4663; GENERIC-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
4664; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4665; GENERIC-NEXT:    retq # sched: [1:1.00]
4666;
4667; ATOM-LABEL: test_pshufb:
4668; ATOM:       # %bb.0:
4669; ATOM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4670; ATOM-NEXT:    pshufb (%rdi), %mm0 # sched: [1:1.00]
4671; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4672; ATOM-NEXT:    retq # sched: [79:39.50]
4673;
4674; SLM-LABEL: test_pshufb:
4675; SLM:       # %bb.0:
4676; SLM-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4677; SLM-NEXT:    pshufb (%rdi), %mm0 # sched: [4:1.00]
4678; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4679; SLM-NEXT:    retq # sched: [4:1.00]
4680;
4681; SANDY-LABEL: test_pshufb:
4682; SANDY:       # %bb.0:
4683; SANDY-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.50]
4684; SANDY-NEXT:    pshufb (%rdi), %mm0 # sched: [6:0.50]
4685; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4686; SANDY-NEXT:    retq # sched: [1:1.00]
4687;
4688; HASWELL-LABEL: test_pshufb:
4689; HASWELL:       # %bb.0:
4690; HASWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4691; HASWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
4692; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4693; HASWELL-NEXT:    retq # sched: [7:1.00]
4694;
4695; BROADWELL-LABEL: test_pshufb:
4696; BROADWELL:       # %bb.0:
4697; BROADWELL-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4698; BROADWELL-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
4699; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4700; BROADWELL-NEXT:    retq # sched: [7:1.00]
4701;
4702; SKYLAKE-LABEL: test_pshufb:
4703; SKYLAKE:       # %bb.0:
4704; SKYLAKE-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4705; SKYLAKE-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
4706; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4707; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4708;
4709; SKX-LABEL: test_pshufb:
4710; SKX:       # %bb.0:
4711; SKX-NEXT:    pshufb %mm1, %mm0 # sched: [1:1.00]
4712; SKX-NEXT:    pshufb (%rdi), %mm0 # sched: [6:1.00]
4713; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4714; SKX-NEXT:    retq # sched: [7:1.00]
4715;
4716; BTVER2-LABEL: test_pshufb:
4717; BTVER2:       # %bb.0:
4718; BTVER2-NEXT:    pshufb %mm1, %mm0 # sched: [2:2.00]
4719; BTVER2-NEXT:    pshufb (%rdi), %mm0 # sched: [7:2.00]
4720; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4721; BTVER2-NEXT:    retq # sched: [4:1.00]
4722;
4723; ZNVER1-LABEL: test_pshufb:
4724; ZNVER1:       # %bb.0:
4725; ZNVER1-NEXT:    pshufb %mm1, %mm0 # sched: [1:0.25]
4726; ZNVER1-NEXT:    pshufb (%rdi), %mm0 # sched: [8:0.50]
4727; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4728; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; First call uses two register operands; the second feeds in a loaded value so
  ; the load is folded into the instruction (the "(%rdi)" form checked above).
4729  %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1)
4730  %2 = load x86_mmx, x86_mmx *%a2, align 8
4731  %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
4732  %4 = bitcast x86_mmx %3 to i64
4733  ret i64 %4
4734}
; Intrinsic exercised by @test_pshufb.
4735declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
4736
; Scheduling-annotation test for pshufw. The source value is loaded from %a0, so
; the first shuffle is emitted with a memory operand; the second shuffle is
; applied to the register result. Both use immediate 0, which the assembly
; comments in the checks render as the [0,0,0,0] selection. The check lines
; below are autogenerated (see the note at the top of the file); regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
4737define i64 @test_pshufw(x86_mmx *%a0) optsize {
4738; GENERIC-LABEL: test_pshufw:
4739; GENERIC:       # %bb.0:
4740; GENERIC-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4741; GENERIC-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4742; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4743; GENERIC-NEXT:    retq # sched: [1:1.00]
4744;
4745; ATOM-LABEL: test_pshufw:
4746; ATOM:       # %bb.0:
4747; ATOM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00]
4748; ATOM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4749; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4750; ATOM-NEXT:    retq # sched: [79:39.50]
4751;
4752; SLM-LABEL: test_pshufw:
4753; SLM:       # %bb.0:
4754; SLM-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00]
4755; SLM-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4756; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4757; SLM-NEXT:    retq # sched: [4:1.00]
4758;
4759; SANDY-LABEL: test_pshufw:
4760; SANDY:       # %bb.0:
4761; SANDY-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4762; SANDY-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4763; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4764; SANDY-NEXT:    retq # sched: [1:1.00]
4765;
4766; HASWELL-LABEL: test_pshufw:
4767; HASWELL:       # %bb.0:
4768; HASWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4769; HASWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4770; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4771; HASWELL-NEXT:    retq # sched: [7:1.00]
4772;
4773; BROADWELL-LABEL: test_pshufw:
4774; BROADWELL:       # %bb.0:
4775; BROADWELL-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4776; BROADWELL-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4777; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4778; BROADWELL-NEXT:    retq # sched: [7:1.00]
4779;
4780; SKYLAKE-LABEL: test_pshufw:
4781; SKYLAKE:       # %bb.0:
4782; SKYLAKE-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4783; SKYLAKE-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4784; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4785; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4786;
4787; SKX-LABEL: test_pshufw:
4788; SKX:       # %bb.0:
4789; SKX-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4790; SKX-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
4791; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4792; SKX-NEXT:    retq # sched: [7:1.00]
4793;
4794; BTVER2-LABEL: test_pshufw:
4795; BTVER2:       # %bb.0:
4796; BTVER2-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
4797; BTVER2-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50]
4798; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4799; BTVER2-NEXT:    retq # sched: [4:1.00]
4800;
4801; ZNVER1-LABEL: test_pshufw:
4802; ZNVER1:       # %bb.0:
4803; ZNVER1-NEXT:    pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50]
4804; ZNVER1-NEXT:    pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25]
4805; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4806; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; The loaded value feeds the first shuffle (memory form); its result feeds the
  ; second shuffle (register form).
4807  %1 = load x86_mmx, x86_mmx *%a0, align 8
4808  %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0)
4809  %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
4810  %4 = bitcast x86_mmx %3 to i64
4811  ret i64 %4
4812}
; Intrinsic exercised by @test_pshufw; the i8 argument is the shuffle immediate.
4813declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
4814
; Scheduling-annotation test for the MMX form of psignb. The intrinsic is applied
; to the two register arguments and then to that result plus a value loaded
; from %a2, so the checked assembly contains both the register form and the
; memory-operand form. The check lines below are autogenerated (see the note
; at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
4815define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4816; GENERIC-LABEL: test_psignb:
4817; GENERIC:       # %bb.0:
4818; GENERIC-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4819; GENERIC-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4820; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4821; GENERIC-NEXT:    retq # sched: [1:1.00]
4822;
4823; ATOM-LABEL: test_psignb:
4824; ATOM:       # %bb.0:
4825; ATOM-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4826; ATOM-NEXT:    psignb (%rdi), %mm0 # sched: [1:1.00]
4827; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4828; ATOM-NEXT:    retq # sched: [79:39.50]
4829;
4830; SLM-LABEL: test_psignb:
4831; SLM:       # %bb.0:
4832; SLM-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4833; SLM-NEXT:    psignb (%rdi), %mm0 # sched: [4:1.00]
4834; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4835; SLM-NEXT:    retq # sched: [4:1.00]
4836;
4837; SANDY-LABEL: test_psignb:
4838; SANDY:       # %bb.0:
4839; SANDY-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4840; SANDY-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4841; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4842; SANDY-NEXT:    retq # sched: [1:1.00]
4843;
4844; HASWELL-LABEL: test_psignb:
4845; HASWELL:       # %bb.0:
4846; HASWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4847; HASWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4848; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4849; HASWELL-NEXT:    retq # sched: [7:1.00]
4850;
4851; BROADWELL-LABEL: test_psignb:
4852; BROADWELL:       # %bb.0:
4853; BROADWELL-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4854; BROADWELL-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4855; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4856; BROADWELL-NEXT:    retq # sched: [7:1.00]
4857;
4858; SKYLAKE-LABEL: test_psignb:
4859; SKYLAKE:       # %bb.0:
4860; SKYLAKE-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4861; SKYLAKE-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4862; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4863; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4864;
4865; SKX-LABEL: test_psignb:
4866; SKX:       # %bb.0:
4867; SKX-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4868; SKX-NEXT:    psignb (%rdi), %mm0 # sched: [6:0.50]
4869; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4870; SKX-NEXT:    retq # sched: [7:1.00]
4871;
4872; BTVER2-LABEL: test_psignb:
4873; BTVER2:       # %bb.0:
4874; BTVER2-NEXT:    psignb %mm1, %mm0 # sched: [1:0.50]
4875; BTVER2-NEXT:    psignb (%rdi), %mm0 # sched: [6:1.00]
4876; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4877; BTVER2-NEXT:    retq # sched: [4:1.00]
4878;
4879; ZNVER1-LABEL: test_psignb:
4880; ZNVER1:       # %bb.0:
4881; ZNVER1-NEXT:    psignb %mm1, %mm0 # sched: [1:0.25]
4882; ZNVER1-NEXT:    psignb (%rdi), %mm0 # sched: [8:0.50]
4883; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4884; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; First call uses two register operands; the second feeds in a loaded value so
  ; the load is folded into the instruction (the "(%rdi)" form checked above).
4885  %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1)
4886  %2 = load x86_mmx, x86_mmx *%a2, align 8
4887  %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
4888  %4 = bitcast x86_mmx %3 to i64
4889  ret i64 %4
4890}
; Intrinsic exercised by @test_psignb.
4891declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
4892
; Scheduling-annotation test for the MMX form of psignd. The intrinsic is applied
; to the two register arguments and then to that result plus a value loaded
; from %a2, so the checked assembly contains both the register form and the
; memory-operand form. The check lines below are autogenerated (see the note
; at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
4893define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4894; GENERIC-LABEL: test_psignd:
4895; GENERIC:       # %bb.0:
4896; GENERIC-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4897; GENERIC-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4898; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4899; GENERIC-NEXT:    retq # sched: [1:1.00]
4900;
4901; ATOM-LABEL: test_psignd:
4902; ATOM:       # %bb.0:
4903; ATOM-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4904; ATOM-NEXT:    psignd (%rdi), %mm0 # sched: [1:1.00]
4905; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4906; ATOM-NEXT:    retq # sched: [79:39.50]
4907;
4908; SLM-LABEL: test_psignd:
4909; SLM:       # %bb.0:
4910; SLM-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4911; SLM-NEXT:    psignd (%rdi), %mm0 # sched: [4:1.00]
4912; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4913; SLM-NEXT:    retq # sched: [4:1.00]
4914;
4915; SANDY-LABEL: test_psignd:
4916; SANDY:       # %bb.0:
4917; SANDY-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4918; SANDY-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4919; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4920; SANDY-NEXT:    retq # sched: [1:1.00]
4921;
4922; HASWELL-LABEL: test_psignd:
4923; HASWELL:       # %bb.0:
4924; HASWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4925; HASWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4926; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4927; HASWELL-NEXT:    retq # sched: [7:1.00]
4928;
4929; BROADWELL-LABEL: test_psignd:
4930; BROADWELL:       # %bb.0:
4931; BROADWELL-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4932; BROADWELL-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4933; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
4934; BROADWELL-NEXT:    retq # sched: [7:1.00]
4935;
4936; SKYLAKE-LABEL: test_psignd:
4937; SKYLAKE:       # %bb.0:
4938; SKYLAKE-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4939; SKYLAKE-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4940; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4941; SKYLAKE-NEXT:    retq # sched: [7:1.00]
4942;
4943; SKX-LABEL: test_psignd:
4944; SKX:       # %bb.0:
4945; SKX-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4946; SKX-NEXT:    psignd (%rdi), %mm0 # sched: [6:0.50]
4947; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4948; SKX-NEXT:    retq # sched: [7:1.00]
4949;
4950; BTVER2-LABEL: test_psignd:
4951; BTVER2:       # %bb.0:
4952; BTVER2-NEXT:    psignd %mm1, %mm0 # sched: [1:0.50]
4953; BTVER2-NEXT:    psignd (%rdi), %mm0 # sched: [6:1.00]
4954; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
4955; BTVER2-NEXT:    retq # sched: [4:1.00]
4956;
4957; ZNVER1-LABEL: test_psignd:
4958; ZNVER1:       # %bb.0:
4959; ZNVER1-NEXT:    psignd %mm1, %mm0 # sched: [1:0.25]
4960; ZNVER1-NEXT:    psignd (%rdi), %mm0 # sched: [8:0.50]
4961; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4962; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; First call uses two register operands; the second feeds in a loaded value so
  ; the load is folded into the instruction (the "(%rdi)" form checked above).
4963  %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1)
4964  %2 = load x86_mmx, x86_mmx *%a2, align 8
4965  %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
4966  %4 = bitcast x86_mmx %3 to i64
4967  ret i64 %4
4968}
; Intrinsic exercised by @test_psignd.
4969declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
4970
; Scheduling-annotation test for the MMX form of psignw. The intrinsic is applied
; to the two register arguments and then to that result plus a value loaded
; from %a2, so the checked assembly contains both the register form and the
; memory-operand form. The check lines below are autogenerated (see the note
; at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
4971define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
4972; GENERIC-LABEL: test_psignw:
4973; GENERIC:       # %bb.0:
4974; GENERIC-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
4975; GENERIC-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
4976; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4977; GENERIC-NEXT:    retq # sched: [1:1.00]
4978;
4979; ATOM-LABEL: test_psignw:
4980; ATOM:       # %bb.0:
4981; ATOM-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
4982; ATOM-NEXT:    psignw (%rdi), %mm0 # sched: [1:1.00]
4983; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
4984; ATOM-NEXT:    retq # sched: [79:39.50]
4985;
4986; SLM-LABEL: test_psignw:
4987; SLM:       # %bb.0:
4988; SLM-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
4989; SLM-NEXT:    psignw (%rdi), %mm0 # sched: [4:1.00]
4990; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
4991; SLM-NEXT:    retq # sched: [4:1.00]
4992;
4993; SANDY-LABEL: test_psignw:
4994; SANDY:       # %bb.0:
4995; SANDY-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
4996; SANDY-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
4997; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
4998; SANDY-NEXT:    retq # sched: [1:1.00]
4999;
5000; HASWELL-LABEL: test_psignw:
5001; HASWELL:       # %bb.0:
5002; HASWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
5003; HASWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
5004; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5005; HASWELL-NEXT:    retq # sched: [7:1.00]
5006;
5007; BROADWELL-LABEL: test_psignw:
5008; BROADWELL:       # %bb.0:
5009; BROADWELL-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
5010; BROADWELL-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
5011; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5012; BROADWELL-NEXT:    retq # sched: [7:1.00]
5013;
5014; SKYLAKE-LABEL: test_psignw:
5015; SKYLAKE:       # %bb.0:
5016; SKYLAKE-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
5017; SKYLAKE-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
5018; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5019; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5020;
5021; SKX-LABEL: test_psignw:
5022; SKX:       # %bb.0:
5023; SKX-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
5024; SKX-NEXT:    psignw (%rdi), %mm0 # sched: [6:0.50]
5025; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5026; SKX-NEXT:    retq # sched: [7:1.00]
5027;
5028; BTVER2-LABEL: test_psignw:
5029; BTVER2:       # %bb.0:
5030; BTVER2-NEXT:    psignw %mm1, %mm0 # sched: [1:0.50]
5031; BTVER2-NEXT:    psignw (%rdi), %mm0 # sched: [6:1.00]
5032; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5033; BTVER2-NEXT:    retq # sched: [4:1.00]
5034;
5035; ZNVER1-LABEL: test_psignw:
5036; ZNVER1:       # %bb.0:
5037; ZNVER1-NEXT:    psignw %mm1, %mm0 # sched: [1:0.25]
5038; ZNVER1-NEXT:    psignw (%rdi), %mm0 # sched: [8:0.50]
5039; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5040; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; First call uses two register operands; the second feeds in a loaded value so
  ; the load is folded into the instruction (the "(%rdi)" form checked above).
5041  %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1)
5042  %2 = load x86_mmx, x86_mmx *%a2, align 8
5043  %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
5044  %4 = bitcast x86_mmx %3 to i64
5045  ret i64 %4
5046}
; Intrinsic exercised by @test_psignw.
5047declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
5048
; Scheduling-annotation test for MMX pslld. Three operand forms are exercised in
; sequence, each consuming the previous result. The shift count comes first
; from a register argument, then from a value loaded from %a2, and finally
; from the constant 7 via the pslli intrinsic. The check lines below are
; autogenerated (see the note at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5049define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5050; GENERIC-LABEL: test_pslld:
5051; GENERIC:       # %bb.0:
5052; GENERIC-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5053; GENERIC-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5054; GENERIC-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5055; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5056; GENERIC-NEXT:    retq # sched: [1:1.00]
5057;
5058; ATOM-LABEL: test_pslld:
5059; ATOM:       # %bb.0:
5060; ATOM-NEXT:    pslld %mm1, %mm0 # sched: [2:1.00]
5061; ATOM-NEXT:    pslld (%rdi), %mm0 # sched: [3:1.50]
5062; ATOM-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
5063; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5064; ATOM-NEXT:    retq # sched: [79:39.50]
5065;
5066; SLM-LABEL: test_pslld:
5067; SLM:       # %bb.0:
5068; SLM-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5069; SLM-NEXT:    pslld (%rdi), %mm0 # sched: [4:1.00]
5070; SLM-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5071; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5072; SLM-NEXT:    retq # sched: [4:1.00]
5073;
5074; SANDY-LABEL: test_pslld:
5075; SANDY:       # %bb.0:
5076; SANDY-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5077; SANDY-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5078; SANDY-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5079; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5080; SANDY-NEXT:    retq # sched: [1:1.00]
5081;
5082; HASWELL-LABEL: test_pslld:
5083; HASWELL:       # %bb.0:
5084; HASWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5085; HASWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5086; HASWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5087; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5088; HASWELL-NEXT:    retq # sched: [7:1.00]
5089;
5090; BROADWELL-LABEL: test_pslld:
5091; BROADWELL:       # %bb.0:
5092; BROADWELL-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5093; BROADWELL-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5094; BROADWELL-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5095; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5096; BROADWELL-NEXT:    retq # sched: [7:1.00]
5097;
5098; SKYLAKE-LABEL: test_pslld:
5099; SKYLAKE:       # %bb.0:
5100; SKYLAKE-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5101; SKYLAKE-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5102; SKYLAKE-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5103; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5104; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5105;
5106; SKX-LABEL: test_pslld:
5107; SKX:       # %bb.0:
5108; SKX-NEXT:    pslld %mm1, %mm0 # sched: [1:1.00]
5109; SKX-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5110; SKX-NEXT:    pslld $7, %mm0 # sched: [1:1.00]
5111; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5112; SKX-NEXT:    retq # sched: [7:1.00]
5113;
5114; BTVER2-LABEL: test_pslld:
5115; BTVER2:       # %bb.0:
5116; BTVER2-NEXT:    pslld %mm1, %mm0 # sched: [1:0.50]
5117; BTVER2-NEXT:    pslld (%rdi), %mm0 # sched: [6:1.00]
5118; BTVER2-NEXT:    pslld $7, %mm0 # sched: [1:0.50]
5119; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5120; BTVER2-NEXT:    retq # sched: [4:1.00]
5121;
5122; ZNVER1-LABEL: test_pslld:
5123; ZNVER1:       # %bb.0:
5124; ZNVER1-NEXT:    pslld %mm1, %mm0 # sched: [1:0.25]
5125; ZNVER1-NEXT:    pslld (%rdi), %mm0 # sched: [8:0.50]
5126; ZNVER1-NEXT:    pslld $7, %mm0 # sched: [1:0.25]
5127; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5128; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-count form, then the count loaded from memory (folded into the
  ; instruction as "(%rdi)"), then the immediate form with count 7.
5129  %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1)
5130  %2 = load x86_mmx, x86_mmx *%a2, align 8
5131  %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2)
5132  %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
5133  %5 = bitcast x86_mmx %4 to i64
5134  ret i64 %5
5135}
; Intrinsics exercised by @test_pslld (variable-count form, then immediate form).
5136declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
5137declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
5138
; Scheduling-annotation test for MMX psllq. Three operand forms are exercised in
; sequence, each consuming the previous result. The shift count comes first
; from a register argument, then from a value loaded from %a2, and finally
; from the constant 7 via the pslli intrinsic. The check lines below are
; autogenerated (see the note at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5139define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5140; GENERIC-LABEL: test_psllq:
5141; GENERIC:       # %bb.0:
5142; GENERIC-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5143; GENERIC-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5144; GENERIC-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5145; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5146; GENERIC-NEXT:    retq # sched: [1:1.00]
5147;
5148; ATOM-LABEL: test_psllq:
5149; ATOM:       # %bb.0:
5150; ATOM-NEXT:    psllq %mm1, %mm0 # sched: [2:1.00]
5151; ATOM-NEXT:    psllq (%rdi), %mm0 # sched: [3:1.50]
5152; ATOM-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
5153; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5154; ATOM-NEXT:    retq # sched: [79:39.50]
5155;
5156; SLM-LABEL: test_psllq:
5157; SLM:       # %bb.0:
5158; SLM-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5159; SLM-NEXT:    psllq (%rdi), %mm0 # sched: [4:1.00]
5160; SLM-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5161; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5162; SLM-NEXT:    retq # sched: [4:1.00]
5163;
5164; SANDY-LABEL: test_psllq:
5165; SANDY:       # %bb.0:
5166; SANDY-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5167; SANDY-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5168; SANDY-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5169; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5170; SANDY-NEXT:    retq # sched: [1:1.00]
5171;
5172; HASWELL-LABEL: test_psllq:
5173; HASWELL:       # %bb.0:
5174; HASWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5175; HASWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5176; HASWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5177; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5178; HASWELL-NEXT:    retq # sched: [7:1.00]
5179;
5180; BROADWELL-LABEL: test_psllq:
5181; BROADWELL:       # %bb.0:
5182; BROADWELL-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5183; BROADWELL-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5184; BROADWELL-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5185; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5186; BROADWELL-NEXT:    retq # sched: [7:1.00]
5187;
5188; SKYLAKE-LABEL: test_psllq:
5189; SKYLAKE:       # %bb.0:
5190; SKYLAKE-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5191; SKYLAKE-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5192; SKYLAKE-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5193; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5194; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5195;
5196; SKX-LABEL: test_psllq:
5197; SKX:       # %bb.0:
5198; SKX-NEXT:    psllq %mm1, %mm0 # sched: [1:1.00]
5199; SKX-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5200; SKX-NEXT:    psllq $7, %mm0 # sched: [1:1.00]
5201; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5202; SKX-NEXT:    retq # sched: [7:1.00]
5203;
5204; BTVER2-LABEL: test_psllq:
5205; BTVER2:       # %bb.0:
5206; BTVER2-NEXT:    psllq %mm1, %mm0 # sched: [1:0.50]
5207; BTVER2-NEXT:    psllq (%rdi), %mm0 # sched: [6:1.00]
5208; BTVER2-NEXT:    psllq $7, %mm0 # sched: [1:0.50]
5209; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5210; BTVER2-NEXT:    retq # sched: [4:1.00]
5211;
5212; ZNVER1-LABEL: test_psllq:
5213; ZNVER1:       # %bb.0:
5214; ZNVER1-NEXT:    psllq %mm1, %mm0 # sched: [1:0.25]
5215; ZNVER1-NEXT:    psllq (%rdi), %mm0 # sched: [8:0.50]
5216; ZNVER1-NEXT:    psllq $7, %mm0 # sched: [1:0.25]
5217; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5218; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-count form, then the count loaded from memory (folded into the
  ; instruction as "(%rdi)"), then the immediate form with count 7.
5219  %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1)
5220  %2 = load x86_mmx, x86_mmx *%a2, align 8
5221  %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2)
5222  %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
5223  %5 = bitcast x86_mmx %4 to i64
5224  ret i64 %5
5225}
; Intrinsics exercised by @test_psllq (variable-count form, then immediate form).
5226declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
5227declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
5228
; Scheduling-annotation test for MMX psllw. Three operand forms are exercised in
; sequence, each consuming the previous result. The shift count comes first
; from a register argument, then from a value loaded from %a2, and finally
; from the constant 7 via the pslli intrinsic. The check lines below are
; autogenerated (see the note at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5229define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5230; GENERIC-LABEL: test_psllw:
5231; GENERIC:       # %bb.0:
5232; GENERIC-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5233; GENERIC-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5234; GENERIC-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5235; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5236; GENERIC-NEXT:    retq # sched: [1:1.00]
5237;
5238; ATOM-LABEL: test_psllw:
5239; ATOM:       # %bb.0:
5240; ATOM-NEXT:    psllw %mm1, %mm0 # sched: [2:1.00]
5241; ATOM-NEXT:    psllw (%rdi), %mm0 # sched: [3:1.50]
5242; ATOM-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
5243; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5244; ATOM-NEXT:    retq # sched: [79:39.50]
5245;
5246; SLM-LABEL: test_psllw:
5247; SLM:       # %bb.0:
5248; SLM-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5249; SLM-NEXT:    psllw (%rdi), %mm0 # sched: [4:1.00]
5250; SLM-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5251; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5252; SLM-NEXT:    retq # sched: [4:1.00]
5253;
5254; SANDY-LABEL: test_psllw:
5255; SANDY:       # %bb.0:
5256; SANDY-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5257; SANDY-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5258; SANDY-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5259; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5260; SANDY-NEXT:    retq # sched: [1:1.00]
5261;
5262; HASWELL-LABEL: test_psllw:
5263; HASWELL:       # %bb.0:
5264; HASWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5265; HASWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5266; HASWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5267; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5268; HASWELL-NEXT:    retq # sched: [7:1.00]
5269;
5270; BROADWELL-LABEL: test_psllw:
5271; BROADWELL:       # %bb.0:
5272; BROADWELL-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5273; BROADWELL-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5274; BROADWELL-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5275; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5276; BROADWELL-NEXT:    retq # sched: [7:1.00]
5277;
5278; SKYLAKE-LABEL: test_psllw:
5279; SKYLAKE:       # %bb.0:
5280; SKYLAKE-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5281; SKYLAKE-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5282; SKYLAKE-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5283; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5284; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5285;
5286; SKX-LABEL: test_psllw:
5287; SKX:       # %bb.0:
5288; SKX-NEXT:    psllw %mm1, %mm0 # sched: [1:1.00]
5289; SKX-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5290; SKX-NEXT:    psllw $7, %mm0 # sched: [1:1.00]
5291; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5292; SKX-NEXT:    retq # sched: [7:1.00]
5293;
5294; BTVER2-LABEL: test_psllw:
5295; BTVER2:       # %bb.0:
5296; BTVER2-NEXT:    psllw %mm1, %mm0 # sched: [1:0.50]
5297; BTVER2-NEXT:    psllw (%rdi), %mm0 # sched: [6:1.00]
5298; BTVER2-NEXT:    psllw $7, %mm0 # sched: [1:0.50]
5299; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5300; BTVER2-NEXT:    retq # sched: [4:1.00]
5301;
5302; ZNVER1-LABEL: test_psllw:
5303; ZNVER1:       # %bb.0:
5304; ZNVER1-NEXT:    psllw %mm1, %mm0 # sched: [1:0.25]
5305; ZNVER1-NEXT:    psllw (%rdi), %mm0 # sched: [8:0.50]
5306; ZNVER1-NEXT:    psllw $7, %mm0 # sched: [1:0.25]
5307; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5308; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Register-count form, then the count loaded from memory (folded into the
  ; instruction as "(%rdi)"), then the immediate form with count 7.
5309  %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1)
5310  %2 = load x86_mmx, x86_mmx *%a2, align 8
5311  %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2)
5312  %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7)
  ; Return the MMX result as an i64 (the movq to %rax in the checks).
5313  %5 = bitcast x86_mmx %4 to i64
5314  ret i64 %5
5315}
; Intrinsics exercised by @test_psllw (variable-count form, then immediate form).
5316declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
5317declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
5318
; test_psrad - scheduling-annotation test for the psrad instruction.
; The IR calls @llvm.x86.mmx.psra.d twice (first on %a0/%a1, then on that
; result and a value loaded through %a2) and @llvm.x86.mmx.psrai.d once with
; the constant 7. The assertions below expect, for every CPU prefix, the
; register, memory and immediate forms of psrad in that order, followed by a
; movq of %mm0 into %rax and a retq, each with its "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5319define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5320; GENERIC-LABEL: test_psrad:
5321; GENERIC:       # %bb.0:
5322; GENERIC-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5323; GENERIC-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5324; GENERIC-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5325; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5326; GENERIC-NEXT:    retq # sched: [1:1.00]
5327;
5328; ATOM-LABEL: test_psrad:
5329; ATOM:       # %bb.0:
5330; ATOM-NEXT:    psrad %mm1, %mm0 # sched: [2:1.00]
5331; ATOM-NEXT:    psrad (%rdi), %mm0 # sched: [3:1.50]
5332; ATOM-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
5333; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5334; ATOM-NEXT:    retq # sched: [79:39.50]
5335;
5336; SLM-LABEL: test_psrad:
5337; SLM:       # %bb.0:
5338; SLM-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5339; SLM-NEXT:    psrad (%rdi), %mm0 # sched: [4:1.00]
5340; SLM-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5341; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5342; SLM-NEXT:    retq # sched: [4:1.00]
5343;
5344; SANDY-LABEL: test_psrad:
5345; SANDY:       # %bb.0:
5346; SANDY-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5347; SANDY-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5348; SANDY-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5349; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5350; SANDY-NEXT:    retq # sched: [1:1.00]
5351;
5352; HASWELL-LABEL: test_psrad:
5353; HASWELL:       # %bb.0:
5354; HASWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5355; HASWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5356; HASWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5357; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5358; HASWELL-NEXT:    retq # sched: [7:1.00]
5359;
5360; BROADWELL-LABEL: test_psrad:
5361; BROADWELL:       # %bb.0:
5362; BROADWELL-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5363; BROADWELL-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5364; BROADWELL-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5365; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5366; BROADWELL-NEXT:    retq # sched: [7:1.00]
5367;
5368; SKYLAKE-LABEL: test_psrad:
5369; SKYLAKE:       # %bb.0:
5370; SKYLAKE-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5371; SKYLAKE-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5372; SKYLAKE-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5373; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5374; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5375;
5376; SKX-LABEL: test_psrad:
5377; SKX:       # %bb.0:
5378; SKX-NEXT:    psrad %mm1, %mm0 # sched: [1:1.00]
5379; SKX-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5380; SKX-NEXT:    psrad $7, %mm0 # sched: [1:1.00]
5381; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5382; SKX-NEXT:    retq # sched: [7:1.00]
5383;
5384; BTVER2-LABEL: test_psrad:
5385; BTVER2:       # %bb.0:
5386; BTVER2-NEXT:    psrad %mm1, %mm0 # sched: [1:0.50]
5387; BTVER2-NEXT:    psrad (%rdi), %mm0 # sched: [6:1.00]
5388; BTVER2-NEXT:    psrad $7, %mm0 # sched: [1:0.50]
5389; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5390; BTVER2-NEXT:    retq # sched: [4:1.00]
5391;
5392; ZNVER1-LABEL: test_psrad:
5393; ZNVER1:       # %bb.0:
5394; ZNVER1-NEXT:    psrad %mm1, %mm0 # sched: [1:0.25]
5395; ZNVER1-NEXT:    psrad (%rdi), %mm0 # sched: [8:0.50]
5396; ZNVER1-NEXT:    psrad $7, %mm0 # sched: [1:0.25]
5397; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5398; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psrad %mm1, %mm0".
5399  %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psrad (%rdi), %mm0".
5400  %2 = load x86_mmx, x86_mmx *%a2, align 8
5401  %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2)
  ; Constant i32 operand: expected as "psrad $7, %mm0".
5402  %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5403  %5 = bitcast x86_mmx %4 to i64
5404  ret i64 %5
5405}
; Intrinsics called by test_psrad: one takes two x86_mmx operands, the other
; an x86_mmx operand plus an i32.
5406declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
5407declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
5408
; test_psraw - scheduling-annotation test for the psraw instruction.
; The IR calls @llvm.x86.mmx.psra.w twice (first on %a0/%a1, then on that
; result and a value loaded through %a2) and @llvm.x86.mmx.psrai.w once with
; the constant 7. The assertions below expect, for every CPU prefix, the
; register, memory and immediate forms of psraw in that order, followed by a
; movq of %mm0 into %rax and a retq, each with its "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5409define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5410; GENERIC-LABEL: test_psraw:
5411; GENERIC:       # %bb.0:
5412; GENERIC-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5413; GENERIC-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5414; GENERIC-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5415; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5416; GENERIC-NEXT:    retq # sched: [1:1.00]
5417;
5418; ATOM-LABEL: test_psraw:
5419; ATOM:       # %bb.0:
5420; ATOM-NEXT:    psraw %mm1, %mm0 # sched: [2:1.00]
5421; ATOM-NEXT:    psraw (%rdi), %mm0 # sched: [3:1.50]
5422; ATOM-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
5423; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5424; ATOM-NEXT:    retq # sched: [79:39.50]
5425;
5426; SLM-LABEL: test_psraw:
5427; SLM:       # %bb.0:
5428; SLM-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5429; SLM-NEXT:    psraw (%rdi), %mm0 # sched: [4:1.00]
5430; SLM-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5431; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5432; SLM-NEXT:    retq # sched: [4:1.00]
5433;
5434; SANDY-LABEL: test_psraw:
5435; SANDY:       # %bb.0:
5436; SANDY-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5437; SANDY-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5438; SANDY-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5439; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5440; SANDY-NEXT:    retq # sched: [1:1.00]
5441;
5442; HASWELL-LABEL: test_psraw:
5443; HASWELL:       # %bb.0:
5444; HASWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5445; HASWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5446; HASWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5447; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5448; HASWELL-NEXT:    retq # sched: [7:1.00]
5449;
5450; BROADWELL-LABEL: test_psraw:
5451; BROADWELL:       # %bb.0:
5452; BROADWELL-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5453; BROADWELL-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5454; BROADWELL-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5455; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5456; BROADWELL-NEXT:    retq # sched: [7:1.00]
5457;
5458; SKYLAKE-LABEL: test_psraw:
5459; SKYLAKE:       # %bb.0:
5460; SKYLAKE-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5461; SKYLAKE-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5462; SKYLAKE-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5463; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5464; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5465;
5466; SKX-LABEL: test_psraw:
5467; SKX:       # %bb.0:
5468; SKX-NEXT:    psraw %mm1, %mm0 # sched: [1:1.00]
5469; SKX-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5470; SKX-NEXT:    psraw $7, %mm0 # sched: [1:1.00]
5471; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5472; SKX-NEXT:    retq # sched: [7:1.00]
5473;
5474; BTVER2-LABEL: test_psraw:
5475; BTVER2:       # %bb.0:
5476; BTVER2-NEXT:    psraw %mm1, %mm0 # sched: [1:0.50]
5477; BTVER2-NEXT:    psraw (%rdi), %mm0 # sched: [6:1.00]
5478; BTVER2-NEXT:    psraw $7, %mm0 # sched: [1:0.50]
5479; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5480; BTVER2-NEXT:    retq # sched: [4:1.00]
5481;
5482; ZNVER1-LABEL: test_psraw:
5483; ZNVER1:       # %bb.0:
5484; ZNVER1-NEXT:    psraw %mm1, %mm0 # sched: [1:0.25]
5485; ZNVER1-NEXT:    psraw (%rdi), %mm0 # sched: [8:0.50]
5486; ZNVER1-NEXT:    psraw $7, %mm0 # sched: [1:0.25]
5487; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5488; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psraw %mm1, %mm0".
5489  %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psraw (%rdi), %mm0".
5490  %2 = load x86_mmx, x86_mmx *%a2, align 8
5491  %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2)
  ; Constant i32 operand: expected as "psraw $7, %mm0".
5492  %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5493  %5 = bitcast x86_mmx %4 to i64
5494  ret i64 %5
5495}
; Intrinsics called by test_psraw: one takes two x86_mmx operands, the other
; an x86_mmx operand plus an i32.
5496declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
5497declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
5498
; test_psrld - scheduling-annotation test for the psrld instruction.
; The IR calls @llvm.x86.mmx.psrl.d twice (first on %a0/%a1, then on that
; result and a value loaded through %a2) and @llvm.x86.mmx.psrli.d once with
; the constant 7. The assertions below expect, for every CPU prefix, the
; register, memory and immediate forms of psrld in that order, followed by a
; movq of %mm0 into %rax and a retq, each with its "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5499define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5500; GENERIC-LABEL: test_psrld:
5501; GENERIC:       # %bb.0:
5502; GENERIC-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5503; GENERIC-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5504; GENERIC-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5505; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5506; GENERIC-NEXT:    retq # sched: [1:1.00]
5507;
5508; ATOM-LABEL: test_psrld:
5509; ATOM:       # %bb.0:
5510; ATOM-NEXT:    psrld %mm1, %mm0 # sched: [2:1.00]
5511; ATOM-NEXT:    psrld (%rdi), %mm0 # sched: [3:1.50]
5512; ATOM-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
5513; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5514; ATOM-NEXT:    retq # sched: [79:39.50]
5515;
5516; SLM-LABEL: test_psrld:
5517; SLM:       # %bb.0:
5518; SLM-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5519; SLM-NEXT:    psrld (%rdi), %mm0 # sched: [4:1.00]
5520; SLM-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5521; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5522; SLM-NEXT:    retq # sched: [4:1.00]
5523;
5524; SANDY-LABEL: test_psrld:
5525; SANDY:       # %bb.0:
5526; SANDY-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5527; SANDY-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5528; SANDY-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5529; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5530; SANDY-NEXT:    retq # sched: [1:1.00]
5531;
5532; HASWELL-LABEL: test_psrld:
5533; HASWELL:       # %bb.0:
5534; HASWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5535; HASWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5536; HASWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5537; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5538; HASWELL-NEXT:    retq # sched: [7:1.00]
5539;
5540; BROADWELL-LABEL: test_psrld:
5541; BROADWELL:       # %bb.0:
5542; BROADWELL-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5543; BROADWELL-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5544; BROADWELL-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5545; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5546; BROADWELL-NEXT:    retq # sched: [7:1.00]
5547;
5548; SKYLAKE-LABEL: test_psrld:
5549; SKYLAKE:       # %bb.0:
5550; SKYLAKE-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5551; SKYLAKE-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5552; SKYLAKE-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5553; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5554; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5555;
5556; SKX-LABEL: test_psrld:
5557; SKX:       # %bb.0:
5558; SKX-NEXT:    psrld %mm1, %mm0 # sched: [1:1.00]
5559; SKX-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5560; SKX-NEXT:    psrld $7, %mm0 # sched: [1:1.00]
5561; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5562; SKX-NEXT:    retq # sched: [7:1.00]
5563;
5564; BTVER2-LABEL: test_psrld:
5565; BTVER2:       # %bb.0:
5566; BTVER2-NEXT:    psrld %mm1, %mm0 # sched: [1:0.50]
5567; BTVER2-NEXT:    psrld (%rdi), %mm0 # sched: [6:1.00]
5568; BTVER2-NEXT:    psrld $7, %mm0 # sched: [1:0.50]
5569; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5570; BTVER2-NEXT:    retq # sched: [4:1.00]
5571;
5572; ZNVER1-LABEL: test_psrld:
5573; ZNVER1:       # %bb.0:
5574; ZNVER1-NEXT:    psrld %mm1, %mm0 # sched: [1:0.25]
5575; ZNVER1-NEXT:    psrld (%rdi), %mm0 # sched: [8:0.50]
5576; ZNVER1-NEXT:    psrld $7, %mm0 # sched: [1:0.25]
5577; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5578; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psrld %mm1, %mm0".
5579  %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psrld (%rdi), %mm0".
5580  %2 = load x86_mmx, x86_mmx *%a2, align 8
5581  %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2)
  ; Constant i32 operand: expected as "psrld $7, %mm0".
5582  %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5583  %5 = bitcast x86_mmx %4 to i64
5584  ret i64 %5
5585}
; Intrinsics called by test_psrld: one takes two x86_mmx operands, the other
; an x86_mmx operand plus an i32.
5586declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
5587declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
5588
; test_psrlq - scheduling-annotation test for the psrlq instruction.
; The IR calls @llvm.x86.mmx.psrl.q twice (first on %a0/%a1, then on that
; result and a value loaded through %a2) and @llvm.x86.mmx.psrli.q once with
; the constant 7. The assertions below expect, for every CPU prefix, the
; register, memory and immediate forms of psrlq in that order, followed by a
; movq of %mm0 into %rax and a retq, each with its "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5589define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5590; GENERIC-LABEL: test_psrlq:
5591; GENERIC:       # %bb.0:
5592; GENERIC-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5593; GENERIC-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5594; GENERIC-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5595; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5596; GENERIC-NEXT:    retq # sched: [1:1.00]
5597;
5598; ATOM-LABEL: test_psrlq:
5599; ATOM:       # %bb.0:
5600; ATOM-NEXT:    psrlq %mm1, %mm0 # sched: [2:1.00]
5601; ATOM-NEXT:    psrlq (%rdi), %mm0 # sched: [3:1.50]
5602; ATOM-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
5603; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5604; ATOM-NEXT:    retq # sched: [79:39.50]
5605;
5606; SLM-LABEL: test_psrlq:
5607; SLM:       # %bb.0:
5608; SLM-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5609; SLM-NEXT:    psrlq (%rdi), %mm0 # sched: [4:1.00]
5610; SLM-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5611; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5612; SLM-NEXT:    retq # sched: [4:1.00]
5613;
5614; SANDY-LABEL: test_psrlq:
5615; SANDY:       # %bb.0:
5616; SANDY-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5617; SANDY-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5618; SANDY-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5619; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5620; SANDY-NEXT:    retq # sched: [1:1.00]
5621;
5622; HASWELL-LABEL: test_psrlq:
5623; HASWELL:       # %bb.0:
5624; HASWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5625; HASWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5626; HASWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5627; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5628; HASWELL-NEXT:    retq # sched: [7:1.00]
5629;
5630; BROADWELL-LABEL: test_psrlq:
5631; BROADWELL:       # %bb.0:
5632; BROADWELL-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5633; BROADWELL-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5634; BROADWELL-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5635; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5636; BROADWELL-NEXT:    retq # sched: [7:1.00]
5637;
5638; SKYLAKE-LABEL: test_psrlq:
5639; SKYLAKE:       # %bb.0:
5640; SKYLAKE-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5641; SKYLAKE-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5642; SKYLAKE-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5643; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5644; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5645;
5646; SKX-LABEL: test_psrlq:
5647; SKX:       # %bb.0:
5648; SKX-NEXT:    psrlq %mm1, %mm0 # sched: [1:1.00]
5649; SKX-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5650; SKX-NEXT:    psrlq $7, %mm0 # sched: [1:1.00]
5651; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5652; SKX-NEXT:    retq # sched: [7:1.00]
5653;
5654; BTVER2-LABEL: test_psrlq:
5655; BTVER2:       # %bb.0:
5656; BTVER2-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.50]
5657; BTVER2-NEXT:    psrlq (%rdi), %mm0 # sched: [6:1.00]
5658; BTVER2-NEXT:    psrlq $7, %mm0 # sched: [1:0.50]
5659; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5660; BTVER2-NEXT:    retq # sched: [4:1.00]
5661;
5662; ZNVER1-LABEL: test_psrlq:
5663; ZNVER1:       # %bb.0:
5664; ZNVER1-NEXT:    psrlq %mm1, %mm0 # sched: [1:0.25]
5665; ZNVER1-NEXT:    psrlq (%rdi), %mm0 # sched: [8:0.50]
5666; ZNVER1-NEXT:    psrlq $7, %mm0 # sched: [1:0.25]
5667; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5668; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psrlq %mm1, %mm0".
5669  %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psrlq (%rdi), %mm0".
5670  %2 = load x86_mmx, x86_mmx *%a2, align 8
5671  %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2)
  ; Constant i32 operand: expected as "psrlq $7, %mm0".
5672  %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5673  %5 = bitcast x86_mmx %4 to i64
5674  ret i64 %5
5675}
; Intrinsics called by test_psrlq: one takes two x86_mmx operands, the other
; an x86_mmx operand plus an i32.
5676declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
5677declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
5678
; test_psrlw - scheduling-annotation test for the psrlw instruction.
; The IR calls @llvm.x86.mmx.psrl.w twice (first on %a0/%a1, then on that
; result and a value loaded through %a2) and @llvm.x86.mmx.psrli.w once with
; the constant 7. The assertions below expect, for every CPU prefix, the
; register, memory and immediate forms of psrlw in that order, followed by a
; movq of %mm0 into %rax and a retq, each with its "# sched" annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5679define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5680; GENERIC-LABEL: test_psrlw:
5681; GENERIC:       # %bb.0:
5682; GENERIC-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5683; GENERIC-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5684; GENERIC-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5685; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5686; GENERIC-NEXT:    retq # sched: [1:1.00]
5687;
5688; ATOM-LABEL: test_psrlw:
5689; ATOM:       # %bb.0:
5690; ATOM-NEXT:    psrlw %mm1, %mm0 # sched: [2:1.00]
5691; ATOM-NEXT:    psrlw (%rdi), %mm0 # sched: [3:1.50]
5692; ATOM-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
5693; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5694; ATOM-NEXT:    retq # sched: [79:39.50]
5695;
5696; SLM-LABEL: test_psrlw:
5697; SLM:       # %bb.0:
5698; SLM-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5699; SLM-NEXT:    psrlw (%rdi), %mm0 # sched: [4:1.00]
5700; SLM-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5701; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5702; SLM-NEXT:    retq # sched: [4:1.00]
5703;
5704; SANDY-LABEL: test_psrlw:
5705; SANDY:       # %bb.0:
5706; SANDY-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5707; SANDY-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5708; SANDY-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5709; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5710; SANDY-NEXT:    retq # sched: [1:1.00]
5711;
5712; HASWELL-LABEL: test_psrlw:
5713; HASWELL:       # %bb.0:
5714; HASWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5715; HASWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5716; HASWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5717; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5718; HASWELL-NEXT:    retq # sched: [7:1.00]
5719;
5720; BROADWELL-LABEL: test_psrlw:
5721; BROADWELL:       # %bb.0:
5722; BROADWELL-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5723; BROADWELL-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5724; BROADWELL-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5725; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5726; BROADWELL-NEXT:    retq # sched: [7:1.00]
5727;
5728; SKYLAKE-LABEL: test_psrlw:
5729; SKYLAKE:       # %bb.0:
5730; SKYLAKE-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5731; SKYLAKE-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5732; SKYLAKE-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5733; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5734; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5735;
5736; SKX-LABEL: test_psrlw:
5737; SKX:       # %bb.0:
5738; SKX-NEXT:    psrlw %mm1, %mm0 # sched: [1:1.00]
5739; SKX-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5740; SKX-NEXT:    psrlw $7, %mm0 # sched: [1:1.00]
5741; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5742; SKX-NEXT:    retq # sched: [7:1.00]
5743;
5744; BTVER2-LABEL: test_psrlw:
5745; BTVER2:       # %bb.0:
5746; BTVER2-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.50]
5747; BTVER2-NEXT:    psrlw (%rdi), %mm0 # sched: [6:1.00]
5748; BTVER2-NEXT:    psrlw $7, %mm0 # sched: [1:0.50]
5749; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5750; BTVER2-NEXT:    retq # sched: [4:1.00]
5751;
5752; ZNVER1-LABEL: test_psrlw:
5753; ZNVER1:       # %bb.0:
5754; ZNVER1-NEXT:    psrlw %mm1, %mm0 # sched: [1:0.25]
5755; ZNVER1-NEXT:    psrlw (%rdi), %mm0 # sched: [8:0.50]
5756; ZNVER1-NEXT:    psrlw $7, %mm0 # sched: [1:0.25]
5757; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5758; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psrlw %mm1, %mm0".
5759  %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psrlw (%rdi), %mm0".
5760  %2 = load x86_mmx, x86_mmx *%a2, align 8
5761  %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2)
  ; Constant i32 operand: expected as "psrlw $7, %mm0".
5762  %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5763  %5 = bitcast x86_mmx %4 to i64
5764  ret i64 %5
5765}
; Intrinsics called by test_psrlw: one takes two x86_mmx operands, the other
; an x86_mmx operand plus an i32.
5766declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
5767declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
5768
; test_psubb - scheduling-annotation test for the psubb instruction.
; The IR calls @llvm.x86.mmx.psub.b twice: first on %a0/%a1, then on that
; result and a value loaded through %a2. The assertions below expect, for
; every CPU prefix, the register form and then the memory form of psubb,
; followed by a movq of %mm0 into %rax and a retq, each with its "# sched"
; annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5769define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5770; GENERIC-LABEL: test_psubb:
5771; GENERIC:       # %bb.0:
5772; GENERIC-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
5773; GENERIC-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
5774; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5775; GENERIC-NEXT:    retq # sched: [1:1.00]
5776;
5777; ATOM-LABEL: test_psubb:
5778; ATOM:       # %bb.0:
5779; ATOM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5780; ATOM-NEXT:    psubb (%rdi), %mm0 # sched: [1:1.00]
5781; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5782; ATOM-NEXT:    retq # sched: [79:39.50]
5783;
5784; SLM-LABEL: test_psubb:
5785; SLM:       # %bb.0:
5786; SLM-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5787; SLM-NEXT:    psubb (%rdi), %mm0 # sched: [4:1.00]
5788; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5789; SLM-NEXT:    retq # sched: [4:1.00]
5790;
5791; SANDY-LABEL: test_psubb:
5792; SANDY:       # %bb.0:
5793; SANDY-NEXT:    psubb %mm1, %mm0 # sched: [3:1.00]
5794; SANDY-NEXT:    psubb (%rdi), %mm0 # sched: [8:1.00]
5795; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5796; SANDY-NEXT:    retq # sched: [1:1.00]
5797;
5798; HASWELL-LABEL: test_psubb:
5799; HASWELL:       # %bb.0:
5800; HASWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5801; HASWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
5802; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5803; HASWELL-NEXT:    retq # sched: [7:1.00]
5804;
5805; BROADWELL-LABEL: test_psubb:
5806; BROADWELL:       # %bb.0:
5807; BROADWELL-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5808; BROADWELL-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
5809; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5810; BROADWELL-NEXT:    retq # sched: [7:1.00]
5811;
5812; SKYLAKE-LABEL: test_psubb:
5813; SKYLAKE:       # %bb.0:
5814; SKYLAKE-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5815; SKYLAKE-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
5816; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5817; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5818;
5819; SKX-LABEL: test_psubb:
5820; SKX:       # %bb.0:
5821; SKX-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5822; SKX-NEXT:    psubb (%rdi), %mm0 # sched: [6:0.50]
5823; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5824; SKX-NEXT:    retq # sched: [7:1.00]
5825;
5826; BTVER2-LABEL: test_psubb:
5827; BTVER2:       # %bb.0:
5828; BTVER2-NEXT:    psubb %mm1, %mm0 # sched: [1:0.50]
5829; BTVER2-NEXT:    psubb (%rdi), %mm0 # sched: [6:1.00]
5830; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5831; BTVER2-NEXT:    retq # sched: [4:1.00]
5832;
5833; ZNVER1-LABEL: test_psubb:
5834; ZNVER1:       # %bb.0:
5835; ZNVER1-NEXT:    psubb %mm1, %mm0 # sched: [1:0.25]
5836; ZNVER1-NEXT:    psubb (%rdi), %mm0 # sched: [8:0.50]
5837; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5838; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psubb %mm1, %mm0".
5839  %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psubb (%rdi), %mm0".
5840  %2 = load x86_mmx, x86_mmx *%a2, align 8
5841  %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, x86_mmx %2)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5842  %4 = bitcast x86_mmx %3 to i64
5843  ret i64 %4
5844}
; Intrinsic called by test_psubb; it takes two x86_mmx operands.
5845declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
5846
; test_psubd - scheduling-annotation test for the psubd instruction.
; The IR calls @llvm.x86.mmx.psub.d twice: first on %a0/%a1, then on that
; result and a value loaded through %a2. The assertions below expect, for
; every CPU prefix, the register form and then the memory form of psubd,
; followed by a movq of %mm0 into %rax and a retq, each with its "# sched"
; annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5847define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5848; GENERIC-LABEL: test_psubd:
5849; GENERIC:       # %bb.0:
5850; GENERIC-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
5851; GENERIC-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
5852; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5853; GENERIC-NEXT:    retq # sched: [1:1.00]
5854;
5855; ATOM-LABEL: test_psubd:
5856; ATOM:       # %bb.0:
5857; ATOM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5858; ATOM-NEXT:    psubd (%rdi), %mm0 # sched: [1:1.00]
5859; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5860; ATOM-NEXT:    retq # sched: [79:39.50]
5861;
5862; SLM-LABEL: test_psubd:
5863; SLM:       # %bb.0:
5864; SLM-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5865; SLM-NEXT:    psubd (%rdi), %mm0 # sched: [4:1.00]
5866; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5867; SLM-NEXT:    retq # sched: [4:1.00]
5868;
5869; SANDY-LABEL: test_psubd:
5870; SANDY:       # %bb.0:
5871; SANDY-NEXT:    psubd %mm1, %mm0 # sched: [3:1.00]
5872; SANDY-NEXT:    psubd (%rdi), %mm0 # sched: [8:1.00]
5873; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5874; SANDY-NEXT:    retq # sched: [1:1.00]
5875;
5876; HASWELL-LABEL: test_psubd:
5877; HASWELL:       # %bb.0:
5878; HASWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5879; HASWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
5880; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5881; HASWELL-NEXT:    retq # sched: [7:1.00]
5882;
5883; BROADWELL-LABEL: test_psubd:
5884; BROADWELL:       # %bb.0:
5885; BROADWELL-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5886; BROADWELL-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
5887; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5888; BROADWELL-NEXT:    retq # sched: [7:1.00]
5889;
5890; SKYLAKE-LABEL: test_psubd:
5891; SKYLAKE:       # %bb.0:
5892; SKYLAKE-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5893; SKYLAKE-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
5894; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5895; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5896;
5897; SKX-LABEL: test_psubd:
5898; SKX:       # %bb.0:
5899; SKX-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5900; SKX-NEXT:    psubd (%rdi), %mm0 # sched: [6:0.50]
5901; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5902; SKX-NEXT:    retq # sched: [7:1.00]
5903;
5904; BTVER2-LABEL: test_psubd:
5905; BTVER2:       # %bb.0:
5906; BTVER2-NEXT:    psubd %mm1, %mm0 # sched: [1:0.50]
5907; BTVER2-NEXT:    psubd (%rdi), %mm0 # sched: [6:1.00]
5908; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5909; BTVER2-NEXT:    retq # sched: [4:1.00]
5910;
5911; ZNVER1-LABEL: test_psubd:
5912; ZNVER1:       # %bb.0:
5913; ZNVER1-NEXT:    psubd %mm1, %mm0 # sched: [1:0.25]
5914; ZNVER1-NEXT:    psubd (%rdi), %mm0 # sched: [8:0.50]
5915; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5916; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psubd %mm1, %mm0".
5917  %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psubd (%rdi), %mm0".
5918  %2 = load x86_mmx, x86_mmx *%a2, align 8
5919  %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5920  %4 = bitcast x86_mmx %3 to i64
5921  ret i64 %4
5922}
; Intrinsic called by test_psubd; it takes two x86_mmx operands.
5923declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
5924
; test_psubq - scheduling-annotation test for the psubq instruction.
; The IR calls @llvm.x86.mmx.psub.q twice: first on %a0/%a1, then on that
; result and a value loaded through %a2. The assertions below expect, for
; every CPU prefix, the register form and then the memory form of psubq,
; followed by a movq of %mm0 into %rax and a retq, each with its "# sched"
; annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
5925define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
5926; GENERIC-LABEL: test_psubq:
5927; GENERIC:       # %bb.0:
5928; GENERIC-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
5929; GENERIC-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
5930; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5931; GENERIC-NEXT:    retq # sched: [1:1.00]
5932;
5933; ATOM-LABEL: test_psubq:
5934; ATOM:       # %bb.0:
5935; ATOM-NEXT:    psubq %mm1, %mm0 # sched: [2:1.00]
5936; ATOM-NEXT:    psubq (%rdi), %mm0 # sched: [3:1.50]
5937; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
5938; ATOM-NEXT:    retq # sched: [79:39.50]
5939;
5940; SLM-LABEL: test_psubq:
5941; SLM:       # %bb.0:
5942; SLM-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5943; SLM-NEXT:    psubq (%rdi), %mm0 # sched: [4:1.00]
5944; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
5945; SLM-NEXT:    retq # sched: [4:1.00]
5946;
5947; SANDY-LABEL: test_psubq:
5948; SANDY:       # %bb.0:
5949; SANDY-NEXT:    psubq %mm1, %mm0 # sched: [3:1.00]
5950; SANDY-NEXT:    psubq (%rdi), %mm0 # sched: [8:1.00]
5951; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5952; SANDY-NEXT:    retq # sched: [1:1.00]
5953;
5954; HASWELL-LABEL: test_psubq:
5955; HASWELL:       # %bb.0:
5956; HASWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5957; HASWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
5958; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5959; HASWELL-NEXT:    retq # sched: [7:1.00]
5960;
5961; BROADWELL-LABEL: test_psubq:
5962; BROADWELL:       # %bb.0:
5963; BROADWELL-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5964; BROADWELL-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
5965; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
5966; BROADWELL-NEXT:    retq # sched: [7:1.00]
5967;
5968; SKYLAKE-LABEL: test_psubq:
5969; SKYLAKE:       # %bb.0:
5970; SKYLAKE-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5971; SKYLAKE-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
5972; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5973; SKYLAKE-NEXT:    retq # sched: [7:1.00]
5974;
5975; SKX-LABEL: test_psubq:
5976; SKX:       # %bb.0:
5977; SKX-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5978; SKX-NEXT:    psubq (%rdi), %mm0 # sched: [6:0.50]
5979; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5980; SKX-NEXT:    retq # sched: [7:1.00]
5981;
5982; BTVER2-LABEL: test_psubq:
5983; BTVER2:       # %bb.0:
5984; BTVER2-NEXT:    psubq %mm1, %mm0 # sched: [1:0.50]
5985; BTVER2-NEXT:    psubq (%rdi), %mm0 # sched: [6:1.00]
5986; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
5987; BTVER2-NEXT:    retq # sched: [4:1.00]
5988;
5989; ZNVER1-LABEL: test_psubq:
5990; ZNVER1:       # %bb.0:
5991; ZNVER1-NEXT:    psubq %mm1, %mm0 # sched: [1:0.25]
5992; ZNVER1-NEXT:    psubq (%rdi), %mm0 # sched: [8:0.50]
5993; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
5994; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psubq %mm1, %mm0".
5995  %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psubq (%rdi), %mm0".
5996  %2 = load x86_mmx, x86_mmx *%a2, align 8
5997  %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2)
  ; Returning the result as i64 is what yields the expected movq into %rax.
5998  %4 = bitcast x86_mmx %3 to i64
5999  ret i64 %4
6000}
; Intrinsic called by test_psubq; it takes two x86_mmx operands.
6001declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
6002
; test_psubsb - scheduling-annotation test for the psubsb instruction.
; The IR calls @llvm.x86.mmx.psubs.b twice: first on %a0/%a1, then on that
; result and a value loaded through %a2. The assertions below expect, for
; every CPU prefix, the register form and then the memory form of psubsb,
; followed by a movq of %mm0 into %rax and a retq, each with its "# sched"
; annotation.
; NOTE(review): the bracketed pair in each annotation is presumably
; latency and reciprocal throughput - confirm against the -print-schedule
; implementation.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing them by hand.
6003define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6004; GENERIC-LABEL: test_psubsb:
6005; GENERIC:       # %bb.0:
6006; GENERIC-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
6007; GENERIC-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
6008; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6009; GENERIC-NEXT:    retq # sched: [1:1.00]
6010;
6011; ATOM-LABEL: test_psubsb:
6012; ATOM:       # %bb.0:
6013; ATOM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
6014; ATOM-NEXT:    psubsb (%rdi), %mm0 # sched: [1:1.00]
6015; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6016; ATOM-NEXT:    retq # sched: [79:39.50]
6017;
6018; SLM-LABEL: test_psubsb:
6019; SLM:       # %bb.0:
6020; SLM-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
6021; SLM-NEXT:    psubsb (%rdi), %mm0 # sched: [4:1.00]
6022; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6023; SLM-NEXT:    retq # sched: [4:1.00]
6024;
6025; SANDY-LABEL: test_psubsb:
6026; SANDY:       # %bb.0:
6027; SANDY-NEXT:    psubsb %mm1, %mm0 # sched: [3:1.00]
6028; SANDY-NEXT:    psubsb (%rdi), %mm0 # sched: [8:1.00]
6029; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6030; SANDY-NEXT:    retq # sched: [1:1.00]
6031;
6032; HASWELL-LABEL: test_psubsb:
6033; HASWELL:       # %bb.0:
6034; HASWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
6035; HASWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
6036; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6037; HASWELL-NEXT:    retq # sched: [7:1.00]
6038;
6039; BROADWELL-LABEL: test_psubsb:
6040; BROADWELL:       # %bb.0:
6041; BROADWELL-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
6042; BROADWELL-NEXT:    psubsb (%rdi), %mm0 # sched: [6:0.50]
6043; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6044; BROADWELL-NEXT:    retq # sched: [7:1.00]
6045;
6046; SKYLAKE-LABEL: test_psubsb:
6047; SKYLAKE:       # %bb.0:
6048; SKYLAKE-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
6049; SKYLAKE-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
6050; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6051; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6052;
6053; SKX-LABEL: test_psubsb:
6054; SKX:       # %bb.0:
6055; SKX-NEXT:    psubsb %mm1, %mm0 # sched: [1:1.00]
6056; SKX-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
6057; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6058; SKX-NEXT:    retq # sched: [7:1.00]
6059;
6060; BTVER2-LABEL: test_psubsb:
6061; BTVER2:       # %bb.0:
6062; BTVER2-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.50]
6063; BTVER2-NEXT:    psubsb (%rdi), %mm0 # sched: [6:1.00]
6064; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6065; BTVER2-NEXT:    retq # sched: [4:1.00]
6066;
6067; ZNVER1-LABEL: test_psubsb:
6068; ZNVER1:       # %bb.0:
6069; ZNVER1-NEXT:    psubsb %mm1, %mm0 # sched: [1:0.25]
6070; ZNVER1-NEXT:    psubsb (%rdi), %mm0 # sched: [8:0.50]
6071; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6072; ZNVER1-NEXT:    retq # sched: [1:0.50]
  ; Both operands are function arguments: expected as "psubsb %mm1, %mm0".
6073  %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1)
  ; Second operand is loaded through %a2: expected as "psubsb (%rdi), %mm0".
6074  %2 = load x86_mmx, x86_mmx *%a2, align 8
6075  %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2)
  ; Returning the result as i64 is what yields the expected movq into %rax.
6076  %4 = bitcast x86_mmx %3 to i64
6077  ret i64 %4
6078}
; Intrinsic called by test_psubsb; it takes two x86_mmx operands.
6079declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
6080
; Schedule test for the MMX psubsw instruction. @llvm.x86.mmx.psubs.w is called
; once on the two register arguments and once with an operand loaded from %a2,
; so every CPU section below expects a reg-reg psubsw followed by a reg-mem one.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6081define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6082; GENERIC-LABEL: test_psubsw:
6083; GENERIC:       # %bb.0:
6084; GENERIC-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
6085; GENERIC-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
6086; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6087; GENERIC-NEXT:    retq # sched: [1:1.00]
6088;
6089; ATOM-LABEL: test_psubsw:
6090; ATOM:       # %bb.0:
6091; ATOM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
6092; ATOM-NEXT:    psubsw (%rdi), %mm0 # sched: [1:1.00]
6093; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6094; ATOM-NEXT:    retq # sched: [79:39.50]
6095;
6096; SLM-LABEL: test_psubsw:
6097; SLM:       # %bb.0:
6098; SLM-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
6099; SLM-NEXT:    psubsw (%rdi), %mm0 # sched: [4:1.00]
6100; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6101; SLM-NEXT:    retq # sched: [4:1.00]
6102;
6103; SANDY-LABEL: test_psubsw:
6104; SANDY:       # %bb.0:
6105; SANDY-NEXT:    psubsw %mm1, %mm0 # sched: [3:1.00]
6106; SANDY-NEXT:    psubsw (%rdi), %mm0 # sched: [8:1.00]
6107; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6108; SANDY-NEXT:    retq # sched: [1:1.00]
6109;
6110; HASWELL-LABEL: test_psubsw:
6111; HASWELL:       # %bb.0:
6112; HASWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
6113; HASWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
6114; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6115; HASWELL-NEXT:    retq # sched: [7:1.00]
6116;
6117; BROADWELL-LABEL: test_psubsw:
6118; BROADWELL:       # %bb.0:
6119; BROADWELL-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
6120; BROADWELL-NEXT:    psubsw (%rdi), %mm0 # sched: [6:0.50]
6121; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6122; BROADWELL-NEXT:    retq # sched: [7:1.00]
6123;
6124; SKYLAKE-LABEL: test_psubsw:
6125; SKYLAKE:       # %bb.0:
6126; SKYLAKE-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
6127; SKYLAKE-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
6128; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6129; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6130;
6131; SKX-LABEL: test_psubsw:
6132; SKX:       # %bb.0:
6133; SKX-NEXT:    psubsw %mm1, %mm0 # sched: [1:1.00]
6134; SKX-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
6135; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6136; SKX-NEXT:    retq # sched: [7:1.00]
6137;
6138; BTVER2-LABEL: test_psubsw:
6139; BTVER2:       # %bb.0:
6140; BTVER2-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.50]
6141; BTVER2-NEXT:    psubsw (%rdi), %mm0 # sched: [6:1.00]
6142; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6143; BTVER2-NEXT:    retq # sched: [4:1.00]
6144;
6145; ZNVER1-LABEL: test_psubsw:
6146; ZNVER1:       # %bb.0:
6147; ZNVER1-NEXT:    psubsw %mm1, %mm0 # sched: [1:0.25]
6148; ZNVER1-NEXT:    psubsw (%rdi), %mm0 # sched: [8:0.50]
6149; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6150; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6151  %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1)
6152  %2 = load x86_mmx, x86_mmx *%a2, align 8
6153  %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2)
6154  %4 = bitcast x86_mmx %3 to i64
6155  ret i64 %4
6156}
6157declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
6158
; Schedule test for the MMX psubusb instruction. @llvm.x86.mmx.psubus.b is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg psubusb followed by a
; reg-mem one.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6159define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6160; GENERIC-LABEL: test_psubusb:
6161; GENERIC:       # %bb.0:
6162; GENERIC-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
6163; GENERIC-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
6164; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6165; GENERIC-NEXT:    retq # sched: [1:1.00]
6166;
6167; ATOM-LABEL: test_psubusb:
6168; ATOM:       # %bb.0:
6169; ATOM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
6170; ATOM-NEXT:    psubusb (%rdi), %mm0 # sched: [1:1.00]
6171; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6172; ATOM-NEXT:    retq # sched: [79:39.50]
6173;
6174; SLM-LABEL: test_psubusb:
6175; SLM:       # %bb.0:
6176; SLM-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
6177; SLM-NEXT:    psubusb (%rdi), %mm0 # sched: [4:1.00]
6178; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6179; SLM-NEXT:    retq # sched: [4:1.00]
6180;
6181; SANDY-LABEL: test_psubusb:
6182; SANDY:       # %bb.0:
6183; SANDY-NEXT:    psubusb %mm1, %mm0 # sched: [3:1.00]
6184; SANDY-NEXT:    psubusb (%rdi), %mm0 # sched: [8:1.00]
6185; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6186; SANDY-NEXT:    retq # sched: [1:1.00]
6187;
6188; HASWELL-LABEL: test_psubusb:
6189; HASWELL:       # %bb.0:
6190; HASWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
6191; HASWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
6192; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6193; HASWELL-NEXT:    retq # sched: [7:1.00]
6194;
6195; BROADWELL-LABEL: test_psubusb:
6196; BROADWELL:       # %bb.0:
6197; BROADWELL-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
6198; BROADWELL-NEXT:    psubusb (%rdi), %mm0 # sched: [6:0.50]
6199; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6200; BROADWELL-NEXT:    retq # sched: [7:1.00]
6201;
6202; SKYLAKE-LABEL: test_psubusb:
6203; SKYLAKE:       # %bb.0:
6204; SKYLAKE-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
6205; SKYLAKE-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
6206; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6207; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6208;
6209; SKX-LABEL: test_psubusb:
6210; SKX:       # %bb.0:
6211; SKX-NEXT:    psubusb %mm1, %mm0 # sched: [1:1.00]
6212; SKX-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
6213; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6214; SKX-NEXT:    retq # sched: [7:1.00]
6215;
6216; BTVER2-LABEL: test_psubusb:
6217; BTVER2:       # %bb.0:
6218; BTVER2-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.50]
6219; BTVER2-NEXT:    psubusb (%rdi), %mm0 # sched: [6:1.00]
6220; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6221; BTVER2-NEXT:    retq # sched: [4:1.00]
6222;
6223; ZNVER1-LABEL: test_psubusb:
6224; ZNVER1:       # %bb.0:
6225; ZNVER1-NEXT:    psubusb %mm1, %mm0 # sched: [1:0.25]
6226; ZNVER1-NEXT:    psubusb (%rdi), %mm0 # sched: [8:0.50]
6227; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6228; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6229  %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1)
6230  %2 = load x86_mmx, x86_mmx *%a2, align 8
6231  %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2)
6232  %4 = bitcast x86_mmx %3 to i64
6233  ret i64 %4
6234}
6235declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
6236
; Schedule test for the MMX psubusw instruction. @llvm.x86.mmx.psubus.w is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg psubusw followed by a
; reg-mem one.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6237define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6238; GENERIC-LABEL: test_psubusw:
6239; GENERIC:       # %bb.0:
6240; GENERIC-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
6241; GENERIC-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
6242; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6243; GENERIC-NEXT:    retq # sched: [1:1.00]
6244;
6245; ATOM-LABEL: test_psubusw:
6246; ATOM:       # %bb.0:
6247; ATOM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
6248; ATOM-NEXT:    psubusw (%rdi), %mm0 # sched: [1:1.00]
6249; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6250; ATOM-NEXT:    retq # sched: [79:39.50]
6251;
6252; SLM-LABEL: test_psubusw:
6253; SLM:       # %bb.0:
6254; SLM-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
6255; SLM-NEXT:    psubusw (%rdi), %mm0 # sched: [4:1.00]
6256; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6257; SLM-NEXT:    retq # sched: [4:1.00]
6258;
6259; SANDY-LABEL: test_psubusw:
6260; SANDY:       # %bb.0:
6261; SANDY-NEXT:    psubusw %mm1, %mm0 # sched: [3:1.00]
6262; SANDY-NEXT:    psubusw (%rdi), %mm0 # sched: [8:1.00]
6263; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6264; SANDY-NEXT:    retq # sched: [1:1.00]
6265;
6266; HASWELL-LABEL: test_psubusw:
6267; HASWELL:       # %bb.0:
6268; HASWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
6269; HASWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
6270; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6271; HASWELL-NEXT:    retq # sched: [7:1.00]
6272;
6273; BROADWELL-LABEL: test_psubusw:
6274; BROADWELL:       # %bb.0:
6275; BROADWELL-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
6276; BROADWELL-NEXT:    psubusw (%rdi), %mm0 # sched: [6:0.50]
6277; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6278; BROADWELL-NEXT:    retq # sched: [7:1.00]
6279;
6280; SKYLAKE-LABEL: test_psubusw:
6281; SKYLAKE:       # %bb.0:
6282; SKYLAKE-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
6283; SKYLAKE-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
6284; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6285; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6286;
6287; SKX-LABEL: test_psubusw:
6288; SKX:       # %bb.0:
6289; SKX-NEXT:    psubusw %mm1, %mm0 # sched: [1:1.00]
6290; SKX-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
6291; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6292; SKX-NEXT:    retq # sched: [7:1.00]
6293;
6294; BTVER2-LABEL: test_psubusw:
6295; BTVER2:       # %bb.0:
6296; BTVER2-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.50]
6297; BTVER2-NEXT:    psubusw (%rdi), %mm0 # sched: [6:1.00]
6298; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6299; BTVER2-NEXT:    retq # sched: [4:1.00]
6300;
6301; ZNVER1-LABEL: test_psubusw:
6302; ZNVER1:       # %bb.0:
6303; ZNVER1-NEXT:    psubusw %mm1, %mm0 # sched: [1:0.25]
6304; ZNVER1-NEXT:    psubusw (%rdi), %mm0 # sched: [8:0.50]
6305; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6306; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6307  %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1)
6308  %2 = load x86_mmx, x86_mmx *%a2, align 8
6309  %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2)
6310  %4 = bitcast x86_mmx %3 to i64
6311  ret i64 %4
6312}
6313declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
6314
; Schedule test for the MMX psubw instruction. @llvm.x86.mmx.psub.w is called
; once on the two register arguments and once with an operand loaded from %a2,
; so every CPU section below expects a reg-reg psubw followed by a reg-mem one.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6315define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6316; GENERIC-LABEL: test_psubw:
6317; GENERIC:       # %bb.0:
6318; GENERIC-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
6319; GENERIC-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
6320; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6321; GENERIC-NEXT:    retq # sched: [1:1.00]
6322;
6323; ATOM-LABEL: test_psubw:
6324; ATOM:       # %bb.0:
6325; ATOM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6326; ATOM-NEXT:    psubw (%rdi), %mm0 # sched: [1:1.00]
6327; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6328; ATOM-NEXT:    retq # sched: [79:39.50]
6329;
6330; SLM-LABEL: test_psubw:
6331; SLM:       # %bb.0:
6332; SLM-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6333; SLM-NEXT:    psubw (%rdi), %mm0 # sched: [4:1.00]
6334; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6335; SLM-NEXT:    retq # sched: [4:1.00]
6336;
6337; SANDY-LABEL: test_psubw:
6338; SANDY:       # %bb.0:
6339; SANDY-NEXT:    psubw %mm1, %mm0 # sched: [3:1.00]
6340; SANDY-NEXT:    psubw (%rdi), %mm0 # sched: [8:1.00]
6341; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6342; SANDY-NEXT:    retq # sched: [1:1.00]
6343;
6344; HASWELL-LABEL: test_psubw:
6345; HASWELL:       # %bb.0:
6346; HASWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6347; HASWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
6348; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6349; HASWELL-NEXT:    retq # sched: [7:1.00]
6350;
6351; BROADWELL-LABEL: test_psubw:
6352; BROADWELL:       # %bb.0:
6353; BROADWELL-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6354; BROADWELL-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
6355; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6356; BROADWELL-NEXT:    retq # sched: [7:1.00]
6357;
6358; SKYLAKE-LABEL: test_psubw:
6359; SKYLAKE:       # %bb.0:
6360; SKYLAKE-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6361; SKYLAKE-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
6362; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6363; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6364;
6365; SKX-LABEL: test_psubw:
6366; SKX:       # %bb.0:
6367; SKX-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6368; SKX-NEXT:    psubw (%rdi), %mm0 # sched: [6:0.50]
6369; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6370; SKX-NEXT:    retq # sched: [7:1.00]
6371;
6372; BTVER2-LABEL: test_psubw:
6373; BTVER2:       # %bb.0:
6374; BTVER2-NEXT:    psubw %mm1, %mm0 # sched: [1:0.50]
6375; BTVER2-NEXT:    psubw (%rdi), %mm0 # sched: [6:1.00]
6376; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6377; BTVER2-NEXT:    retq # sched: [4:1.00]
6378;
6379; ZNVER1-LABEL: test_psubw:
6380; ZNVER1:       # %bb.0:
6381; ZNVER1-NEXT:    psubw %mm1, %mm0 # sched: [1:0.25]
6382; ZNVER1-NEXT:    psubw (%rdi), %mm0 # sched: [8:0.50]
6383; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6384; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6385  %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1)
6386  %2 = load x86_mmx, x86_mmx *%a2, align 8
6387  %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2)
6388  %4 = bitcast x86_mmx %3 to i64
6389  ret i64 %4
6390}
6391declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
6392
; Schedule test for the MMX punpckhbw instruction. @llvm.x86.mmx.punpckhbw is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg punpckhbw followed by
; a reg-mem one. The expected assembly also includes the printed
; "mm0 = ..." lane comment ahead of each schedule annotation.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6393define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6394; GENERIC-LABEL: test_punpckhbw:
6395; GENERIC:       # %bb.0:
6396; GENERIC-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6397; GENERIC-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6398; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6399; GENERIC-NEXT:    retq # sched: [1:1.00]
6400;
6401; ATOM-LABEL: test_punpckhbw:
6402; ATOM:       # %bb.0:
6403; ATOM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
6404; ATOM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00]
6405; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6406; ATOM-NEXT:    retq # sched: [79:39.50]
6407;
6408; SLM-LABEL: test_punpckhbw:
6409; SLM:       # %bb.0:
6410; SLM-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6411; SLM-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00]
6412; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6413; SLM-NEXT:    retq # sched: [4:1.00]
6414;
6415; SANDY-LABEL: test_punpckhbw:
6416; SANDY:       # %bb.0:
6417; SANDY-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6418; SANDY-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6419; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6420; SANDY-NEXT:    retq # sched: [1:1.00]
6421;
6422; HASWELL-LABEL: test_punpckhbw:
6423; HASWELL:       # %bb.0:
6424; HASWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6425; HASWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6426; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6427; HASWELL-NEXT:    retq # sched: [7:1.00]
6428;
6429; BROADWELL-LABEL: test_punpckhbw:
6430; BROADWELL:       # %bb.0:
6431; BROADWELL-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6432; BROADWELL-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6433; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6434; BROADWELL-NEXT:    retq # sched: [7:1.00]
6435;
6436; SKYLAKE-LABEL: test_punpckhbw:
6437; SKYLAKE:       # %bb.0:
6438; SKYLAKE-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6439; SKYLAKE-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6440; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6441; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6442;
6443; SKX-LABEL: test_punpckhbw:
6444; SKX:       # %bb.0:
6445; SKX-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
6446; SKX-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6447; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6448; SKX-NEXT:    retq # sched: [7:1.00]
6449;
6450; BTVER2-LABEL: test_punpckhbw:
6451; BTVER2:       # %bb.0:
6452; BTVER2-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
6453; BTVER2-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
6454; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6455; BTVER2-NEXT:    retq # sched: [4:1.00]
6456;
6457; ZNVER1-LABEL: test_punpckhbw:
6458; ZNVER1:       # %bb.0:
6459; ZNVER1-NEXT:    punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25]
6460; ZNVER1-NEXT:    punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50]
6461; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6462; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6463  %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1)
6464  %2 = load x86_mmx, x86_mmx *%a2, align 8
6465  %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2)
6466  %4 = bitcast x86_mmx %3 to i64
6467  ret i64 %4
6468}
6469declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
6470
; Schedule test for the MMX punpckhdq instruction. @llvm.x86.mmx.punpckhdq is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg punpckhdq followed by
; a reg-mem one. The expected assembly also includes the printed
; "mm0 = ..." lane comment ahead of each schedule annotation.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6471define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6472; GENERIC-LABEL: test_punpckhdq:
6473; GENERIC:       # %bb.0:
6474; GENERIC-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6475; GENERIC-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6476; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6477; GENERIC-NEXT:    retq # sched: [1:1.00]
6478;
6479; ATOM-LABEL: test_punpckhdq:
6480; ATOM:       # %bb.0:
6481; ATOM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
6482; ATOM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00]
6483; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6484; ATOM-NEXT:    retq # sched: [79:39.50]
6485;
6486; SLM-LABEL: test_punpckhdq:
6487; SLM:       # %bb.0:
6488; SLM-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6489; SLM-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00]
6490; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6491; SLM-NEXT:    retq # sched: [4:1.00]
6492;
6493; SANDY-LABEL: test_punpckhdq:
6494; SANDY:       # %bb.0:
6495; SANDY-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6496; SANDY-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6497; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6498; SANDY-NEXT:    retq # sched: [1:1.00]
6499;
6500; HASWELL-LABEL: test_punpckhdq:
6501; HASWELL:       # %bb.0:
6502; HASWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6503; HASWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6504; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6505; HASWELL-NEXT:    retq # sched: [7:1.00]
6506;
6507; BROADWELL-LABEL: test_punpckhdq:
6508; BROADWELL:       # %bb.0:
6509; BROADWELL-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6510; BROADWELL-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6511; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6512; BROADWELL-NEXT:    retq # sched: [7:1.00]
6513;
6514; SKYLAKE-LABEL: test_punpckhdq:
6515; SKYLAKE:       # %bb.0:
6516; SKYLAKE-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6517; SKYLAKE-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6518; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6519; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6520;
6521; SKX-LABEL: test_punpckhdq:
6522; SKX:       # %bb.0:
6523; SKX-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
6524; SKX-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6525; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6526; SKX-NEXT:    retq # sched: [7:1.00]
6527;
6528; BTVER2-LABEL: test_punpckhdq:
6529; BTVER2:       # %bb.0:
6530; BTVER2-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
6531; BTVER2-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
6532; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6533; BTVER2-NEXT:    retq # sched: [4:1.00]
6534;
6535; ZNVER1-LABEL: test_punpckhdq:
6536; ZNVER1:       # %bb.0:
6537; ZNVER1-NEXT:    punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25]
6538; ZNVER1-NEXT:    punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50]
6539; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6540; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6541  %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1)
6542  %2 = load x86_mmx, x86_mmx *%a2, align 8
6543  %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2)
6544  %4 = bitcast x86_mmx %3 to i64
6545  ret i64 %4
6546}
6547declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
6548
; Schedule test for the MMX punpckhwd instruction. @llvm.x86.mmx.punpckhwd is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg punpckhwd followed by
; a reg-mem one. The expected assembly also includes the printed
; "mm0 = ..." lane comment ahead of each schedule annotation.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6549define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6550; GENERIC-LABEL: test_punpckhwd:
6551; GENERIC:       # %bb.0:
6552; GENERIC-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6553; GENERIC-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6554; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6555; GENERIC-NEXT:    retq # sched: [1:1.00]
6556;
6557; ATOM-LABEL: test_punpckhwd:
6558; ATOM:       # %bb.0:
6559; ATOM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
6560; ATOM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
6561; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6562; ATOM-NEXT:    retq # sched: [79:39.50]
6563;
6564; SLM-LABEL: test_punpckhwd:
6565; SLM:       # %bb.0:
6566; SLM-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6567; SLM-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
6568; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6569; SLM-NEXT:    retq # sched: [4:1.00]
6570;
6571; SANDY-LABEL: test_punpckhwd:
6572; SANDY:       # %bb.0:
6573; SANDY-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6574; SANDY-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6575; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6576; SANDY-NEXT:    retq # sched: [1:1.00]
6577;
6578; HASWELL-LABEL: test_punpckhwd:
6579; HASWELL:       # %bb.0:
6580; HASWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6581; HASWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6582; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6583; HASWELL-NEXT:    retq # sched: [7:1.00]
6584;
6585; BROADWELL-LABEL: test_punpckhwd:
6586; BROADWELL:       # %bb.0:
6587; BROADWELL-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6588; BROADWELL-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6589; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6590; BROADWELL-NEXT:    retq # sched: [7:1.00]
6591;
6592; SKYLAKE-LABEL: test_punpckhwd:
6593; SKYLAKE:       # %bb.0:
6594; SKYLAKE-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6595; SKYLAKE-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6596; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6597; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6598;
6599; SKX-LABEL: test_punpckhwd:
6600; SKX:       # %bb.0:
6601; SKX-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6602; SKX-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6603; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6604; SKX-NEXT:    retq # sched: [7:1.00]
6605;
6606; BTVER2-LABEL: test_punpckhwd:
6607; BTVER2:       # %bb.0:
6608; BTVER2-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
6609; BTVER2-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6610; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6611; BTVER2-NEXT:    retq # sched: [4:1.00]
6612;
6613; ZNVER1-LABEL: test_punpckhwd:
6614; ZNVER1:       # %bb.0:
6615; ZNVER1-NEXT:    punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
6616; ZNVER1-NEXT:    punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
6617; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6618; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6619  %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1)
6620  %2 = load x86_mmx, x86_mmx *%a2, align 8
6621  %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2)
6622  %4 = bitcast x86_mmx %3 to i64
6623  ret i64 %4
6624}
6625declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
6626
; Schedule test for the MMX punpcklbw instruction. @llvm.x86.mmx.punpcklbw is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg punpcklbw followed by
; a reg-mem one. The expected assembly also includes the printed
; "mm0 = ..." lane comment ahead of each schedule annotation.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6627define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6628; GENERIC-LABEL: test_punpcklbw:
6629; GENERIC:       # %bb.0:
6630; GENERIC-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6631; GENERIC-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6632; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6633; GENERIC-NEXT:    retq # sched: [1:1.00]
6634;
6635; ATOM-LABEL: test_punpcklbw:
6636; ATOM:       # %bb.0:
6637; ATOM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6638; ATOM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
6639; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6640; ATOM-NEXT:    retq # sched: [79:39.50]
6641;
6642; SLM-LABEL: test_punpcklbw:
6643; SLM:       # %bb.0:
6644; SLM-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6645; SLM-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
6646; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6647; SLM-NEXT:    retq # sched: [4:1.00]
6648;
6649; SANDY-LABEL: test_punpcklbw:
6650; SANDY:       # %bb.0:
6651; SANDY-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6652; SANDY-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6653; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6654; SANDY-NEXT:    retq # sched: [1:1.00]
6655;
6656; HASWELL-LABEL: test_punpcklbw:
6657; HASWELL:       # %bb.0:
6658; HASWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6659; HASWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6660; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6661; HASWELL-NEXT:    retq # sched: [7:1.00]
6662;
6663; BROADWELL-LABEL: test_punpcklbw:
6664; BROADWELL:       # %bb.0:
6665; BROADWELL-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6666; BROADWELL-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6667; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6668; BROADWELL-NEXT:    retq # sched: [7:1.00]
6669;
6670; SKYLAKE-LABEL: test_punpcklbw:
6671; SKYLAKE:       # %bb.0:
6672; SKYLAKE-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6673; SKYLAKE-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6674; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6675; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6676;
6677; SKX-LABEL: test_punpcklbw:
6678; SKX:       # %bb.0:
6679; SKX-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
6680; SKX-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6681; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6682; SKX-NEXT:    retq # sched: [7:1.00]
6683;
6684; BTVER2-LABEL: test_punpcklbw:
6685; BTVER2:       # %bb.0:
6686; BTVER2-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
6687; BTVER2-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
6688; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6689; BTVER2-NEXT:    retq # sched: [4:1.00]
6690;
6691; ZNVER1-LABEL: test_punpcklbw:
6692; ZNVER1:       # %bb.0:
6693; ZNVER1-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
6694; ZNVER1-NEXT:    punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
6695; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6696; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6697  %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1)
6698  %2 = load x86_mmx, x86_mmx *%a2, align 8
6699  %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2)
6700  %4 = bitcast x86_mmx %3 to i64
6701  ret i64 %4
6702}
6703declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
6704
; Schedule test for the MMX punpckldq instruction. @llvm.x86.mmx.punpckldq is
; called once on the two register arguments and once with an operand loaded
; from %a2, so every CPU section below expects a reg-reg punpckldq followed by
; a reg-mem one. The expected assembly also includes the printed
; "mm0 = ..." lane comment ahead of each schedule annotation.
; The per-CPU assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
6705define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6706; GENERIC-LABEL: test_punpckldq:
6707; GENERIC:       # %bb.0:
6708; GENERIC-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6709; GENERIC-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6710; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6711; GENERIC-NEXT:    retq # sched: [1:1.00]
6712;
6713; ATOM-LABEL: test_punpckldq:
6714; ATOM:       # %bb.0:
6715; ATOM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6716; ATOM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
6717; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6718; ATOM-NEXT:    retq # sched: [79:39.50]
6719;
6720; SLM-LABEL: test_punpckldq:
6721; SLM:       # %bb.0:
6722; SLM-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6723; SLM-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
6724; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6725; SLM-NEXT:    retq # sched: [4:1.00]
6726;
6727; SANDY-LABEL: test_punpckldq:
6728; SANDY:       # %bb.0:
6729; SANDY-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6730; SANDY-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6731; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6732; SANDY-NEXT:    retq # sched: [1:1.00]
6733;
6734; HASWELL-LABEL: test_punpckldq:
6735; HASWELL:       # %bb.0:
6736; HASWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6737; HASWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6738; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6739; HASWELL-NEXT:    retq # sched: [7:1.00]
6740;
6741; BROADWELL-LABEL: test_punpckldq:
6742; BROADWELL:       # %bb.0:
6743; BROADWELL-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6744; BROADWELL-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6745; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6746; BROADWELL-NEXT:    retq # sched: [7:1.00]
6747;
6748; SKYLAKE-LABEL: test_punpckldq:
6749; SKYLAKE:       # %bb.0:
6750; SKYLAKE-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6751; SKYLAKE-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6752; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6753; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6754;
6755; SKX-LABEL: test_punpckldq:
6756; SKX:       # %bb.0:
6757; SKX-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
6758; SKX-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6759; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6760; SKX-NEXT:    retq # sched: [7:1.00]
6761;
6762; BTVER2-LABEL: test_punpckldq:
6763; BTVER2:       # %bb.0:
6764; BTVER2-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
6765; BTVER2-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
6766; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6767; BTVER2-NEXT:    retq # sched: [4:1.00]
6768;
6769; ZNVER1-LABEL: test_punpckldq:
6770; ZNVER1:       # %bb.0:
6771; ZNVER1-NEXT:    punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
6772; ZNVER1-NEXT:    punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
6773; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6774; ZNVER1-NEXT:    retq # sched: [1:0.50]
; %1 is the register-register call; %2 loads the operand for the second call
; (%3); the x86_mmx result is bitcast to i64 to match the return type.
6775  %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
6776  %2 = load x86_mmx, x86_mmx *%a2, align 8
6777  %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2)
6778  %4 = bitcast x86_mmx %3 to i64
6779  ret i64 %4
6780}
; Declaration of the MMX punpckldq intrinsic called by test_punpckldq above.
; It takes two x86_mmx operands, returns an x86_mmx value, and is marked
; nounwind readnone.
6781declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
6782
; test_punpcklwd - scheduling-annotation test for the MMX punpcklwd intrinsic.
;
; The IR calls @llvm.x86.mmx.punpcklwd twice: first on the two x86_mmx
; arguments %a0 and %a1, then on that result and a value loaded from %a2.
; The expected assembly shows the first call as the register-register form
; and the second with the load folded into a "(%rdi)" memory operand, so both
; encodings get a schedule annotation. The final bitcast to i64 appears as
; the movq from %mm0 to %rax.
;
; The per-CPU check lines are autogenerated (see the note at the top of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand. The SANDY prefix is shared by the sandybridge and
; ivybridge llc invocations.
; NOTE(review): the "sched" pairs are presumably [latency:reciprocal
; throughput] as printed by -print-schedule - confirm against llc.
6783define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6784; GENERIC-LABEL: test_punpcklwd:
6785; GENERIC:       # %bb.0:
6786; GENERIC-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6787; GENERIC-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6788; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6789; GENERIC-NEXT:    retq # sched: [1:1.00]
6790;
6791; ATOM-LABEL: test_punpcklwd:
6792; ATOM:       # %bb.0:
6793; ATOM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6794; ATOM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
6795; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6796; ATOM-NEXT:    retq # sched: [79:39.50]
6797;
6798; SLM-LABEL: test_punpcklwd:
6799; SLM:       # %bb.0:
6800; SLM-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6801; SLM-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
6802; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6803; SLM-NEXT:    retq # sched: [4:1.00]
6804;
6805; SANDY-LABEL: test_punpcklwd:
6806; SANDY:       # %bb.0:
6807; SANDY-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6808; SANDY-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6809; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6810; SANDY-NEXT:    retq # sched: [1:1.00]
6811;
6812; HASWELL-LABEL: test_punpcklwd:
6813; HASWELL:       # %bb.0:
6814; HASWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6815; HASWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6816; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6817; HASWELL-NEXT:    retq # sched: [7:1.00]
6818;
6819; BROADWELL-LABEL: test_punpcklwd:
6820; BROADWELL:       # %bb.0:
6821; BROADWELL-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6822; BROADWELL-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6823; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6824; BROADWELL-NEXT:    retq # sched: [7:1.00]
6825;
6826; SKYLAKE-LABEL: test_punpcklwd:
6827; SKYLAKE:       # %bb.0:
6828; SKYLAKE-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6829; SKYLAKE-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6830; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6831; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6832;
6833; SKX-LABEL: test_punpcklwd:
6834; SKX:       # %bb.0:
6835; SKX-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
6836; SKX-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6837; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6838; SKX-NEXT:    retq # sched: [7:1.00]
6839;
6840; BTVER2-LABEL: test_punpcklwd:
6841; BTVER2:       # %bb.0:
6842; BTVER2-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
6843; BTVER2-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
6844; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6845; BTVER2-NEXT:    retq # sched: [4:1.00]
6846;
6847; ZNVER1-LABEL: test_punpcklwd:
6848; ZNVER1:       # %bb.0:
6849; ZNVER1-NEXT:    punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
6850; ZNVER1-NEXT:    punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
6851; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6852; ZNVER1-NEXT:    retq # sched: [1:0.50]
6853  %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
6854  %2 = load x86_mmx, x86_mmx *%a2, align 8
6855  %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2)
6856  %4 = bitcast x86_mmx %3 to i64
6857  ret i64 %4
6858}
; Declaration of the MMX punpcklwd intrinsic called by test_punpcklwd above.
; It takes two x86_mmx operands, returns an x86_mmx value, and is marked
; nounwind readnone.
6859declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
6860
; test_pxor - scheduling-annotation test for the MMX pxor intrinsic.
;
; The IR calls @llvm.x86.mmx.pxor twice: first on the two x86_mmx arguments
; %a0 and %a1, then on that result and a value loaded from %a2. The expected
; assembly shows the first call as the register-register form and the second
; with the load folded into a "(%rdi)" memory operand, so both encodings get
; a schedule annotation. The final bitcast to i64 appears as the movq from
; %mm0 to %rax.
;
; The per-CPU check lines are autogenerated (see the note at the top of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand. The SANDY prefix is shared by the sandybridge and
; ivybridge llc invocations.
; NOTE(review): the "sched" pairs are presumably [latency:reciprocal
; throughput] as printed by -print-schedule - confirm against llc.
6861define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
6862; GENERIC-LABEL: test_pxor:
6863; GENERIC:       # %bb.0:
6864; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
6865; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6866; GENERIC-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6867; GENERIC-NEXT:    retq # sched: [1:1.00]
6868;
6869; ATOM-LABEL: test_pxor:
6870; ATOM:       # %bb.0:
6871; ATOM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
6872; ATOM-NEXT:    pxor (%rdi), %mm0 # sched: [1:1.00]
6873; ATOM-NEXT:    movq %mm0, %rax # sched: [3:3.00]
6874; ATOM-NEXT:    retq # sched: [79:39.50]
6875;
6876; SLM-LABEL: test_pxor:
6877; SLM:       # %bb.0:
6878; SLM-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
6879; SLM-NEXT:    pxor (%rdi), %mm0 # sched: [4:1.00]
6880; SLM-NEXT:    movq %mm0, %rax # sched: [1:0.50]
6881; SLM-NEXT:    retq # sched: [4:1.00]
6882;
6883; SANDY-LABEL: test_pxor:
6884; SANDY:       # %bb.0:
6885; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
6886; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6887; SANDY-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6888; SANDY-NEXT:    retq # sched: [1:1.00]
6889;
6890; HASWELL-LABEL: test_pxor:
6891; HASWELL:       # %bb.0:
6892; HASWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
6893; HASWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6894; HASWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6895; HASWELL-NEXT:    retq # sched: [7:1.00]
6896;
6897; BROADWELL-LABEL: test_pxor:
6898; BROADWELL:       # %bb.0:
6899; BROADWELL-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
6900; BROADWELL-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6901; BROADWELL-NEXT:    movq %mm0, %rax # sched: [1:1.00]
6902; BROADWELL-NEXT:    retq # sched: [7:1.00]
6903;
6904; SKYLAKE-LABEL: test_pxor:
6905; SKYLAKE:       # %bb.0:
6906; SKYLAKE-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
6907; SKYLAKE-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6908; SKYLAKE-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6909; SKYLAKE-NEXT:    retq # sched: [7:1.00]
6910;
6911; SKX-LABEL: test_pxor:
6912; SKX:       # %bb.0:
6913; SKX-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
6914; SKX-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
6915; SKX-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6916; SKX-NEXT:    retq # sched: [7:1.00]
6917;
6918; BTVER2-LABEL: test_pxor:
6919; BTVER2:       # %bb.0:
6920; BTVER2-NEXT:    pxor %mm1, %mm0 # sched: [1:0.50]
6921; BTVER2-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
6922; BTVER2-NEXT:    movq %mm0, %rax # sched: [4:1.00]
6923; BTVER2-NEXT:    retq # sched: [4:1.00]
6924;
6925; ZNVER1-LABEL: test_pxor:
6926; ZNVER1:       # %bb.0:
6927; ZNVER1-NEXT:    pxor %mm1, %mm0 # sched: [1:0.25]
6928; ZNVER1-NEXT:    pxor (%rdi), %mm0 # sched: [8:0.50]
6929; ZNVER1-NEXT:    movq %mm0, %rax # sched: [2:1.00]
6930; ZNVER1-NEXT:    retq # sched: [1:0.50]
6931  %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
6932  %2 = load x86_mmx, x86_mmx *%a2, align 8
6933  %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2)
6934  %4 = bitcast x86_mmx %3 to i64
6935  ret i64 %4
6936}
; Declaration of the MMX pxor intrinsic called by test_pxor above.
; It takes two x86_mmx operands, returns an x86_mmx value, and is marked
; nounwind readnone.
6937declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
6938