• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
2; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
3
; uaddl_8h: both add operands are zero-extended from <8 x i8> to <8 x i16>.
; The cost-model run expects each zext to be reported with cost 0, and the
; llc run expects the add to be emitted as a single uaddl.
4; COST-LABEL: uaddl_8h
5; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
6; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
7; CODE-LABEL: uaddl_8h
8; CODE:       uaddl v0.8h, v0.8b, v1.8b
9define <8 x i16> @uaddl_8h(<8 x i8> %a, <8 x i8> %b) {
10  %tmp0 = zext <8 x i8> %a to <8 x i16>
11  %tmp1 = zext <8 x i8> %b to <8 x i16>
12  %tmp2 = add <8 x i16> %tmp0, %tmp1
13  ret <8 x i16> %tmp2
14}
15
; uaddl_4s: both add operands are zero-extended from <4 x i16> to <4 x i32>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; single uaddl.
16; COST-LABEL: uaddl_4s
17; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
18; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
19; CODE-LABEL: uaddl_4s
20; CODE:       uaddl v0.4s, v0.4h, v1.4h
21define <4 x i32> @uaddl_4s(<4 x i16> %a, <4 x i16> %b) {
22  %tmp0 = zext <4 x i16> %a to <4 x i32>
23  %tmp1 = zext <4 x i16> %b to <4 x i32>
24  %tmp2 = add <4 x i32> %tmp0, %tmp1
25  ret <4 x i32> %tmp2
26}
27
; uaddl_2d: both add operands are zero-extended from <2 x i32> to <2 x i64>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; single uaddl.
28; COST-LABEL: uaddl_2d
29; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
30; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
31; CODE-LABEL: uaddl_2d
32; CODE:       uaddl v0.2d, v0.2s, v1.2s
33define <2 x i64> @uaddl_2d(<2 x i32> %a, <2 x i32> %b) {
34  %tmp0 = zext <2 x i32> %a to <2 x i64>
35  %tmp1 = zext <2 x i32> %b to <2 x i64>
36  %tmp2 = add <2 x i64> %tmp0, %tmp1
37  ret <2 x i64> %tmp2
38}
39
; uaddl2_8h: both add operands are zero-extended from <16 x i8> to <16 x i16>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; uaddl2 immediately followed by a uaddl.
40; COST-LABEL: uaddl2_8h
41; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
42; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
43; CODE-LABEL: uaddl2_8h
44; CODE:       uaddl2 v2.8h, v0.16b, v1.16b
45; CODE-NEXT:  uaddl v0.8h, v0.8b, v1.8b
46define <16 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) {
47  %tmp0 = zext <16 x i8> %a to <16 x i16>
48  %tmp1 = zext <16 x i8> %b to <16 x i16>
49  %tmp2 = add <16 x i16> %tmp0, %tmp1
50  ret <16 x i16> %tmp2
51}
52
; uaddl2_4s: both add operands are zero-extended from <8 x i16> to <8 x i32>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; uaddl2 immediately followed by a uaddl.
53; COST-LABEL: uaddl2_4s
54; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
55; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
56; CODE-LABEL: uaddl2_4s
57; CODE:       uaddl2 v2.4s, v0.8h, v1.8h
58; CODE-NEXT:  uaddl v0.4s, v0.4h, v1.4h
59define <8 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) {
60  %tmp0 = zext <8 x i16> %a to <8 x i32>
61  %tmp1 = zext <8 x i16> %b to <8 x i32>
62  %tmp2 = add <8 x i32> %tmp0, %tmp1
63  ret <8 x i32> %tmp2
64}
65
; uaddl2_2d: both add operands are zero-extended from <4 x i32> to <4 x i64>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; uaddl2 immediately followed by a uaddl.
66; COST-LABEL: uaddl2_2d
67; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
68; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
69; CODE-LABEL: uaddl2_2d
70; CODE:       uaddl2 v2.2d, v0.4s, v1.4s
71; CODE-NEXT:  uaddl v0.2d, v0.2s, v1.2s
72define <4 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) {
73  %tmp0 = zext <4 x i32> %a to <4 x i64>
74  %tmp1 = zext <4 x i32> %b to <4 x i64>
75  %tmp2 = add <4 x i64> %tmp0, %tmp1
76  ret <4 x i64> %tmp2
77}
78
; saddl_8h: both add operands are sign-extended from <8 x i8> to <8 x i16>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single saddl.
79; COST-LABEL: saddl_8h
80; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
81; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
82; CODE-LABEL: saddl_8h
83; CODE:       saddl v0.8h, v0.8b, v1.8b
84define <8 x i16> @saddl_8h(<8 x i8> %a, <8 x i8> %b) {
85  %tmp0 = sext <8 x i8> %a to <8 x i16>
86  %tmp1 = sext <8 x i8> %b to <8 x i16>
87  %tmp2 = add <8 x i16> %tmp0, %tmp1
88  ret <8 x i16> %tmp2
89}
90
; saddl_4s: both add operands are sign-extended from <4 x i16> to <4 x i32>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single saddl.
91; COST-LABEL: saddl_4s
92; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
93; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
94; CODE-LABEL: saddl_4s
95; CODE:       saddl v0.4s, v0.4h, v1.4h
96define <4 x i32> @saddl_4s(<4 x i16> %a, <4 x i16> %b) {
97  %tmp0 = sext <4 x i16> %a to <4 x i32>
98  %tmp1 = sext <4 x i16> %b to <4 x i32>
99  %tmp2 = add <4 x i32> %tmp0, %tmp1
100  ret <4 x i32> %tmp2
101}
102
; saddl_2d: both add operands are sign-extended from <2 x i32> to <2 x i64>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single saddl.
103; COST-LABEL: saddl_2d
104; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
105; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
106; CODE-LABEL: saddl_2d
107; CODE:       saddl v0.2d, v0.2s, v1.2s
108define <2 x i64> @saddl_2d(<2 x i32> %a, <2 x i32> %b) {
109  %tmp0 = sext <2 x i32> %a to <2 x i64>
110  %tmp1 = sext <2 x i32> %b to <2 x i64>
111  %tmp2 = add <2 x i64> %tmp0, %tmp1
112  ret <2 x i64> %tmp2
113}
114
; saddl2_8h: both add operands are sign-extended from <16 x i8> to <16 x i16>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; saddl2 immediately followed by a saddl.
115; COST-LABEL: saddl2_8h
116; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
117; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
118; CODE-LABEL: saddl2_8h
119; CODE:       saddl2 v2.8h, v0.16b, v1.16b
120; CODE-NEXT:  saddl v0.8h, v0.8b, v1.8b
121define <16 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) {
122  %tmp0 = sext <16 x i8> %a to <16 x i16>
123  %tmp1 = sext <16 x i8> %b to <16 x i16>
124  %tmp2 = add <16 x i16> %tmp0, %tmp1
125  ret <16 x i16> %tmp2
126}
127
; saddl2_4s: both add operands are sign-extended from <8 x i16> to <8 x i32>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; saddl2 immediately followed by a saddl.
128; COST-LABEL: saddl2_4s
129; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
130; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
131; CODE-LABEL: saddl2_4s
132; CODE:       saddl2 v2.4s, v0.8h, v1.8h
133; CODE-NEXT:  saddl v0.4s, v0.4h, v1.4h
134define <8 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) {
135  %tmp0 = sext <8 x i16> %a to <8 x i32>
136  %tmp1 = sext <8 x i16> %b to <8 x i32>
137  %tmp2 = add <8 x i32> %tmp0, %tmp1
138  ret <8 x i32> %tmp2
139}
140
; saddl2_2d: both add operands are sign-extended from <4 x i32> to <4 x i64>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; saddl2 immediately followed by a saddl.
141; COST-LABEL: saddl2_2d
142; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
143; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
144; CODE-LABEL: saddl2_2d
145; CODE:       saddl2 v2.2d, v0.4s, v1.4s
146; CODE-NEXT:  saddl v0.2d, v0.2s, v1.2s
147define <4 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) {
148  %tmp0 = sext <4 x i32> %a to <4 x i64>
149  %tmp1 = sext <4 x i32> %b to <4 x i64>
150  %tmp2 = add <4 x i64> %tmp0, %tmp1
151  ret <4 x i64> %tmp2
152}
153
; usubl_8h: both sub operands are zero-extended from <8 x i8> to <8 x i16>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; single usubl.
154; COST-LABEL: usubl_8h
155; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
156; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
157; CODE-LABEL: usubl_8h
158; CODE:       usubl v0.8h, v0.8b, v1.8b
159define <8 x i16> @usubl_8h(<8 x i8> %a, <8 x i8> %b) {
160  %tmp0 = zext <8 x i8> %a to <8 x i16>
161  %tmp1 = zext <8 x i8> %b to <8 x i16>
162  %tmp2 = sub <8 x i16> %tmp0, %tmp1
163  ret <8 x i16> %tmp2
164}
165
; usubl_4s: both sub operands are zero-extended from <4 x i16> to <4 x i32>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; single usubl.
166; COST-LABEL: usubl_4s
167; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
168; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
169; CODE-LABEL: usubl_4s
170; CODE:       usubl v0.4s, v0.4h, v1.4h
171define <4 x i32> @usubl_4s(<4 x i16> %a, <4 x i16> %b) {
172  %tmp0 = zext <4 x i16> %a to <4 x i32>
173  %tmp1 = zext <4 x i16> %b to <4 x i32>
174  %tmp2 = sub <4 x i32> %tmp0, %tmp1
175  ret <4 x i32> %tmp2
176}
177
; usubl_2d: both sub operands are zero-extended from <2 x i32> to <2 x i64>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; single usubl.
178; COST-LABEL: usubl_2d
179; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
180; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64>
181; CODE-LABEL: usubl_2d
182; CODE:       usubl v0.2d, v0.2s, v1.2s
183define <2 x i64> @usubl_2d(<2 x i32> %a, <2 x i32> %b) {
184  %tmp0 = zext <2 x i32> %a to <2 x i64>
185  %tmp1 = zext <2 x i32> %b to <2 x i64>
186  %tmp2 = sub <2 x i64> %tmp0, %tmp1
187  ret <2 x i64> %tmp2
188}
189
; usubl2_8h: both sub operands are zero-extended from <16 x i8> to <16 x i16>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; usubl2 immediately followed by a usubl.
190; COST-LABEL: usubl2_8h
191; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
192; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16>
193; CODE-LABEL: usubl2_8h
194; CODE:       usubl2 v2.8h, v0.16b, v1.16b
195; CODE-NEXT:  usubl v0.8h, v0.8b, v1.8b
196define <16 x i16> @usubl2_8h(<16 x i8> %a, <16 x i8> %b) {
197  %tmp0 = zext <16 x i8> %a to <16 x i16>
198  %tmp1 = zext <16 x i8> %b to <16 x i16>
199  %tmp2 = sub <16 x i16> %tmp0, %tmp1
200  ret <16 x i16> %tmp2
201}
202
; usubl2_4s: both sub operands are zero-extended from <8 x i16> to <8 x i32>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; usubl2 immediately followed by a usubl.
203; COST-LABEL: usubl2_4s
204; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
205; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32>
206; CODE-LABEL: usubl2_4s
207; CODE:       usubl2 v2.4s, v0.8h, v1.8h
208; CODE-NEXT:  usubl v0.4s, v0.4h, v1.4h
209define <8 x i32> @usubl2_4s(<8 x i16> %a, <8 x i16> %b) {
210  %tmp0 = zext <8 x i16> %a to <8 x i32>
211  %tmp1 = zext <8 x i16> %b to <8 x i32>
212  %tmp2 = sub <8 x i32> %tmp0, %tmp1
213  ret <8 x i32> %tmp2
214}
215
; usubl2_2d: both sub operands are zero-extended from <4 x i32> to <4 x i64>.
; The cost-model run expects cost 0 for each zext; the llc run expects a
; usubl2 immediately followed by a usubl.
216; COST-LABEL: usubl2_2d
217; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
218; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64>
219; CODE-LABEL: usubl2_2d
220; CODE:       usubl2 v2.2d, v0.4s, v1.4s
221; CODE-NEXT:  usubl v0.2d, v0.2s, v1.2s
222define <4 x i64> @usubl2_2d(<4 x i32> %a, <4 x i32> %b) {
223  %tmp0 = zext <4 x i32> %a to <4 x i64>
224  %tmp1 = zext <4 x i32> %b to <4 x i64>
225  %tmp2 = sub <4 x i64> %tmp0, %tmp1
226  ret <4 x i64> %tmp2
227}
228
; ssubl_8h: both sub operands are sign-extended from <8 x i8> to <8 x i16>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single ssubl.
229; COST-LABEL: ssubl_8h
230; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
231; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16>
232; CODE-LABEL: ssubl_8h
233; CODE:       ssubl v0.8h, v0.8b, v1.8b
234define <8 x i16> @ssubl_8h(<8 x i8> %a, <8 x i8> %b) {
235  %tmp0 = sext <8 x i8> %a to <8 x i16>
236  %tmp1 = sext <8 x i8> %b to <8 x i16>
237  %tmp2 = sub <8 x i16> %tmp0, %tmp1
238  ret <8 x i16> %tmp2
239}
240
; ssubl_4s: both sub operands are sign-extended from <4 x i16> to <4 x i32>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single ssubl.
241; COST-LABEL: ssubl_4s
242; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
243; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32>
244; CODE-LABEL: ssubl_4s
245; CODE:       ssubl v0.4s, v0.4h, v1.4h
246define <4 x i32> @ssubl_4s(<4 x i16> %a, <4 x i16> %b) {
247  %tmp0 = sext <4 x i16> %a to <4 x i32>
248  %tmp1 = sext <4 x i16> %b to <4 x i32>
249  %tmp2 = sub <4 x i32> %tmp0, %tmp1
250  ret <4 x i32> %tmp2
251}
252
; ssubl_2d: both sub operands are sign-extended from <2 x i32> to <2 x i64>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; single ssubl.
253; COST-LABEL: ssubl_2d
254; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
255; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64>
256; CODE-LABEL: ssubl_2d
257; CODE:       ssubl v0.2d, v0.2s, v1.2s
258define <2 x i64> @ssubl_2d(<2 x i32> %a, <2 x i32> %b) {
259  %tmp0 = sext <2 x i32> %a to <2 x i64>
260  %tmp1 = sext <2 x i32> %b to <2 x i64>
261  %tmp2 = sub <2 x i64> %tmp0, %tmp1
262  ret <2 x i64> %tmp2
263}
264
; ssubl2_8h: both sub operands are sign-extended from <16 x i8> to <16 x i16>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; ssubl2 immediately followed by a ssubl.
265; COST-LABEL: ssubl2_8h
266; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
267; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16>
268; CODE-LABEL: ssubl2_8h
269; CODE:       ssubl2 v2.8h, v0.16b, v1.16b
270; CODE-NEXT:  ssubl v0.8h, v0.8b, v1.8b
271define <16 x i16> @ssubl2_8h(<16 x i8> %a, <16 x i8> %b) {
272  %tmp0 = sext <16 x i8> %a to <16 x i16>
273  %tmp1 = sext <16 x i8> %b to <16 x i16>
274  %tmp2 = sub <16 x i16> %tmp0, %tmp1
275  ret <16 x i16> %tmp2
276}
277
; ssubl2_4s: both sub operands are sign-extended from <8 x i16> to <8 x i32>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; ssubl2 immediately followed by a ssubl.
278; COST-LABEL: ssubl2_4s
279; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
280; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32>
281; CODE-LABEL: ssubl2_4s
282; CODE:       ssubl2 v2.4s, v0.8h, v1.8h
283; CODE-NEXT:  ssubl v0.4s, v0.4h, v1.4h
284define <8 x i32> @ssubl2_4s(<8 x i16> %a, <8 x i16> %b) {
285  %tmp0 = sext <8 x i16> %a to <8 x i32>
286  %tmp1 = sext <8 x i16> %b to <8 x i32>
287  %tmp2 = sub <8 x i32> %tmp0, %tmp1
288  ret <8 x i32> %tmp2
289}
290
; ssubl2_2d: both sub operands are sign-extended from <4 x i32> to <4 x i64>.
; The cost-model run expects cost 0 for each sext; the llc run expects a
; ssubl2 immediately followed by a ssubl.
291; COST-LABEL: ssubl2_2d
292; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
293; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64>
294; CODE-LABEL: ssubl2_2d
295; CODE:       ssubl2 v2.2d, v0.4s, v1.4s
296; CODE-NEXT:  ssubl v0.2d, v0.2s, v1.2s
297define <4 x i64> @ssubl2_2d(<4 x i32> %a, <4 x i32> %b) {
298  %tmp0 = sext <4 x i32> %a to <4 x i64>
299  %tmp1 = sext <4 x i32> %b to <4 x i64>
300  %tmp2 = sub <4 x i64> %tmp0, %tmp1
301  ret <4 x i64> %tmp2
302}
303
; uaddw_8h: only %a is zero-extended (<8 x i8> to <8 x i16>); %b is already
; <8 x i16> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext and the llc run expects a single uaddw.
304; COST-LABEL: uaddw_8h
305; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
306; CODE-LABEL: uaddw_8h
307; CODE:       uaddw v0.8h, v1.8h, v0.8b
308define <8 x i16> @uaddw_8h(<8 x i8> %a, <8 x i16> %b) {
309  %tmp0 = zext <8 x i8> %a to <8 x i16>
310  %tmp1 = add <8 x i16> %b, %tmp0
311  ret <8 x i16> %tmp1
312}
313
; uaddw_4s: only %a is zero-extended (<4 x i16> to <4 x i32>); %b is already
; <4 x i32> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext and the llc run expects a single uaddw.
314; COST-LABEL: uaddw_4s
315; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
316; CODE-LABEL: uaddw_4s
317; CODE:       uaddw v0.4s, v1.4s, v0.4h
318define <4 x i32> @uaddw_4s(<4 x i16> %a, <4 x i32> %b) {
319  %tmp0 = zext <4 x i16> %a to <4 x i32>
320  %tmp1 = add <4 x i32> %b, %tmp0
321  ret <4 x i32> %tmp1
322}
323
; uaddw_2d: only %a is zero-extended (<2 x i32> to <2 x i64>); %b is already
; <2 x i64> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext and the llc run expects a single uaddw.
324; COST-LABEL: uaddw_2d
325; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
326; CODE-LABEL: uaddw_2d
327; CODE:       uaddw v0.2d, v1.2d, v0.2s
328define <2 x i64> @uaddw_2d(<2 x i32> %a, <2 x i64> %b) {
329  %tmp0 = zext <2 x i32> %a to <2 x i64>
330  %tmp1 = add <2 x i64> %b, %tmp0
331  ret <2 x i64> %tmp1
332}
333
; uaddw2_8h: only %a is zero-extended (<16 x i8> to <16 x i16>); %b is already
; <16 x i16> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext; the llc run expects a uaddw2 immediately followed by a
; uaddw.
334; COST-LABEL: uaddw2_8h
335; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
336; CODE-LABEL: uaddw2_8h
337; CODE:       uaddw2 v2.8h, v2.8h, v0.16b
338; CODE-NEXT:  uaddw v0.8h, v1.8h, v0.8b
339define <16 x i16> @uaddw2_8h(<16 x i8> %a, <16 x i16> %b) {
340  %tmp0 = zext <16 x i8> %a to <16 x i16>
341  %tmp1 = add <16 x i16> %b, %tmp0
342  ret <16 x i16> %tmp1
343}
344
; uaddw2_4s: only %a is zero-extended (<8 x i16> to <8 x i32>); %b is already
; <8 x i32> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext; the llc run expects a uaddw2 immediately followed by a
; uaddw.
345; COST-LABEL: uaddw2_4s
346; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
347; CODE-LABEL: uaddw2_4s
348; CODE:       uaddw2 v2.4s, v2.4s, v0.8h
349; CODE-NEXT:  uaddw v0.4s, v1.4s, v0.4h
350define <8 x i32> @uaddw2_4s(<8 x i16> %a, <8 x i32> %b) {
351  %tmp0 = zext <8 x i16> %a to <8 x i32>
352  %tmp1 = add <8 x i32> %b, %tmp0
353  ret <8 x i32> %tmp1
354}
355
; uaddw2_2d: only %a is zero-extended (<4 x i32> to <4 x i64>); %b is already
; <4 x i64> and is the first operand of the add. The cost-model run expects
; cost 0 for the zext; the llc run expects a uaddw2 immediately followed by a
; uaddw.
356; COST-LABEL: uaddw2_2d
357; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
358; CODE-LABEL: uaddw2_2d
359; CODE:       uaddw2 v2.2d, v2.2d, v0.4s
360; CODE-NEXT:  uaddw v0.2d, v1.2d, v0.2s
361define <4 x i64> @uaddw2_2d(<4 x i32> %a, <4 x i64> %b) {
362  %tmp0 = zext <4 x i32> %a to <4 x i64>
363  %tmp1 = add <4 x i64> %b, %tmp0
364  ret <4 x i64> %tmp1
365}
366
; saddw_8h: only %a is sign-extended (<8 x i8> to <8 x i16>); %b is already
; <8 x i16> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext and the llc run expects a single saddw.
367; COST-LABEL: saddw_8h
368; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
369; CODE-LABEL: saddw_8h
370; CODE:       saddw v0.8h, v1.8h, v0.8b
371define <8 x i16> @saddw_8h(<8 x i8> %a, <8 x i16> %b) {
372  %tmp0 = sext <8 x i8> %a to <8 x i16>
373  %tmp1 = add <8 x i16> %b, %tmp0
374  ret <8 x i16> %tmp1
375}
376
; saddw_4s: only %a is sign-extended (<4 x i16> to <4 x i32>); %b is already
; <4 x i32> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext and the llc run expects a single saddw.
377; COST-LABEL: saddw_4s
378; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
379; CODE-LABEL: saddw_4s
380; CODE:       saddw v0.4s, v1.4s, v0.4h
381define <4 x i32> @saddw_4s(<4 x i16> %a, <4 x i32> %b) {
382  %tmp0 = sext <4 x i16> %a to <4 x i32>
383  %tmp1 = add <4 x i32> %b, %tmp0
384  ret <4 x i32> %tmp1
385}
386
; saddw_2d: only %a is sign-extended (<2 x i32> to <2 x i64>); %b is already
; <2 x i64> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext and the llc run expects a single saddw.
387; COST-LABEL: saddw_2d
388; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
389; CODE-LABEL: saddw_2d
390; CODE:       saddw v0.2d, v1.2d, v0.2s
391define <2 x i64> @saddw_2d(<2 x i32> %a, <2 x i64> %b) {
392  %tmp0 = sext <2 x i32> %a to <2 x i64>
393  %tmp1 = add <2 x i64> %b, %tmp0
394  ret <2 x i64> %tmp1
395}
396
; saddw2_8h: only %a is sign-extended (<16 x i8> to <16 x i16>); %b is already
; <16 x i16> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext; the llc run expects a saddw2 immediately followed by a
; saddw.
397; COST-LABEL: saddw2_8h
398; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
399; CODE-LABEL: saddw2_8h
400; CODE:       saddw2 v2.8h, v2.8h, v0.16b
401; CODE-NEXT:  saddw v0.8h, v1.8h, v0.8b
402define <16 x i16> @saddw2_8h(<16 x i8> %a, <16 x i16> %b) {
403  %tmp0 = sext <16 x i8> %a to <16 x i16>
404  %tmp1 = add <16 x i16> %b, %tmp0
405  ret <16 x i16> %tmp1
406}
407
; saddw2_4s: only %a is sign-extended (<8 x i16> to <8 x i32>); %b is already
; <8 x i32> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext; the llc run expects a saddw2 immediately followed by a
; saddw.
408; COST-LABEL: saddw2_4s
409; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
410; CODE-LABEL: saddw2_4s
411; CODE:       saddw2 v2.4s, v2.4s, v0.8h
412; CODE-NEXT:  saddw v0.4s, v1.4s, v0.4h
413define <8 x i32> @saddw2_4s(<8 x i16> %a, <8 x i32> %b) {
414  %tmp0 = sext <8 x i16> %a to <8 x i32>
415  %tmp1 = add <8 x i32> %b, %tmp0
416  ret <8 x i32> %tmp1
417}
418
; saddw2_2d: only %a is sign-extended (<4 x i32> to <4 x i64>); %b is already
; <4 x i64> and is the first operand of the add. The cost-model run expects
; cost 0 for the sext; the llc run expects a saddw2 immediately followed by a
; saddw.
419; COST-LABEL: saddw2_2d
420; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
421; CODE-LABEL: saddw2_2d
422; CODE:       saddw2 v2.2d, v2.2d, v0.4s
423; CODE-NEXT:  saddw v0.2d, v1.2d, v0.2s
424define <4 x i64> @saddw2_2d(<4 x i32> %a, <4 x i64> %b) {
425  %tmp0 = sext <4 x i32> %a to <4 x i64>
426  %tmp1 = add <4 x i64> %b, %tmp0
427  ret <4 x i64> %tmp1
428}
429
; usubw_8h: only %a is zero-extended (<8 x i8> to <8 x i16>) and it is the
; second operand of the sub; %b is already <8 x i16>. The cost-model run
; expects cost 0 for the zext and the llc run expects a single usubw.
430; COST-LABEL: usubw_8h
431; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
432; CODE-LABEL: usubw_8h
433; CODE:       usubw v0.8h, v1.8h, v0.8b
434define <8 x i16> @usubw_8h(<8 x i8> %a, <8 x i16> %b) {
435  %tmp0 = zext <8 x i8> %a to <8 x i16>
436  %tmp1 = sub <8 x i16> %b, %tmp0
437  ret <8 x i16> %tmp1
438}
439
; usubw_4s: only %a is zero-extended (<4 x i16> to <4 x i32>) and it is the
; second operand of the sub; %b is already <4 x i32>. The cost-model run
; expects cost 0 for the zext and the llc run expects a single usubw.
440; COST-LABEL: usubw_4s
441; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32>
442; CODE-LABEL: usubw_4s
443; CODE:       usubw v0.4s, v1.4s, v0.4h
444define <4 x i32> @usubw_4s(<4 x i16> %a, <4 x i32> %b) {
445  %tmp0 = zext <4 x i16> %a to <4 x i32>
446  %tmp1 = sub <4 x i32> %b, %tmp0
447  ret <4 x i32> %tmp1
448}
449
; usubw_2d: only %a is zero-extended (<2 x i32> to <2 x i64>) and it is the
; second operand of the sub; %b is already <2 x i64>. The cost-model run
; expects cost 0 for the zext and the llc run expects a single usubw.
450; COST-LABEL: usubw_2d
451; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64>
452; CODE-LABEL: usubw_2d
453; CODE:       usubw v0.2d, v1.2d, v0.2s
454define <2 x i64> @usubw_2d(<2 x i32> %a, <2 x i64> %b) {
455  %tmp0 = zext <2 x i32> %a to <2 x i64>
456  %tmp1 = sub <2 x i64> %b, %tmp0
457  ret <2 x i64> %tmp1
458}
459
; usubw2_8h: only %a is zero-extended (<16 x i8> to <16 x i16>) and it is the
; second operand of the sub; %b is already <16 x i16>. The cost-model run
; expects cost 0 for the zext; the llc run expects a usubw2 immediately
; followed by a usubw.
460; COST-LABEL: usubw2_8h
461; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16>
462; CODE-LABEL: usubw2_8h
463; CODE:       usubw2 v2.8h, v2.8h, v0.16b
464; CODE-NEXT:  usubw v0.8h, v1.8h, v0.8b
465define <16 x i16> @usubw2_8h(<16 x i8> %a, <16 x i16> %b) {
466  %tmp0 = zext <16 x i8> %a to <16 x i16>
467  %tmp1 = sub <16 x i16> %b, %tmp0
468  ret <16 x i16> %tmp1
469}
470
; usubw2_4s: only %a is zero-extended (<8 x i16> to <8 x i32>) and it is the
; second operand of the sub; %b is already <8 x i32>. The cost-model run
; expects cost 0 for the zext; the llc run expects a usubw2 immediately
; followed by a usubw.
471; COST-LABEL: usubw2_4s
472; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32>
473; CODE-LABEL: usubw2_4s
474; CODE:       usubw2 v2.4s, v2.4s, v0.8h
475; CODE-NEXT:  usubw v0.4s, v1.4s, v0.4h
476define <8 x i32> @usubw2_4s(<8 x i16> %a, <8 x i32> %b) {
477  %tmp0 = zext <8 x i16> %a to <8 x i32>
478  %tmp1 = sub <8 x i32> %b, %tmp0
479  ret <8 x i32> %tmp1
480}
481
; usubw2_2d: only %a is zero-extended (<4 x i32> to <4 x i64>) and it is the
; second operand of the sub; %b is already <4 x i64>. The cost-model run
; expects cost 0 for the zext; the llc run expects a usubw2 immediately
; followed by a usubw.
482; COST-LABEL: usubw2_2d
483; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64>
484; CODE-LABEL: usubw2_2d
485; CODE:       usubw2 v2.2d, v2.2d, v0.4s
486; CODE-NEXT:  usubw v0.2d, v1.2d, v0.2s
487define <4 x i64> @usubw2_2d(<4 x i32> %a, <4 x i64> %b) {
488  %tmp0 = zext <4 x i32> %a to <4 x i64>
489  %tmp1 = sub <4 x i64> %b, %tmp0
490  ret <4 x i64> %tmp1
491}
492
; ssubw_8h: only %a is sign-extended (<8 x i8> to <8 x i16>) and it is the
; second operand of the sub; %b is already <8 x i16>. The cost-model run
; expects cost 0 for the sext and the llc run expects a single ssubw.
493; COST-LABEL: ssubw_8h
494; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
495; CODE-LABEL: ssubw_8h
496; CODE:       ssubw v0.8h, v1.8h, v0.8b
497define <8 x i16> @ssubw_8h(<8 x i8> %a, <8 x i16> %b) {
498  %tmp0 = sext <8 x i8> %a to <8 x i16>
499  %tmp1 = sub <8 x i16> %b, %tmp0
500  ret <8 x i16> %tmp1
501}
502
; ssubw_4s: only %a is sign-extended (<4 x i16> to <4 x i32>) and it is the
; second operand of the sub; %b is already <4 x i32>. The cost-model run
; expects cost 0 for the sext and the llc run expects a single ssubw.
503; COST-LABEL: ssubw_4s
504; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32>
505; CODE-LABEL: ssubw_4s
506; CODE:       ssubw v0.4s, v1.4s, v0.4h
507define <4 x i32> @ssubw_4s(<4 x i16> %a, <4 x i32> %b) {
508  %tmp0 = sext <4 x i16> %a to <4 x i32>
509  %tmp1 = sub <4 x i32> %b, %tmp0
510  ret <4 x i32> %tmp1
511}
512
; ssubw_2d: only %a is sign-extended (<2 x i32> to <2 x i64>) and it is the
; second operand of the sub; %b is already <2 x i64>. The cost-model run
; expects cost 0 for the sext and the llc run expects a single ssubw.
513; COST-LABEL: ssubw_2d
514; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64>
515; CODE-LABEL: ssubw_2d
516; CODE:       ssubw v0.2d, v1.2d, v0.2s
517define <2 x i64> @ssubw_2d(<2 x i32> %a, <2 x i64> %b) {
518  %tmp0 = sext <2 x i32> %a to <2 x i64>
519  %tmp1 = sub <2 x i64> %b, %tmp0
520  ret <2 x i64> %tmp1
521}
522
; ssubw2_8h: only %a is sign-extended (<16 x i8> to <16 x i16>) and it is the
; second operand of the sub; %b is already <16 x i16>. The cost-model run
; expects cost 0 for the sext; the llc run expects a ssubw2 immediately
; followed by a ssubw.
523; COST-LABEL: ssubw2_8h
524; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16>
525; CODE-LABEL: ssubw2_8h
526; CODE:       ssubw2 v2.8h, v2.8h, v0.16b
527; CODE-NEXT:  ssubw v0.8h, v1.8h, v0.8b
528define <16 x i16> @ssubw2_8h(<16 x i8> %a, <16 x i16> %b) {
529  %tmp0 = sext <16 x i8> %a to <16 x i16>
530  %tmp1 = sub <16 x i16> %b, %tmp0
531  ret <16 x i16> %tmp1
532}
533
; ssubw2_4s: only %a is sign-extended (<8 x i16> to <8 x i32>) and it is the
; second operand of the sub; %b is already <8 x i32>. The cost-model run
; expects cost 0 for the sext; the llc run expects a ssubw2 immediately
; followed by a ssubw.
534; COST-LABEL: ssubw2_4s
535; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32>
536; CODE-LABEL: ssubw2_4s
537; CODE:       ssubw2 v2.4s, v2.4s, v0.8h
538; CODE-NEXT:  ssubw v0.4s, v1.4s, v0.4h
539define <8 x i32> @ssubw2_4s(<8 x i16> %a, <8 x i32> %b) {
540  %tmp0 = sext <8 x i16> %a to <8 x i32>
541  %tmp1 = sub <8 x i32> %b, %tmp0
542  ret <8 x i32> %tmp1
543}
544
; ssubw2_2d: only %a is sign-extended (<4 x i32> to <4 x i64>) and it is the
; second operand of the sub; %b is already <4 x i64>. The cost-model run
; expects cost 0 for the sext; the llc run expects a ssubw2 immediately
; followed by a ssubw.
545; COST-LABEL: ssubw2_2d
546; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64>
547; CODE-LABEL: ssubw2_2d
548; CODE:       ssubw2 v2.2d, v2.2d, v0.4s
549; CODE-NEXT:  ssubw v0.2d, v1.2d, v0.2s
550define <4 x i64> @ssubw2_2d(<4 x i32> %a, <4 x i64> %b) {
551  %tmp0 = sext <4 x i32> %a to <4 x i64>
552  %tmp1 = sub <4 x i64> %b, %tmp0
553  ret <4 x i64> %tmp1
554}
555
; neg_wrong_operand_order: negative case. The zero-extended value is the
; FIRST operand of the sub (the usubw_* tests in this file extend the second
; operand instead), so the cost-model run expects the zext to keep cost 1.
; Only the cost-model output is checked for this function.
556; COST-LABEL: neg_wrong_operand_order
557; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
558define <8 x i16> @neg_wrong_operand_order(<8 x i8> %a, <8 x i16> %b) {
559  %tmp0 = zext <8 x i8> %a to <8 x i16>
560  %tmp1 = sub <8 x i16> %tmp0, %b
561  ret <8 x i16> %tmp1
562}
563
; neg_non_widening_op: negative case. The zero-extended value feeds a udiv
; rather than an add or sub, so the cost-model run expects the zext to keep
; cost 1. Only the cost-model output is checked for this function.
564; COST-LABEL: neg_non_widening_op
565; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16>
566define <8 x i16> @neg_non_widening_op(<8 x i8> %a, <8 x i16> %b) {
567  %tmp0 = zext <8 x i8> %a to <8 x i16>
568  %tmp1 = udiv <8 x i16> %b, %tmp0
569  ret <8 x i16> %tmp1
570}
571
; neg_dissimilar_operand_kind_0: negative case. The two add operands use
; different extend kinds (%tmp0 is a sext, %tmp1 is a zext, both from
; <8 x i8>). The cost-model run expects cost 1 for the first-operand sext and
; cost 0 for the second-operand zext. Only the cost-model output is checked.
572; COST-LABEL: neg_dissimilar_operand_kind_0
573; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16>
574; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16>
575define <8 x i16> @neg_dissimilar_operand_kind_0(<8 x i8> %a, <8 x i8> %b) {
576  %tmp0 = sext <8 x i8> %a to <8 x i16>
577  %tmp1 = zext <8 x i8> %b to <8 x i16>
578  %tmp2 = add <8 x i16> %tmp0, %tmp1
579  ret <8 x i16> %tmp2
580}
581
; neg_dissimilar_operand_kind_1: negative case. Both add operands are
; zero-extended to <4 x i32> but from different source element widths
; (%a from i8, %b from i16). The cost-model run expects cost 1 for the
; first-operand zext and cost 0 for the second-operand zext. Only the
; cost-model output is checked.
582; COST-LABEL: neg_dissimilar_operand_kind_1
583; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <4 x i8> %a to <4 x i32>
584; COST-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32>
585define <4 x i32> @neg_dissimilar_operand_kind_1(<4 x i8> %a, <4 x i16> %b) {
586  %tmp0 = zext <4 x i8> %a to <4 x i32>
587  %tmp1 = zext <4 x i16> %b to <4 x i32>
588  %tmp2 = add <4 x i32> %tmp0, %tmp1
589  ret <4 x i32> %tmp2
590}
591
; neg_illegal_vector_type_0: negative case. The extend source is <16 x i4>
; (widened to <16 x i8>) and feeds the second operand of a sub. The cost-model
; run expects the zext to keep cost 1. Only the cost-model output is checked.
592; COST-LABEL: neg_illegal_vector_type_0
593; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <16 x i4> %a to <16 x i8>
594define <16 x i8> @neg_illegal_vector_type_0(<16 x i4> %a, <16 x i8> %b) {
595  %tmp0 = zext <16 x i4> %a to <16 x i8>
596  %tmp1 = sub <16 x i8> %b, %tmp0
597  ret <16 x i8> %tmp1
598}
599
; neg_llegal_vector_type_1: negative case. The extend is on a single-element
; vector (<1 x i16> to <1 x i32>) feeding the second operand of an add. The
; cost-model run expects the zext to keep cost 1. Only the cost-model output
; is checked.
; NOTE(review): "llegal" in the name looks like a typo for "illegal" (compare
; neg_illegal_vector_type_0); left as-is because the label and the symbol
; must match -- confirm before renaming.
600; COST-LABEL: neg_llegal_vector_type_1
601; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %tmp0 = zext <1 x i16> %a to <1 x i32>
602define <1 x i32> @neg_llegal_vector_type_1(<1 x i16> %a, <1 x i32> %b) {
603  %tmp0 = zext <1 x i16> %a to <1 x i32>
604  %tmp1 = add <1 x i32> %b, %tmp0
605  ret <1 x i32> %tmp1
606}
607
; neg_llegal_vector_type_2: negative case. The extend quadruples the element
; width (<4 x i16> to <4 x i64>) before feeding the second operand of an add.
; The cost-model run expects cost 3 for the zext. Only the cost-model output
; is checked.
; NOTE(review): "llegal" in the name looks like a typo for "illegal" (compare
; neg_illegal_vector_type_0); left as-is because the label and the symbol
; must match -- confirm before renaming.
608; COST-LABEL: neg_llegal_vector_type_2
609; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i64>
610define <4 x i64> @neg_llegal_vector_type_2(<4 x i16> %a, <4 x i64> %b) {
611  %tmp0 = zext <4 x i16> %a to <4 x i64>
612  %tmp1 = add <4 x i64> %b, %tmp0
613  ret <4 x i64> %tmp1
614}
615
; neg_llegal_vector_type_3: negative case. The extend uses a three-element
; vector with non-power-of-two element widths (<3 x i34> to <3 x i68>) and
; feeds the second operand of an add. The cost-model run expects cost 3 for
; the zext. Only the cost-model output is checked.
; NOTE(review): "llegal" in the name looks like a typo for "illegal" (compare
; neg_illegal_vector_type_0); left as-is because the label and the symbol
; must match -- confirm before renaming.
616; COST-LABEL: neg_llegal_vector_type_3
617; COST-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %tmp0 = zext <3 x i34> %a to <3 x i68>
618define <3 x i68> @neg_llegal_vector_type_3(<3 x i34> %a, <3 x i68> %b) {
619  %tmp0 = zext <3 x i34> %a to <3 x i68>
620  %tmp1 = add <3 x i68> %b, %tmp0
621  ret <3 x i68> %tmp1
622}
623