• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
6
; BEXTRI, 32-bit. Computes (x >> 4) & 4095 once on the register argument and
; once on a value loaded through %p1, then adds the two results. The
; assertions expect each shift+mask pair to become a single bextrl with
; immediate 0xC04 (register form and memory form respectively).
define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT:    # sched: [2:1.00]
; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT:    # sched: [7:1.00]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.shr = lshr i32 %a0, 4
  %mem.shr = lshr i32 %ld, 4
  %reg.field = and i32 %reg.shr, 4095
  %mem.field = and i32 %mem.shr, 4095
  %sum = add i32 %reg.field, %mem.field
  ret i32 %sum
}
31
; BEXTRI, 64-bit IR. Same (x >> 4) & 4095 pattern as the 32-bit test, on an
; i64 register argument and an i64 loaded through %p1. Note that the
; assertions still expect the 32-bit bextrl encoding for both extracts; only
; the final add is 64-bit (addq).
define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_bextri_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
; GENERIC-NEXT:    # sched: [2:1.00]
; GENERIC-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
; GENERIC-NEXT:    # sched: [7:1.00]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT:    bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.shr = lshr i64 %a0, 4
  %mem.shr = lshr i64 %ld, 4
  %reg.field = and i64 %reg.shr, 4095
  %mem.field = and i64 %mem.shr, 4095
  %sum = add i64 %reg.field, %mem.field
  ret i64 %sum
}
56
; BLCFILL, 32-bit. Computes (x + 1) & x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcfilll per operand (register form, then memory form).
define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blcfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcfilll %edi, %ecx
; BDVER-NEXT:    blcfilll (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.fill = and i32 %reg.inc, %a0
  %mem.fill = and i32 %mem.inc, %ld
  %sum = add i32 %reg.fill, %mem.fill
  ret i32 %sum
}
79
; BLCFILL, 64-bit. Computes (x + 1) & x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcfillq per operand (register form, then memory form).
define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blcfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcfillq %rdi, %rcx
; BDVER-NEXT:    blcfillq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.fill = and i64 %reg.inc, %a0
  %mem.fill = and i64 %mem.inc, %ld
  %sum = add i64 %reg.fill, %mem.fill
  ret i64 %sum
}
102
; BLCI, 32-bit. Computes ~(1 + x) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The constant is kept
; as the first operand of the add, exactly as in the original test. The
; assertions expect one blcil per operand (register form, then memory form).
define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcil %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blcil (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcil %edi, %ecx
; BDVER-NEXT:    blcil (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 1, %a0
  %mem.inc = add i32 1, %ld
  %reg.inc.not = xor i32 %reg.inc, -1
  %mem.inc.not = xor i32 %mem.inc, -1
  %reg.blci = or i32 %reg.inc.not, %a0
  %mem.blci = or i32 %mem.inc.not, %ld
  %sum = add i32 %reg.blci, %mem.blci
  ret i32 %sum
}
127
; BLCI, 64-bit. Computes ~(1 + x) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The constant is kept
; as the first operand of the add, exactly as in the original test. The
; assertions expect one blciq per operand (register form, then memory form).
define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blci_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blciq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blciq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blciq %rdi, %rcx
; BDVER-NEXT:    blciq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 1, %a0
  %mem.inc = add i64 1, %ld
  %reg.inc.not = xor i64 %reg.inc, -1
  %mem.inc.not = xor i64 %mem.inc, -1
  %reg.blci = or i64 %reg.inc.not, %a0
  %mem.blci = or i64 %mem.inc.not, %ld
  %sum = add i64 %reg.blci, %mem.blci
  ret i64 %sum
}
152
; BLCIC, 32-bit. Computes (x + 1) & ~x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcicl per operand (register form, then memory form).
define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blcicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcicl %edi, %ecx
; BDVER-NEXT:    blcicl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.blcic = and i32 %reg.inc, %reg.not
  %mem.blcic = and i32 %mem.inc, %mem.not
  %sum = add i32 %reg.blcic, %mem.blcic
  ret i32 %sum
}
177
; BLCIC, 64-bit. Computes (x + 1) & ~x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcicq per operand (register form, then memory form).
define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcic_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blcicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcicq %rdi, %rcx
; BDVER-NEXT:    blcicq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.blcic = and i64 %reg.inc, %reg.not
  %mem.blcic = and i64 %mem.inc, %mem.not
  %sum = add i64 %reg.blcic, %mem.blcic
  ret i64 %sum
}
202
; BLCMSK, 32-bit. Computes (x + 1) ^ x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcmskl per operand (register form, then memory form).
define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blcmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcmskl %edi, %ecx
; BDVER-NEXT:    blcmskl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.mask = xor i32 %reg.inc, %a0
  %mem.mask = xor i32 %mem.inc, %ld
  %sum = add i32 %reg.mask, %mem.mask
  ret i32 %sum
}
225
; BLCMSK, 64-bit. Computes (x + 1) ^ x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcmskq per operand (register form, then memory form).
define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blcmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcmskq %rdi, %rcx
; BDVER-NEXT:    blcmskq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.mask = xor i64 %reg.inc, %a0
  %mem.mask = xor i64 %mem.inc, %ld
  %sum = add i64 %reg.mask, %mem.mask
  ret i64 %sum
}
248
; BLCS, 32-bit. Computes (x + 1) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcsl per operand (register form, then memory form).
define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcsl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blcsl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcsl %edi, %ecx
; BDVER-NEXT:    blcsl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.set = or i32 %reg.inc, %a0
  %mem.set = or i32 %mem.inc, %ld
  %sum = add i32 %reg.set, %mem.set
  ret i32 %sum
}
271
; BLCS, 64-bit. Computes (x + 1) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blcsq per operand (register form, then memory form).
define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blcs_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blcsq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blcsq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blcsq %rdi, %rcx
; BDVER-NEXT:    blcsq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.set = or i64 %reg.inc, %a0
  %mem.set = or i64 %mem.inc, %ld
  %sum = add i64 %reg.set, %mem.set
  ret i64 %sum
}
294
; BLSFILL, 32-bit. Computes (x - 1) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blsfilll per operand (register form, then memory form).
define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blsfilll %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blsfilll (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blsfilll %edi, %ecx
; BDVER-NEXT:    blsfilll (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.fill = or i32 %reg.dec, %a0
  %mem.fill = or i32 %mem.dec, %ld
  %sum = add i32 %reg.fill, %mem.fill
  ret i32 %sum
}
317
; BLSFILL, 64-bit. Computes (x - 1) | x for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blsfillq per operand (register form, then memory form).
define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blsfillq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blsfillq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blsfillq %rdi, %rcx
; BDVER-NEXT:    blsfillq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.fill = or i64 %reg.dec, %a0
  %mem.fill = or i64 %mem.dec, %ld
  %sum = add i64 %reg.fill, %mem.fill
  ret i64 %sum
}
340
; BLSIC, 32-bit. Computes ~x | (x - 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blsicl per operand (register form, then memory form).
define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blsicl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    blsicl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blsicl %edi, %ecx
; BDVER-NEXT:    blsicl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.blsic = or i32 %reg.not, %reg.dec
  %mem.blsic = or i32 %mem.not, %mem.dec
  %sum = add i32 %reg.blsic, %mem.blsic
  ret i32 %sum
}
365
; BLSIC, 64-bit. Computes ~x | (x - 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one blsicq per operand (register form, then memory form).
define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_blsic_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    blsicq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    blsicq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    blsicq %rdi, %rcx
; BDVER-NEXT:    blsicq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.blsic = or i64 %reg.not, %reg.dec
  %mem.blsic = or i64 %mem.not, %mem.dec
  %sum = add i64 %reg.blsic, %mem.blsic
  ret i64 %sum
}
390
; T1MSKC, 32-bit. Computes ~x | (x + 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one t1mskcl per operand (register form, then memory form).
define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    t1mskcl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    t1mskcl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    t1mskcl %edi, %ecx
; BDVER-NEXT:    t1mskcl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.inc = add i32 %a0, 1
  %mem.inc = add i32 %ld, 1
  %reg.t1mskc = or i32 %reg.not, %reg.inc
  %mem.t1mskc = or i32 %mem.not, %mem.inc
  %sum = add i32 %reg.t1mskc, %mem.t1mskc
  ret i32 %sum
}
415
; T1MSKC, 64-bit. Computes ~x | (x + 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one t1mskcq per operand (register form, then memory form).
define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    t1mskcq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    t1mskcq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    t1mskcq %rdi, %rcx
; BDVER-NEXT:    t1mskcq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.inc = add i64 %a0, 1
  %mem.inc = add i64 %ld, 1
  %reg.t1mskc = or i64 %reg.not, %reg.inc
  %mem.t1mskc = or i64 %mem.not, %mem.inc
  %sum = add i64 %reg.t1mskc, %mem.t1mskc
  ret i64 %sum
}
440
; TZMSK, 32-bit. Computes ~x & (x - 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one tzmskl per operand (register form, then memory form).
define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    tzmskl %edi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    tzmskl (%rsi), %eax # sched: [6:0.50]
; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER:       # %bb.0:
; BDVER-NEXT:    tzmskl %edi, %ecx
; BDVER-NEXT:    tzmskl (%rsi), %eax
; BDVER-NEXT:    addl %ecx, %eax
; BDVER-NEXT:    retq
  %ld = load i32, i32* %p1
  %reg.not = xor i32 %a0, -1
  %mem.not = xor i32 %ld, -1
  %reg.dec = add i32 %a0, -1
  %mem.dec = add i32 %ld, -1
  %reg.tzmsk = and i32 %reg.not, %reg.dec
  %mem.tzmsk = and i32 %mem.not, %mem.dec
  %sum = add i32 %reg.tzmsk, %mem.tzmsk
  ret i32 %sum
}
465
; TZMSK, 64-bit. Computes ~x & (x - 1) for the register argument and for a
; value loaded through %p1, then adds the two results. The assertions expect
; one tzmskq per operand (register form, then memory form).
define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    tzmskq %rdi, %rcx # sched: [1:0.33]
; GENERIC-NEXT:    tzmskq (%rsi), %rax # sched: [6:0.50]
; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER:       # %bb.0:
; BDVER-NEXT:    tzmskq %rdi, %rcx
; BDVER-NEXT:    tzmskq (%rsi), %rax
; BDVER-NEXT:    addq %rcx, %rax
; BDVER-NEXT:    retq
  %ld = load i64, i64* %p1
  %reg.not = xor i64 %a0, -1
  %mem.not = xor i64 %ld, -1
  %reg.dec = add i64 %a0, -1
  %mem.dec = add i64 %ld, -1
  %reg.tzmsk = and i64 %reg.not, %reg.dec
  %mem.tzmsk = and i64 %mem.not, %mem.dec
  %sum = add i64 %reg.tzmsk, %mem.tzmsk
  ret i64 %sum
}
490