; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2

define i32 @andn32(i32 %x, i32 %y)   {
; X86-LABEL: andn32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y, %tmp1
  ret i32 %tmp2
}

define i32 @andn32_load(i32 %x, i32* %y)   {
; X86-LABEL: andn32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl (%eax), %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: andn32_load:
; X64:       # %bb.0:
; X64-NEXT:    andnl (%rsi), %edi, %eax
; X64-NEXT:    retq
  %y1 = load i32, i32* %y
  %tmp1 = xor i32 %x, -1
  %tmp2 = and i32 %y1, %tmp1
  ret i32 %tmp2
}

define i64 @andn64(i64 %x, i64 %y)   {
; X86-LABEL: andn64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: andn64:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    retq
  %tmp1 = xor i64 %x, -1
  %tmp2 = and i64 %tmp1, %y
  ret i64 %tmp2
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp(i32 %x, i32 %y) {
; X86-LABEL: andn_cmp:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i32 %x, -1
  %and = and i32 %notx, %y
  %cmp = icmp eq i32 %and, 0
  ret i1 %cmp
}

; Recognize a disguised andn in the following 4 tests.
define i1 @and_cmp1(i32 %x, i32 %y) {
; X86-LABEL: and_cmp1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp1:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp2(i32 %x, i32 %y) {
; X86-LABEL: and_cmp2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp2:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %and, %y
  ret i1 %cmp
}

define i1 @and_cmp3(i32 %x, i32 %y) {
; X86-LABEL: and_cmp3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp3:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp eq i32 %y, %and
  ret i1 %cmp
}

define i1 @and_cmp4(i32 %x, i32 %y) {
; X86-LABEL: and_cmp4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    setne %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp4:
; X64:       # %bb.0:
; X64-NEXT:    andnl %esi, %edi, %eax
; X64-NEXT:    setne %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ne i32 %y, %and
  ret i1 %cmp
}

; A mask and compare against constant is ok for an 'andn' too
; even though the BMI instruction doesn't have an immediate form.
define i1 @and_cmp_const(i32 %x) {
; X86-LABEL: and_cmp_const:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    andl $43, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const:
; X64:       # %bb.0:
; X64-NEXT:    notl %edi
; X64-NEXT:    andl $43, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %and = and i32 %x, 43
  %cmp = icmp eq i32 %and, 43
  ret i1 %cmp
}

; But don't use 'andn' if the mask is a power-of-two.
define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) {
; X86-LABEL: and_cmp_const_power_of_two:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    btl %ecx, %eax
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_const_power_of_two:
; X64:       # %bb.0:
; X64-NEXT:    btl %esi, %edi
; X64-NEXT:    setae %al
; X64-NEXT:    retq
  %shl = shl i32 1, %y
  %and = and i32 %x, %shl
  %cmp = icmp ne i32 %and, %shl
  ret i1 %cmp
}

; Don't transform to 'andn' if there's another use of the 'and'.
define i32 @and_cmp_not_one_use(i32 %x) {
; X86-LABEL: and_cmp_not_one_use:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $37, %ecx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    cmpl $37, %ecx
; X86-NEXT:    sete %al
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: and_cmp_not_one_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $37, %edi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    cmpl $37, %edi
; X64-NEXT:    sete %al
; X64-NEXT:    addl %edi, %eax
; X64-NEXT:    retq
  %and = and i32 %x, 37
  %cmp = icmp eq i32 %and, 37
  %ext = zext i1 %cmp to i32
  %add = add i32 %and, %ext
  ret i32 %add
}

; Verify that we're not transforming invalid comparison predicates.
define i1 @not_an_andn1(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setg %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn1:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setg %al
; X64-NEXT:    retq
  %and = and i32 %x, %y
  %cmp = icmp sgt i32 %y, %and
  ret i1 %cmp
}

define i1 @not_an_andn2(i32 %x, i32 %y) {
; X86-LABEL: not_an_andn2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl %eax, %ecx
; X86-NEXT:    cmpl %ecx, %eax
; X86-NEXT:    setbe %al
; X86-NEXT:    retl
;
; X64-LABEL: not_an_andn2:
; X64:       # %bb.0:
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    cmpl %edi, %esi
; X64-NEXT:    setbe %al
; X64-NEXT:    retq
  %and = and i32 %y, %x
  %cmp = icmp ule i32 %y, %and
  ret i1 %cmp
}

; Don't choose a 'test' if an 'andn' can be used.
define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) {
; X86-LABEL: andn_cmp_swap_ops:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %ecx
; X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_swap_ops:
; X64:       # %bb.0:
; X64-NEXT:    andnq %rsi, %rdi, %rax
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %notx = xor i64 %x, -1
  %and = and i64 %y, %notx
  %cmp = icmp eq i64 %and, 0
  ret i1 %cmp
}

; Use a 'test' (not an 'and') because 'andn' only works for i32/i64.
define i1 @andn_cmp_i8(i8 %x, i8 %y) {
; X86-LABEL: andn_cmp_i8:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    testb %al, {{[0-9]+}}(%esp)
; X86-NEXT:    sete %al
; X86-NEXT:    retl
;
; X64-LABEL: andn_cmp_i8:
; X64:       # %bb.0:
; X64-NEXT:    notb %sil
; X64-NEXT:    testb %sil, %dil
; X64-NEXT:    sete %al
; X64-NEXT:    retq
  %noty = xor i8 %y, -1
  %and = and i8 %x, %noty
  %cmp = icmp eq i8 %and, 0
  ret i1 %cmp
}

declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i32 @bextr32(i32 %x, i32 %y)   {
; X86-LABEL: bextr32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32_load(i32* %x, i32 %y)   {
; X86-LABEL: bextr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bextrl %eax, (%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_load:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
  ret i32 %tmp
}

define i32 @bextr32b(i32 %x)  uwtable  ssp {
; X86-LABEL: bextr32b:
; X86:       # %bb.0:
; X86-NEXT:    movl $3076, %eax # imm = 0xC04
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32b:
; X64:       # %bb.0:
; X64-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-NEXT:    bextrl %eax, %edi, %eax
; X64-NEXT:    retq
  %1 = lshr i32 %x, 4
  %2 = and i32 %1, 4095
  ret i32 %2
}

; Make sure we still use the AH subreg trick to extract bits 15:8.
define i32 @bextr32_subreg(i32 %x)  uwtable  ssp {
; X86-LABEL: bextr32_subreg:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32_subreg:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    retq
  %1 = lshr i32 %x, 8
  %2 = and i32 %1, 255
  ret i32 %2
}

define i32 @bextr32b_load(i32* %x)  uwtable  ssp {
; X86-LABEL: bextr32b_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $3076, %ecx # imm = 0xC04
; X86-NEXT:    bextrl %ecx, (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32b_load:
; X64:       # %bb.0:
; X64-NEXT:    movl $3076, %eax # imm = 0xC04
; X64-NEXT:    bextrl %eax, (%rdi), %eax
; X64-NEXT:    retq
  %1 = load i32, i32* %x
  %2 = lshr i32 %1, 4
  %3 = and i32 %2, 4095
  ret i32 %3
}

; PR34042
define i32 @bextr32c(i32 %x, i16 zeroext %y) {
; X86-LABEL: bextr32c:
; X86:       # %bb.0:
; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bextr32c:
; X64:       # %bb.0:
; X64-NEXT:    bextrl %esi, %edi, %eax
; X64-NEXT:    retq
  %tmp0 = sext i16 %y to i32
  %tmp1 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %tmp0)
  ret i32 %tmp1
}

define i32 @non_bextr32(i32 %x) {
; X86-LABEL: non_bextr32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $2, %eax
; X86-NEXT:    andl $111, %eax
; X86-NEXT:    retl
;
; X64-LABEL: non_bextr32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shrl $2, %edi
; X64-NEXT:    andl $111, %edi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
entry:
  %shr = lshr i32 %x, 2
  %and = and i32 %shr, 111
  ret i32 %and
}

define i32 @blsi32(i32 %x)   {
; X86-LABEL: blsi32:
; X86:       # %bb.0:
; X86-NEXT:    blsil {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32:
; X64:       # %bb.0:
; X64-NEXT:    blsil %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 0, %x
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsi32_load(i32* %x)   {
; X86-LABEL: blsi32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsil (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsi32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsil (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 0, %x1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsi64(i64 %x)   {
; X86-LABEL: blsi64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    negl %eax
; X86-NEXT:    sbbl %esi, %edx
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsi64:
; X64:       # %bb.0:
; X64-NEXT:    blsiq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 0, %x
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

define i32 @blsmsk32(i32 %x)   {
; X86-LABEL: blsmsk32:
; X86:       # %bb.0:
; X86-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = xor i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsmsk32_load(i32* %x)   {
; X86-LABEL: blsmsk32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsmskl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsmskl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = xor i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsmsk64(i64 %x)   {
; X86-LABEL: blsmsk64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    xorl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsmsk64:
; X64:       # %bb.0:
; X64-NEXT:    blsmskq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = xor i64 %tmp, %x
  ret i64 %tmp2
}

define i32 @blsr32(i32 %x)   {
; X86-LABEL: blsr32:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %tmp = sub i32 %x, 1
  %tmp2 = and i32 %x, %tmp
  ret i32 %tmp2
}

define i32 @blsr32_load(i32* %x)   {
; X86-LABEL: blsr32_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    blsrl (%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: blsr32_load:
; X64:       # %bb.0:
; X64-NEXT:    blsrl (%rdi), %eax
; X64-NEXT:    retq
  %x1 = load i32, i32* %x
  %tmp = sub i32 %x1, 1
  %tmp2 = and i32 %x1, %tmp
  ret i32 %tmp2
}

define i64 @blsr64(i64 %x)   {
; X86-LABEL: blsr64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $-1, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    adcl $-1, %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    andl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: blsr64:
; X64:       # %bb.0:
; X64-NEXT:    blsrq %rdi, %rax
; X64-NEXT:    retq
  %tmp = sub i64 %x, 1
  %tmp2 = and i64 %tmp, %x
  ret i64 %tmp2
}

; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792

define i64 @blsr_disguised_constant(i64 %x) {
; X86-LABEL: blsr_disguised_constant:
; X86:       # %bb.0:
; X86-NEXT:    blsrl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_constant:
; X64:       # %bb.0:
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
  %a1 = and i64 %x, 65535
  %a2 = add i64 %x, 65535
  %r = and i64 %a1, %a2
  ret i64 %r
}

; The add here used to get shrunk, but the and did not, which hid the blsr pattern.
; We now use the knowledge that the shift leaves the upper bits zero, so the and result
; also has zero upper bits, allowing the and to be shrunk too.
define i64 @blsr_disguised_shrunk_add(i64 %x) {
; X86-LABEL: blsr_disguised_shrunk_add:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $16, %eax
; X86-NEXT:    blsrl %eax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: blsr_disguised_shrunk_add:
; X64:       # %bb.0:
; X64-NEXT:    shrq $48, %rdi
; X64-NEXT:    blsrl %edi, %eax
; X64-NEXT:    retq
  %a = lshr i64 %x, 48
  %b = add i64 %a, -1
  %c = and i64 %b, %a
  ret i64 %c
}