; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI,X86-NOBMI
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1TBM,X86-BMI1TBM
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1TBM,X86-BMI1TBM,BMI1TBMBMI2,X86-BMI1TBMBMI2
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI1,X86-BMI1,BMI1BMI2,X86-BMI1BMI2,BMI1NOTBMBMI2,X86-BMI1NOTBMBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI,X64-NOBMI
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1TBM,X64-BMI1TBM
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1TBM,X64-BMI1TBM,BMI1TBMBMI2,X64-BMI1TBMBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI1,X64-BMI1,BMI1BMI2,X64-BMI1BMI2,BMI1NOTBMBMI2,X64-BMI1NOTBMBMI2

; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll

; https://bugs.llvm.org/show_bug.cgi?id=36419
; https://bugs.llvm.org/show_bug.cgi?id=37603
; https://bugs.llvm.org/show_bug.cgi?id=37610

; Patterns:
;   a) x &  (1 << nbits) - 1
;   b) x & ~(-1 << nbits)
;   c) x &  (-1 >> (32 - nbits))
;   d) x << (32 - nbits) >> (32 - nbits)
; are equivalent.

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern a, i32 baseline: the mask is built as (1 << %numlowbits) - 1 and
; and-ed with %val, with the mask as the first operand of the `and`.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a, i32, narrow bit count: %numlowbits arrives as a zeroext i8 and is
; zero-extended to i32 before being used as the shift amount.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a, i32, value from memory: %val is loaded through the pointer %w
; rather than passed by value; the mask is still (1 << %numlowbits) - 1.
define i32 @bzhi32_a2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a, i32, value from memory and narrow bit count: %val is loaded
; through %w, and the zeroext i8 %numlowbits is zero-extended to i32 before
; the shift.
define i32 @bzhi32_a3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern a, i32, commuted `and`: same mask as bzhi32_a0, but %val is the
; first operand of the final `and` and the mask is the second.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_a4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    decl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_a4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_a4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    decl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_a4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; Pattern a. 64-bit

; Pattern a, i64 baseline: the mask is built as (1 << %numlowbits) - 1 in i64
; and and-ed with %val, with the mask as the first operand of the `and`.
define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB5_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB5_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB5_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB5_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern a, i64, narrow bit count: %numlowbits arrives as a zeroext i8 and is
; zero-extended to i64 before being used as the shift amount.
define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB6_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB6_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB6_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB6_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern a, i64, value from memory: %val is loaded through the pointer %w
; rather than passed by value; the mask is still (1 << %numlowbits) - 1.
define i64 @bzhi64_a2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB7_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB7_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB7_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB7_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern a, i64, value from memory and narrow bit count: %val is loaded
; through %w, and the zeroext i8 %numlowbits is zero-extended to i64 before
; the shift.
define i64 @bzhi64_a3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB8_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB8_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB8_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB8_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %conv = zext i8 %numlowbits to i64
  %onebit = shl i64 1, %conv
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern a, i64, commuted `and`: same mask as bzhi64_a0, but %val is the
; first operand of the final `and` and the mask is the second.
define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_a4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $1, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    shldl %cl, %eax, %edx
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB9_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB9_2:
; X86-NOBMI-NEXT:    addl $-1, %eax
; X86-NOBMI-NEXT:    adcl $-1, %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_a4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $1, %eax
; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB9_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %eax, %edx
; X86-BMI1BMI2-NEXT:    xorl %eax, %eax
; X86-BMI1BMI2-NEXT:  .LBB9_2:
; X86-BMI1BMI2-NEXT:    addl $-1, %eax
; X86-BMI1BMI2-NEXT:    adcl $-1, %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_a4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    decq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_a4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %onebit = shl i64 1, %numlowbits
  %mask = add nsw i64 %onebit, -1
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Pattern b, i32 baseline: the mask is built as ~(-1 << %numlowbits) (a shl of
; -1 followed by xor with -1) and and-ed with %val, mask as first operand.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b, i32, narrow bit count: %numlowbits arrives as a zeroext i8 and is
; zero-extended to i32 before being used as the shift amount.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b, i32, value from memory: %val is loaded through the pointer %w
; rather than passed by value; the mask is still ~(-1 << %numlowbits).
define i32 @bzhi32_b2_load(i32* %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b, i32, value from memory and narrow bit count: %val is loaded
; through %w, and the zeroext i8 %numlowbits is zero-extended to i32 before
; the shift.
define i32 @bzhi32_b3_load_indexzext(i32* %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl (%edx), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl (%rdi), %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i32, i32* %w
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Pattern b, i32, commuted `and`: same mask as bzhi32_b0, but %val is the
; first operand of the final `and` and the mask is the second.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b4_commutative:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi32_b4_commutative:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; Pattern b. 64-bit

; Pattern b, i64 baseline: the mask is built as ~(-1 << %numlowbits) in i64
; (a shl of -1 followed by xor with -1) and and-ed with %val, mask as first
; operand.
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB15_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB15_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b0:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %eax
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB15_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %eax
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB15_2:
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b0:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b0:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern b, i64, narrow bit count: %numlowbits arrives as a zeroext i8 and is
; zero-extended to i64 before being used as the shift amount.
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB16_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB16_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %eax
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB16_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %eax
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB16_2:
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq %rdi, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT:    retq
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern b, i64, value from memory: %val is loaded through the pointer %w
; rather than passed by value; the mask is still ~(-1 << %numlowbits).
define i64 @bzhi64_b2_load(i64* %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b2_load:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT:    movl $-1, %edx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
; X86-NOBMI-NEXT:    testb $32, %cl
; X86-NOBMI-NEXT:    je .LBB17_2
; X86-NOBMI-NEXT:  # %bb.1:
; X86-NOBMI-NEXT:    movl %eax, %edx
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:  .LBB17_2:
; X86-NOBMI-NEXT:    notl %edx
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl 4(%esi), %edx
; X86-NOBMI-NEXT:    andl (%esi), %eax
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1BMI2-LABEL: bzhi64_b2_load:
; X86-BMI1BMI2:       # %bb.0:
; X86-BMI1BMI2-NEXT:    pushl %esi
; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-BMI1BMI2-NEXT:    movl $-1, %edx
; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
; X86-BMI1BMI2-NEXT:    testb $32, %cl
; X86-BMI1BMI2-NEXT:    je .LBB17_2
; X86-BMI1BMI2-NEXT:  # %bb.1:
; X86-BMI1BMI2-NEXT:    movl %esi, %edx
; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
; X86-BMI1BMI2-NEXT:  .LBB17_2:
; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
; X86-BMI1BMI2-NEXT:    popl %esi
; X86-BMI1BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_b2_load:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq $-1, %rax
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    shlq %cl, %rax
; X64-NOBMI-NEXT:    notq %rax
; X64-NOBMI-NEXT:    andq (%rdi), %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
; X64-BMI1BMI2:       # %bb.0:
; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT:    retq
  %val = load i64, i64* %w
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Pattern b, 64-bit, value loaded from memory and a narrow bit count.
; %numlowbits arrives as an i8 marked zeroext and is widened with zext to i64
; before being used as the shift amount, so the IR under test is
;   (load i64 from %w) & ~(-1 << zext(%numlowbits)).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> bzhiq with a memory operand (the preceding
;     "kill" line is only a register-liveness annotation in the llc output);
;   - x86-64 without BMI  -> movq $-1 / shlq / notq / andq (%rdi);
;   - i686                -> two-register expansion of the 64-bit shift with
;     a testb $32 fix-up, then per-half masking of the loaded value.
890define i64 @bzhi64_b3_load_indexzext(i64* %w, i8 zeroext %numlowbits) nounwind {
891; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
892; X86-NOBMI:       # %bb.0:
893; X86-NOBMI-NEXT:    pushl %esi
894; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
895; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
896; X86-NOBMI-NEXT:    movl $-1, %edx
897; X86-NOBMI-NEXT:    movl $-1, %eax
898; X86-NOBMI-NEXT:    shll %cl, %eax
899; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
900; X86-NOBMI-NEXT:    testb $32, %cl
901; X86-NOBMI-NEXT:    je .LBB18_2
902; X86-NOBMI-NEXT:  # %bb.1:
903; X86-NOBMI-NEXT:    movl %eax, %edx
904; X86-NOBMI-NEXT:    xorl %eax, %eax
905; X86-NOBMI-NEXT:  .LBB18_2:
906; X86-NOBMI-NEXT:    notl %edx
907; X86-NOBMI-NEXT:    notl %eax
908; X86-NOBMI-NEXT:    andl 4(%esi), %edx
909; X86-NOBMI-NEXT:    andl (%esi), %eax
910; X86-NOBMI-NEXT:    popl %esi
911; X86-NOBMI-NEXT:    retl
912;
913; X86-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
914; X86-BMI1BMI2:       # %bb.0:
915; X86-BMI1BMI2-NEXT:    pushl %esi
916; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
917; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
918; X86-BMI1BMI2-NEXT:    movl $-1, %edx
919; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %esi
920; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %edx
921; X86-BMI1BMI2-NEXT:    testb $32, %cl
922; X86-BMI1BMI2-NEXT:    je .LBB18_2
923; X86-BMI1BMI2-NEXT:  # %bb.1:
924; X86-BMI1BMI2-NEXT:    movl %esi, %edx
925; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
926; X86-BMI1BMI2-NEXT:  .LBB18_2:
927; X86-BMI1BMI2-NEXT:    andnl 4(%eax), %edx, %edx
928; X86-BMI1BMI2-NEXT:    andnl (%eax), %esi, %eax
929; X86-BMI1BMI2-NEXT:    popl %esi
930; X86-BMI1BMI2-NEXT:    retl
931;
932; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
933; X64-NOBMI:       # %bb.0:
934; X64-NOBMI-NEXT:    movq $-1, %rax
935; X64-NOBMI-NEXT:    movl %esi, %ecx
936; X64-NOBMI-NEXT:    shlq %cl, %rax
937; X64-NOBMI-NEXT:    notq %rax
938; X64-NOBMI-NEXT:    andq (%rdi), %rax
939; X64-NOBMI-NEXT:    retq
940;
941; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
942; X64-BMI1BMI2:       # %bb.0:
943; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
944; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
945; X64-BMI1BMI2-NEXT:    retq
946  %val = load i64, i64* %w
947  %conv = zext i8 %numlowbits to i64
948  %notmask = shl i64 -1, %conv
949  %mask = xor i64 %notmask, -1
950  %masked = and i64 %mask, %val
951  ret i64 %masked
952}
953
; Pattern b, 64-bit, with the operands of the final 'and' written in the
; opposite order (%val first, %mask second) to exercise commutative matching.
; IR under test is %val & ~(-1 << %numlowbits).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single register-form bzhiq;
;   - x86-64 without BMI  -> movq $-1 / shlq / notq / andq;
;   - i686                -> two-register expansion of the 64-bit shift with
;     a testb $32 fix-up, then notl+andl (or andnl with BMI) against the two
;     halves of %val read from the stack.
954define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
955; X86-NOBMI-LABEL: bzhi64_b4_commutative:
956; X86-NOBMI:       # %bb.0:
957; X86-NOBMI-NEXT:    movb {{[0-9]+}}(%esp), %cl
958; X86-NOBMI-NEXT:    movl $-1, %edx
959; X86-NOBMI-NEXT:    movl $-1, %eax
960; X86-NOBMI-NEXT:    shll %cl, %eax
961; X86-NOBMI-NEXT:    shldl %cl, %edx, %edx
962; X86-NOBMI-NEXT:    testb $32, %cl
963; X86-NOBMI-NEXT:    je .LBB19_2
964; X86-NOBMI-NEXT:  # %bb.1:
965; X86-NOBMI-NEXT:    movl %eax, %edx
966; X86-NOBMI-NEXT:    xorl %eax, %eax
967; X86-NOBMI-NEXT:  .LBB19_2:
968; X86-NOBMI-NEXT:    notl %edx
969; X86-NOBMI-NEXT:    notl %eax
970; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
971; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
972; X86-NOBMI-NEXT:    retl
973;
974; X86-BMI1BMI2-LABEL: bzhi64_b4_commutative:
975; X86-BMI1BMI2:       # %bb.0:
976; X86-BMI1BMI2-NEXT:    pushl %esi
977; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
978; X86-BMI1BMI2-NEXT:    movl $-1, %eax
979; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %esi
980; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %eax
981; X86-BMI1BMI2-NEXT:    testb $32, %cl
982; X86-BMI1BMI2-NEXT:    je .LBB19_2
983; X86-BMI1BMI2-NEXT:  # %bb.1:
984; X86-BMI1BMI2-NEXT:    movl %esi, %eax
985; X86-BMI1BMI2-NEXT:    xorl %esi, %esi
986; X86-BMI1BMI2-NEXT:  .LBB19_2:
987; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %edx
988; X86-BMI1BMI2-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %eax
989; X86-BMI1BMI2-NEXT:    popl %esi
990; X86-BMI1BMI2-NEXT:    retl
991;
992; X64-NOBMI-LABEL: bzhi64_b4_commutative:
993; X64-NOBMI:       # %bb.0:
994; X64-NOBMI-NEXT:    movq $-1, %rax
995; X64-NOBMI-NEXT:    movl %esi, %ecx
996; X64-NOBMI-NEXT:    shlq %cl, %rax
997; X64-NOBMI-NEXT:    notq %rax
998; X64-NOBMI-NEXT:    andq %rdi, %rax
999; X64-NOBMI-NEXT:    retq
1000;
1001; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
1002; X64-BMI1BMI2:       # %bb.0:
1003; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1004; X64-BMI1BMI2-NEXT:    retq
1005  %notmask = shl i64 -1, %numlowbits
1006  %mask = xor i64 %notmask, -1
1007  %masked = and i64 %val, %mask ; swapped order
1008  ret i64 %masked
1009}
1010
1011; ---------------------------------------------------------------------------- ;
1012; Pattern c. 32-bit
1013; ---------------------------------------------------------------------------- ;
1014
; Pattern c, 32-bit, register operands.
; IR under test is %val & (-1 >>u (32 - %numlowbits)), where >>u is lshr.
; Lowering expected by the assertions below
;   - with BMI2 (i686 and x86-64) -> a single bzhil;
;   - without BMI -> no mask is materialized; instead the count 32 - %numlowbits
;     is computed into %ecx and the value is shifted left and then logically
;     right by that count (shll %cl / shrl %cl).
1015define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
1016; X86-NOBMI-LABEL: bzhi32_c0:
1017; X86-NOBMI:       # %bb.0:
1018; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1019; X86-NOBMI-NEXT:    movl $32, %ecx
1020; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1021; X86-NOBMI-NEXT:    shll %cl, %eax
1022; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1023; X86-NOBMI-NEXT:    shrl %cl, %eax
1024; X86-NOBMI-NEXT:    retl
1025;
1026; X86-BMI1BMI2-LABEL: bzhi32_c0:
1027; X86-BMI1BMI2:       # %bb.0:
1028; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1029; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1030; X86-BMI1BMI2-NEXT:    retl
1031;
1032; X64-NOBMI-LABEL: bzhi32_c0:
1033; X64-NOBMI:       # %bb.0:
1034; X64-NOBMI-NEXT:    movl $32, %ecx
1035; X64-NOBMI-NEXT:    subl %esi, %ecx
1036; X64-NOBMI-NEXT:    shll %cl, %edi
1037; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1038; X64-NOBMI-NEXT:    shrl %cl, %edi
1039; X64-NOBMI-NEXT:    movl %edi, %eax
1040; X64-NOBMI-NEXT:    retq
1041;
1042; X64-BMI1BMI2-LABEL: bzhi32_c0:
1043; X64-BMI1BMI2:       # %bb.0:
1044; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
1045; X64-BMI1BMI2-NEXT:    retq
1046  %numhighbits = sub i32 32, %numlowbits
1047  %mask = lshr i32 -1, %numhighbits
1048  %masked = and i32 %mask, %val
1049  ret i32 %masked
1050}
1051
; Pattern c, 32-bit, with a narrow bit count.
; %numlowbits is an i8; the subtraction 32 - %numlowbits is done in i8 and
; only the result is zero-extended to i32 for use as the lshr amount.
; IR under test is %val & (-1 >>u zext(32 - %numlowbits)).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil fed directly from the 8-bit count;
;   - without BMI -> movb $32 / subb to form the count in %cl, then a
;     shll %cl / shrl %cl pair on the value.
1052define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
1053; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
1054; X86-NOBMI:       # %bb.0:
1055; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1056; X86-NOBMI-NEXT:    movb $32, %cl
1057; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1058; X86-NOBMI-NEXT:    shll %cl, %eax
1059; X86-NOBMI-NEXT:    shrl %cl, %eax
1060; X86-NOBMI-NEXT:    retl
1061;
1062; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
1063; X86-BMI1BMI2:       # %bb.0:
1064; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1065; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1066; X86-BMI1BMI2-NEXT:    retl
1067;
1068; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
1069; X64-NOBMI:       # %bb.0:
1070; X64-NOBMI-NEXT:    movb $32, %cl
1071; X64-NOBMI-NEXT:    subb %sil, %cl
1072; X64-NOBMI-NEXT:    shll %cl, %edi
1073; X64-NOBMI-NEXT:    shrl %cl, %edi
1074; X64-NOBMI-NEXT:    movl %edi, %eax
1075; X64-NOBMI-NEXT:    retq
1076;
1077; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
1078; X64-BMI1BMI2:       # %bb.0:
1079; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
1080; X64-BMI1BMI2-NEXT:    retq
1081  %numhighbits = sub i8 32, %numlowbits
1082  %sh_prom = zext i8 %numhighbits to i32
1083  %mask = lshr i32 -1, %sh_prom
1084  %masked = and i32 %mask, %val
1085  ret i32 %masked
1086}
1087
; Pattern c, 32-bit, with the value operand loaded from memory.
; IR under test is (load i32 from %w) & (-1 >>u (32 - %numlowbits)).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil whose source is the memory operand;
;   - without BMI -> the value is loaded into %eax first, then shifted left
;     and logically right by 32 - %numlowbits (shll %cl / shrl %cl).
1088define i32 @bzhi32_c2_load(i32* %w, i32 %numlowbits) nounwind {
1089; X86-NOBMI-LABEL: bzhi32_c2_load:
1090; X86-NOBMI:       # %bb.0:
1091; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1092; X86-NOBMI-NEXT:    movl (%eax), %eax
1093; X86-NOBMI-NEXT:    movl $32, %ecx
1094; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1095; X86-NOBMI-NEXT:    shll %cl, %eax
1096; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1097; X86-NOBMI-NEXT:    shrl %cl, %eax
1098; X86-NOBMI-NEXT:    retl
1099;
1100; X86-BMI1BMI2-LABEL: bzhi32_c2_load:
1101; X86-BMI1BMI2:       # %bb.0:
1102; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1103; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1104; X86-BMI1BMI2-NEXT:    bzhil %eax, (%ecx), %eax
1105; X86-BMI1BMI2-NEXT:    retl
1106;
1107; X64-NOBMI-LABEL: bzhi32_c2_load:
1108; X64-NOBMI:       # %bb.0:
1109; X64-NOBMI-NEXT:    movl (%rdi), %eax
1110; X64-NOBMI-NEXT:    movl $32, %ecx
1111; X64-NOBMI-NEXT:    subl %esi, %ecx
1112; X64-NOBMI-NEXT:    shll %cl, %eax
1113; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1114; X64-NOBMI-NEXT:    shrl %cl, %eax
1115; X64-NOBMI-NEXT:    retq
1116;
1117; X64-BMI1BMI2-LABEL: bzhi32_c2_load:
1118; X64-BMI1BMI2:       # %bb.0:
1119; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1120; X64-BMI1BMI2-NEXT:    retq
1121  %val = load i32, i32* %w
1122  %numhighbits = sub i32 32, %numlowbits
1123  %mask = lshr i32 -1, %numhighbits
1124  %masked = and i32 %mask, %val
1125  ret i32 %masked
1126}
1127
; Pattern c, 32-bit, value loaded from memory and a narrow bit count.
; The count 32 - %numlowbits is computed in i8 and then zero-extended to i32.
; IR under test is (load i32 from %w) & (-1 >>u zext(32 - %numlowbits)).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil with a memory source operand;
;   - without BMI -> load, movb $32 / subb to form the count in %cl, then a
;     shll %cl / shrl %cl pair on the loaded value.
1128define i32 @bzhi32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
1129; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext:
1130; X86-NOBMI:       # %bb.0:
1131; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1132; X86-NOBMI-NEXT:    movl (%eax), %eax
1133; X86-NOBMI-NEXT:    movb $32, %cl
1134; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1135; X86-NOBMI-NEXT:    shll %cl, %eax
1136; X86-NOBMI-NEXT:    shrl %cl, %eax
1137; X86-NOBMI-NEXT:    retl
1138;
1139; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
1140; X86-BMI1BMI2:       # %bb.0:
1141; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1142; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1143; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1144; X86-BMI1BMI2-NEXT:    retl
1145;
1146; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
1147; X64-NOBMI:       # %bb.0:
1148; X64-NOBMI-NEXT:    movl (%rdi), %eax
1149; X64-NOBMI-NEXT:    movb $32, %cl
1150; X64-NOBMI-NEXT:    subb %sil, %cl
1151; X64-NOBMI-NEXT:    shll %cl, %eax
1152; X64-NOBMI-NEXT:    shrl %cl, %eax
1153; X64-NOBMI-NEXT:    retq
1154;
1155; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
1156; X64-BMI1BMI2:       # %bb.0:
1157; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1158; X64-BMI1BMI2-NEXT:    retq
1159  %val = load i32, i32* %w
1160  %numhighbits = sub i8 32, %numlowbits
1161  %sh_prom = zext i8 %numhighbits to i32
1162  %mask = lshr i32 -1, %sh_prom
1163  %masked = and i32 %mask, %val
1164  ret i32 %masked
1165}
1166
; Pattern c, 32-bit, with the operands of the final 'and' written in the
; opposite order (%val first, %mask second) to exercise commutative matching.
; IR under test is %val & (-1 >>u (32 - %numlowbits)).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil;
;   - without BMI -> the count 32 - %numlowbits in %ecx, then a
;     shll %cl / shrl %cl pair on the value.
1167define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
1168; X86-NOBMI-LABEL: bzhi32_c4_commutative:
1169; X86-NOBMI:       # %bb.0:
1170; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1171; X86-NOBMI-NEXT:    movl $32, %ecx
1172; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1173; X86-NOBMI-NEXT:    shll %cl, %eax
1174; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1175; X86-NOBMI-NEXT:    shrl %cl, %eax
1176; X86-NOBMI-NEXT:    retl
1177;
1178; X86-BMI1BMI2-LABEL: bzhi32_c4_commutative:
1179; X86-BMI1BMI2:       # %bb.0:
1180; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1181; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1182; X86-BMI1BMI2-NEXT:    retl
1183;
1184; X64-NOBMI-LABEL: bzhi32_c4_commutative:
1185; X64-NOBMI:       # %bb.0:
1186; X64-NOBMI-NEXT:    movl $32, %ecx
1187; X64-NOBMI-NEXT:    subl %esi, %ecx
1188; X64-NOBMI-NEXT:    shll %cl, %edi
1189; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1190; X64-NOBMI-NEXT:    shrl %cl, %edi
1191; X64-NOBMI-NEXT:    movl %edi, %eax
1192; X64-NOBMI-NEXT:    retq
1193;
1194; X64-BMI1BMI2-LABEL: bzhi32_c4_commutative:
1195; X64-BMI1BMI2:       # %bb.0:
1196; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
1197; X64-BMI1BMI2-NEXT:    retq
1198  %numhighbits = sub i32 32, %numlowbits
1199  %mask = lshr i32 -1, %numhighbits
1200  %masked = and i32 %val, %mask ; swapped order
1201  ret i32 %masked
1202}
1203
1204; 64-bit
1205
; Pattern c, 64-bit, register operands.
; IR under test is %val & (-1 >>u (64 - %numlowbits)), where >>u is lshr.
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single bzhiq;
;   - x86-64 without BMI  -> the count 64 - %numlowbits in %ecx, then a
;     shlq %cl / shrq %cl pair on the value;
;   - i686 (with or without BMI2) -> the all-ones mask is built in a register
;     pair and shifted right as a 64-bit quantity (shrl or shrxl, plus shrdl,
;     with a testb $32 fix-up), then and-ed with each half of %val.
1206define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
1207; X86-NOBMI-LABEL: bzhi64_c0:
1208; X86-NOBMI:       # %bb.0:
1209; X86-NOBMI-NEXT:    movl $64, %ecx
1210; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1211; X86-NOBMI-NEXT:    movl $-1, %eax
1212; X86-NOBMI-NEXT:    movl $-1, %edx
1213; X86-NOBMI-NEXT:    shrl %cl, %edx
1214; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
1215; X86-NOBMI-NEXT:    testb $32, %cl
1216; X86-NOBMI-NEXT:    je .LBB25_2
1217; X86-NOBMI-NEXT:  # %bb.1:
1218; X86-NOBMI-NEXT:    movl %edx, %eax
1219; X86-NOBMI-NEXT:    xorl %edx, %edx
1220; X86-NOBMI-NEXT:  .LBB25_2:
1221; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1222; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1223; X86-NOBMI-NEXT:    retl
1224;
1225; X86-BMI1BMI2-LABEL: bzhi64_c0:
1226; X86-BMI1BMI2:       # %bb.0:
1227; X86-BMI1BMI2-NEXT:    movl $64, %ecx
1228; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1229; X86-BMI1BMI2-NEXT:    movl $-1, %eax
1230; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
1231; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
1232; X86-BMI1BMI2-NEXT:    testb $32, %cl
1233; X86-BMI1BMI2-NEXT:    je .LBB25_2
1234; X86-BMI1BMI2-NEXT:  # %bb.1:
1235; X86-BMI1BMI2-NEXT:    movl %edx, %eax
1236; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1237; X86-BMI1BMI2-NEXT:  .LBB25_2:
1238; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1239; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1240; X86-BMI1BMI2-NEXT:    retl
1241;
1242; X64-NOBMI-LABEL: bzhi64_c0:
1243; X64-NOBMI:       # %bb.0:
1244; X64-NOBMI-NEXT:    movl $64, %ecx
1245; X64-NOBMI-NEXT:    subl %esi, %ecx
1246; X64-NOBMI-NEXT:    shlq %cl, %rdi
1247; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1248; X64-NOBMI-NEXT:    shrq %cl, %rdi
1249; X64-NOBMI-NEXT:    movq %rdi, %rax
1250; X64-NOBMI-NEXT:    retq
1251;
1252; X64-BMI1BMI2-LABEL: bzhi64_c0:
1253; X64-BMI1BMI2:       # %bb.0:
1254; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1255; X64-BMI1BMI2-NEXT:    retq
1256  %numhighbits = sub i64 64, %numlowbits
1257  %mask = lshr i64 -1, %numhighbits
1258  %masked = and i64 %mask, %val
1259  ret i64 %masked
1260}
1261
; Pattern c, 64-bit, with a narrow bit count.
; The count 64 - %numlowbits is computed in i8 and then zero-extended to i64.
; IR under test is %val & (-1 >>u zext(64 - %numlowbits)).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single bzhiq (the preceding "kill" line is only
;     a register-liveness annotation in the llc output);
;   - x86-64 without BMI  -> movb $64 / subb to form the count in %cl, then a
;     shlq %cl / shrq %cl pair on the value;
;   - i686 (with or without BMI2) -> 64-bit right shift of an all-ones register
;     pair with a testb $32 fix-up, then and-ed with each half of %val.
1262define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
1263; X86-NOBMI-LABEL: bzhi64_c1_indexzext:
1264; X86-NOBMI:       # %bb.0:
1265; X86-NOBMI-NEXT:    movb $64, %cl
1266; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1267; X86-NOBMI-NEXT:    movl $-1, %eax
1268; X86-NOBMI-NEXT:    movl $-1, %edx
1269; X86-NOBMI-NEXT:    shrl %cl, %edx
1270; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
1271; X86-NOBMI-NEXT:    testb $32, %cl
1272; X86-NOBMI-NEXT:    je .LBB26_2
1273; X86-NOBMI-NEXT:  # %bb.1:
1274; X86-NOBMI-NEXT:    movl %edx, %eax
1275; X86-NOBMI-NEXT:    xorl %edx, %edx
1276; X86-NOBMI-NEXT:  .LBB26_2:
1277; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1278; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1279; X86-NOBMI-NEXT:    retl
1280;
1281; X86-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
1282; X86-BMI1BMI2:       # %bb.0:
1283; X86-BMI1BMI2-NEXT:    movb $64, %cl
1284; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1285; X86-BMI1BMI2-NEXT:    movl $-1, %eax
1286; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
1287; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
1288; X86-BMI1BMI2-NEXT:    testb $32, %cl
1289; X86-BMI1BMI2-NEXT:    je .LBB26_2
1290; X86-BMI1BMI2-NEXT:  # %bb.1:
1291; X86-BMI1BMI2-NEXT:    movl %edx, %eax
1292; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1293; X86-BMI1BMI2-NEXT:  .LBB26_2:
1294; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1295; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1296; X86-BMI1BMI2-NEXT:    retl
1297;
1298; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
1299; X64-NOBMI:       # %bb.0:
1300; X64-NOBMI-NEXT:    movb $64, %cl
1301; X64-NOBMI-NEXT:    subb %sil, %cl
1302; X64-NOBMI-NEXT:    shlq %cl, %rdi
1303; X64-NOBMI-NEXT:    shrq %cl, %rdi
1304; X64-NOBMI-NEXT:    movq %rdi, %rax
1305; X64-NOBMI-NEXT:    retq
1306;
1307; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
1308; X64-BMI1BMI2:       # %bb.0:
1309; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1310; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1311; X64-BMI1BMI2-NEXT:    retq
1312  %numhighbits = sub i8 64, %numlowbits
1313  %sh_prom = zext i8 %numhighbits to i64
1314  %mask = lshr i64 -1, %sh_prom
1315  %masked = and i64 %mask, %val
1316  ret i64 %masked
1317}
1318
; Pattern c, 64-bit, with the value operand loaded from memory.
; IR under test is (load i64 from %w) & (-1 >>u (64 - %numlowbits)).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single bzhiq with a memory source operand;
;   - x86-64 without BMI  -> load into %rax, then a shlq %cl / shrq %cl pair
;     with the count 64 - %numlowbits;
;   - i686 (with or without BMI2) -> 64-bit right shift of an all-ones register
;     pair with a testb $32 fix-up, then and-ed with the two 32-bit halves of
;     the value at (%esi) and 4(%esi).
1319define i64 @bzhi64_c2_load(i64* %w, i64 %numlowbits) nounwind {
1320; X86-NOBMI-LABEL: bzhi64_c2_load:
1321; X86-NOBMI:       # %bb.0:
1322; X86-NOBMI-NEXT:    pushl %esi
1323; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1324; X86-NOBMI-NEXT:    movl $64, %ecx
1325; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1326; X86-NOBMI-NEXT:    movl $-1, %eax
1327; X86-NOBMI-NEXT:    movl $-1, %edx
1328; X86-NOBMI-NEXT:    shrl %cl, %edx
1329; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
1330; X86-NOBMI-NEXT:    testb $32, %cl
1331; X86-NOBMI-NEXT:    je .LBB27_2
1332; X86-NOBMI-NEXT:  # %bb.1:
1333; X86-NOBMI-NEXT:    movl %edx, %eax
1334; X86-NOBMI-NEXT:    xorl %edx, %edx
1335; X86-NOBMI-NEXT:  .LBB27_2:
1336; X86-NOBMI-NEXT:    andl (%esi), %eax
1337; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1338; X86-NOBMI-NEXT:    popl %esi
1339; X86-NOBMI-NEXT:    retl
1340;
1341; X86-BMI1BMI2-LABEL: bzhi64_c2_load:
1342; X86-BMI1BMI2:       # %bb.0:
1343; X86-BMI1BMI2-NEXT:    pushl %esi
1344; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1345; X86-BMI1BMI2-NEXT:    movl $64, %ecx
1346; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1347; X86-BMI1BMI2-NEXT:    movl $-1, %eax
1348; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
1349; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
1350; X86-BMI1BMI2-NEXT:    testb $32, %cl
1351; X86-BMI1BMI2-NEXT:    je .LBB27_2
1352; X86-BMI1BMI2-NEXT:  # %bb.1:
1353; X86-BMI1BMI2-NEXT:    movl %edx, %eax
1354; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1355; X86-BMI1BMI2-NEXT:  .LBB27_2:
1356; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
1357; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
1358; X86-BMI1BMI2-NEXT:    popl %esi
1359; X86-BMI1BMI2-NEXT:    retl
1360;
1361; X64-NOBMI-LABEL: bzhi64_c2_load:
1362; X64-NOBMI:       # %bb.0:
1363; X64-NOBMI-NEXT:    movq (%rdi), %rax
1364; X64-NOBMI-NEXT:    movl $64, %ecx
1365; X64-NOBMI-NEXT:    subl %esi, %ecx
1366; X64-NOBMI-NEXT:    shlq %cl, %rax
1367; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1368; X64-NOBMI-NEXT:    shrq %cl, %rax
1369; X64-NOBMI-NEXT:    retq
1370;
1371; X64-BMI1BMI2-LABEL: bzhi64_c2_load:
1372; X64-BMI1BMI2:       # %bb.0:
1373; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1374; X64-BMI1BMI2-NEXT:    retq
1375  %val = load i64, i64* %w
1376  %numhighbits = sub i64 64, %numlowbits
1377  %mask = lshr i64 -1, %numhighbits
1378  %masked = and i64 %mask, %val
1379  ret i64 %masked
1380}
1381
; Pattern c, 64-bit, value loaded from memory and a narrow bit count.
; The count 64 - %numlowbits is computed in i8 and then zero-extended to i64.
; IR under test is (load i64 from %w) & (-1 >>u zext(64 - %numlowbits)).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single bzhiq with a memory source operand (the
;     preceding "kill" line is only a register-liveness annotation);
;   - x86-64 without BMI  -> load, movb $64 / subb to form the count in %cl,
;     then a shlq %cl / shrq %cl pair;
;   - i686 (with or without BMI2) -> 64-bit right shift of an all-ones register
;     pair with a testb $32 fix-up, then and-ed with the two 32-bit halves of
;     the value at (%esi) and 4(%esi).
1382define i64 @bzhi64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
1383; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext:
1384; X86-NOBMI:       # %bb.0:
1385; X86-NOBMI-NEXT:    pushl %esi
1386; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
1387; X86-NOBMI-NEXT:    movb $64, %cl
1388; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1389; X86-NOBMI-NEXT:    movl $-1, %eax
1390; X86-NOBMI-NEXT:    movl $-1, %edx
1391; X86-NOBMI-NEXT:    shrl %cl, %edx
1392; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
1393; X86-NOBMI-NEXT:    testb $32, %cl
1394; X86-NOBMI-NEXT:    je .LBB28_2
1395; X86-NOBMI-NEXT:  # %bb.1:
1396; X86-NOBMI-NEXT:    movl %edx, %eax
1397; X86-NOBMI-NEXT:    xorl %edx, %edx
1398; X86-NOBMI-NEXT:  .LBB28_2:
1399; X86-NOBMI-NEXT:    andl (%esi), %eax
1400; X86-NOBMI-NEXT:    andl 4(%esi), %edx
1401; X86-NOBMI-NEXT:    popl %esi
1402; X86-NOBMI-NEXT:    retl
1403;
1404; X86-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
1405; X86-BMI1BMI2:       # %bb.0:
1406; X86-BMI1BMI2-NEXT:    pushl %esi
1407; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1408; X86-BMI1BMI2-NEXT:    movb $64, %cl
1409; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1410; X86-BMI1BMI2-NEXT:    movl $-1, %eax
1411; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
1412; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
1413; X86-BMI1BMI2-NEXT:    testb $32, %cl
1414; X86-BMI1BMI2-NEXT:    je .LBB28_2
1415; X86-BMI1BMI2-NEXT:  # %bb.1:
1416; X86-BMI1BMI2-NEXT:    movl %edx, %eax
1417; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1418; X86-BMI1BMI2-NEXT:  .LBB28_2:
1419; X86-BMI1BMI2-NEXT:    andl (%esi), %eax
1420; X86-BMI1BMI2-NEXT:    andl 4(%esi), %edx
1421; X86-BMI1BMI2-NEXT:    popl %esi
1422; X86-BMI1BMI2-NEXT:    retl
1423;
1424; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
1425; X64-NOBMI:       # %bb.0:
1426; X64-NOBMI-NEXT:    movq (%rdi), %rax
1427; X64-NOBMI-NEXT:    movb $64, %cl
1428; X64-NOBMI-NEXT:    subb %sil, %cl
1429; X64-NOBMI-NEXT:    shlq %cl, %rax
1430; X64-NOBMI-NEXT:    shrq %cl, %rax
1431; X64-NOBMI-NEXT:    retq
1432;
1433; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
1434; X64-BMI1BMI2:       # %bb.0:
1435; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1436; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1437; X64-BMI1BMI2-NEXT:    retq
1438  %val = load i64, i64* %w
1439  %numhighbits = sub i8 64, %numlowbits
1440  %sh_prom = zext i8 %numhighbits to i64
1441  %mask = lshr i64 -1, %sh_prom
1442  %masked = and i64 %mask, %val
1443  ret i64 %masked
1444}
1445
; Pattern c, 64-bit, with the operands of the final 'and' written in the
; opposite order (%val first, %mask second) to exercise commutative matching.
; IR under test is %val & (-1 >>u (64 - %numlowbits)).
; Lowering expected by the assertions below
;   - x86-64 with BMI2    -> a single bzhiq;
;   - x86-64 without BMI  -> the count 64 - %numlowbits in %ecx, then a
;     shlq %cl / shrq %cl pair on the value;
;   - i686 (with or without BMI2) -> 64-bit right shift of an all-ones register
;     pair with a testb $32 fix-up, then and-ed with each half of %val.
1446define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
1447; X86-NOBMI-LABEL: bzhi64_c4_commutative:
1448; X86-NOBMI:       # %bb.0:
1449; X86-NOBMI-NEXT:    movl $64, %ecx
1450; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1451; X86-NOBMI-NEXT:    movl $-1, %eax
1452; X86-NOBMI-NEXT:    movl $-1, %edx
1453; X86-NOBMI-NEXT:    shrl %cl, %edx
1454; X86-NOBMI-NEXT:    shrdl %cl, %eax, %eax
1455; X86-NOBMI-NEXT:    testb $32, %cl
1456; X86-NOBMI-NEXT:    je .LBB29_2
1457; X86-NOBMI-NEXT:  # %bb.1:
1458; X86-NOBMI-NEXT:    movl %edx, %eax
1459; X86-NOBMI-NEXT:    xorl %edx, %edx
1460; X86-NOBMI-NEXT:  .LBB29_2:
1461; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
1462; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
1463; X86-NOBMI-NEXT:    retl
1464;
1465; X86-BMI1BMI2-LABEL: bzhi64_c4_commutative:
1466; X86-BMI1BMI2:       # %bb.0:
1467; X86-BMI1BMI2-NEXT:    movl $64, %ecx
1468; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1469; X86-BMI1BMI2-NEXT:    movl $-1, %eax
1470; X86-BMI1BMI2-NEXT:    shrxl %ecx, %eax, %edx
1471; X86-BMI1BMI2-NEXT:    shrdl %cl, %eax, %eax
1472; X86-BMI1BMI2-NEXT:    testb $32, %cl
1473; X86-BMI1BMI2-NEXT:    je .LBB29_2
1474; X86-BMI1BMI2-NEXT:  # %bb.1:
1475; X86-BMI1BMI2-NEXT:    movl %edx, %eax
1476; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1477; X86-BMI1BMI2-NEXT:  .LBB29_2:
1478; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1479; X86-BMI1BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1480; X86-BMI1BMI2-NEXT:    retl
1481;
1482; X64-NOBMI-LABEL: bzhi64_c4_commutative:
1483; X64-NOBMI:       # %bb.0:
1484; X64-NOBMI-NEXT:    movl $64, %ecx
1485; X64-NOBMI-NEXT:    subl %esi, %ecx
1486; X64-NOBMI-NEXT:    shlq %cl, %rdi
1487; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1488; X64-NOBMI-NEXT:    shrq %cl, %rdi
1489; X64-NOBMI-NEXT:    movq %rdi, %rax
1490; X64-NOBMI-NEXT:    retq
1491;
1492; X64-BMI1BMI2-LABEL: bzhi64_c4_commutative:
1493; X64-BMI1BMI2:       # %bb.0:
1494; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1495; X64-BMI1BMI2-NEXT:    retq
1496  %numhighbits = sub i64 64, %numlowbits
1497  %mask = lshr i64 -1, %numhighbits
1498  %masked = and i64 %val, %mask ; swapped order
1499  ret i64 %masked
1500}
1501
1502; ---------------------------------------------------------------------------- ;
1503; Pattern d. 32-bit.
1504; ---------------------------------------------------------------------------- ;
1505
; Pattern d, 32-bit, register operands.
; No mask value appears in the IR; the high bits are cleared by shifting
; %val left by 32 - %numlowbits and then logically right by the same amount,
;   (%val << (32 - %numlowbits)) >>u (32 - %numlowbits).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil;
;   - without BMI -> the shift pair is emitted directly (shll %cl / shrl %cl).
1506define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
1507; X86-NOBMI-LABEL: bzhi32_d0:
1508; X86-NOBMI:       # %bb.0:
1509; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1510; X86-NOBMI-NEXT:    movl $32, %ecx
1511; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1512; X86-NOBMI-NEXT:    shll %cl, %eax
1513; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1514; X86-NOBMI-NEXT:    shrl %cl, %eax
1515; X86-NOBMI-NEXT:    retl
1516;
1517; X86-BMI1BMI2-LABEL: bzhi32_d0:
1518; X86-BMI1BMI2:       # %bb.0:
1519; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1520; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1521; X86-BMI1BMI2-NEXT:    retl
1522;
1523; X64-NOBMI-LABEL: bzhi32_d0:
1524; X64-NOBMI:       # %bb.0:
1525; X64-NOBMI-NEXT:    movl $32, %ecx
1526; X64-NOBMI-NEXT:    subl %esi, %ecx
1527; X64-NOBMI-NEXT:    shll %cl, %edi
1528; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1529; X64-NOBMI-NEXT:    shrl %cl, %edi
1530; X64-NOBMI-NEXT:    movl %edi, %eax
1531; X64-NOBMI-NEXT:    retq
1532;
1533; X64-BMI1BMI2-LABEL: bzhi32_d0:
1534; X64-BMI1BMI2:       # %bb.0:
1535; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
1536; X64-BMI1BMI2-NEXT:    retq
1537  %numhighbits = sub i32 32, %numlowbits
1538  %highbitscleared = shl i32 %val, %numhighbits
1539  %masked = lshr i32 %highbitscleared, %numhighbits
1540  ret i32 %masked
1541}
1542
; Pattern d, 32-bit, with a narrow bit count.
; The count 32 - %numlowbits is computed in i8, zero-extended to i32, and
; used for both the shl and the following lshr of %val.
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil fed directly from the 8-bit count;
;   - without BMI -> movb $32 / subb to form the count in %cl, then the
;     shll %cl / shrl %cl pair.
1543define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
1544; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
1545; X86-NOBMI:       # %bb.0:
1546; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1547; X86-NOBMI-NEXT:    movb $32, %cl
1548; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1549; X86-NOBMI-NEXT:    shll %cl, %eax
1550; X86-NOBMI-NEXT:    shrl %cl, %eax
1551; X86-NOBMI-NEXT:    retl
1552;
1553; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
1554; X86-BMI1BMI2:       # %bb.0:
1555; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1556; X86-BMI1BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
1557; X86-BMI1BMI2-NEXT:    retl
1558;
1559; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
1560; X64-NOBMI:       # %bb.0:
1561; X64-NOBMI-NEXT:    movb $32, %cl
1562; X64-NOBMI-NEXT:    subb %sil, %cl
1563; X64-NOBMI-NEXT:    shll %cl, %edi
1564; X64-NOBMI-NEXT:    shrl %cl, %edi
1565; X64-NOBMI-NEXT:    movl %edi, %eax
1566; X64-NOBMI-NEXT:    retq
1567;
1568; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
1569; X64-BMI1BMI2:       # %bb.0:
1570; X64-BMI1BMI2-NEXT:    bzhil %esi, %edi, %eax
1571; X64-BMI1BMI2-NEXT:    retq
1572  %numhighbits = sub i8 32, %numlowbits
1573  %sh_prom = zext i8 %numhighbits to i32
1574  %highbitscleared = shl i32 %val, %sh_prom
1575  %masked = lshr i32 %highbitscleared, %sh_prom
1576  ret i32 %masked
1577}
1578
; Pattern d, 32-bit, with the value operand loaded from memory.
; IR under test is
;   ((load i32 from %w) << (32 - %numlowbits)) >>u (32 - %numlowbits).
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil whose source is the memory operand;
;   - without BMI -> load into %eax, then the shll %cl / shrl %cl pair.
1579define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind {
1580; X86-NOBMI-LABEL: bzhi32_d2_load:
1581; X86-NOBMI:       # %bb.0:
1582; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1583; X86-NOBMI-NEXT:    movl (%eax), %eax
1584; X86-NOBMI-NEXT:    movl $32, %ecx
1585; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1586; X86-NOBMI-NEXT:    shll %cl, %eax
1587; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1588; X86-NOBMI-NEXT:    shrl %cl, %eax
1589; X86-NOBMI-NEXT:    retl
1590;
1591; X86-BMI1BMI2-LABEL: bzhi32_d2_load:
1592; X86-BMI1BMI2:       # %bb.0:
1593; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1594; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
1595; X86-BMI1BMI2-NEXT:    bzhil %eax, (%ecx), %eax
1596; X86-BMI1BMI2-NEXT:    retl
1597;
1598; X64-NOBMI-LABEL: bzhi32_d2_load:
1599; X64-NOBMI:       # %bb.0:
1600; X64-NOBMI-NEXT:    movl (%rdi), %eax
1601; X64-NOBMI-NEXT:    movl $32, %ecx
1602; X64-NOBMI-NEXT:    subl %esi, %ecx
1603; X64-NOBMI-NEXT:    shll %cl, %eax
1604; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1605; X64-NOBMI-NEXT:    shrl %cl, %eax
1606; X64-NOBMI-NEXT:    retq
1607;
1608; X64-BMI1BMI2-LABEL: bzhi32_d2_load:
1609; X64-BMI1BMI2:       # %bb.0:
1610; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1611; X64-BMI1BMI2-NEXT:    retq
1612  %val = load i32, i32* %w
1613  %numhighbits = sub i32 32, %numlowbits
1614  %highbitscleared = shl i32 %val, %numhighbits
1615  %masked = lshr i32 %highbitscleared, %numhighbits
1616  ret i32 %masked
1617}
1618
; Pattern d, 32-bit, value loaded from memory and a narrow bit count.
; The count 32 - %numlowbits is computed in i8, zero-extended to i32, and
; used for both the shl and the following lshr of the loaded value.
; Lowering expected by the assertions below
;   - with BMI2 -> a single bzhil with a memory source operand;
;   - without BMI -> load, movb $32 / subb to form the count in %cl, then
;     the shll %cl / shrl %cl pair.
1619define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
1620; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
1621; X86-NOBMI:       # %bb.0:
1622; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1623; X86-NOBMI-NEXT:    movl (%eax), %eax
1624; X86-NOBMI-NEXT:    movb $32, %cl
1625; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1626; X86-NOBMI-NEXT:    shll %cl, %eax
1627; X86-NOBMI-NEXT:    shrl %cl, %eax
1628; X86-NOBMI-NEXT:    retl
1629;
1630; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
1631; X86-BMI1BMI2:       # %bb.0:
1632; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1633; X86-BMI1BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1634; X86-BMI1BMI2-NEXT:    bzhil %ecx, (%eax), %eax
1635; X86-BMI1BMI2-NEXT:    retl
1636;
1637; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
1638; X64-NOBMI:       # %bb.0:
1639; X64-NOBMI-NEXT:    movl (%rdi), %eax
1640; X64-NOBMI-NEXT:    movb $32, %cl
1641; X64-NOBMI-NEXT:    subb %sil, %cl
1642; X64-NOBMI-NEXT:    shll %cl, %eax
1643; X64-NOBMI-NEXT:    shrl %cl, %eax
1644; X64-NOBMI-NEXT:    retq
1645;
1646; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
1647; X64-BMI1BMI2:       # %bb.0:
1648; X64-BMI1BMI2-NEXT:    bzhil %esi, (%rdi), %eax
1649; X64-BMI1BMI2-NEXT:    retq
1650  %val = load i32, i32* %w
1651  %numhighbits = sub i8 32, %numlowbits
1652  %sh_prom = zext i8 %numhighbits to i32
1653  %highbitscleared = shl i32 %val, %sh_prom
1654  %masked = lshr i32 %highbitscleared, %sh_prom
1655  ret i32 %masked
1656}
1657
1658; 64-bit.
1659
; Keeps the low %numlowbits bits of the i64 %val by shifting it left and then
; logically right by the same amount, (64 - %numlowbits).
; The assertions below expect a single bzhiq on x86-64 with BMI2 and an explicit
; shift sequence in the other checked configurations.
1660define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind {
1661; X86-NOBMI-LABEL: bzhi64_d0:
1662; X86-NOBMI:       # %bb.0:
1663; X86-NOBMI-NEXT:    pushl %ebx
1664; X86-NOBMI-NEXT:    pushl %edi
1665; X86-NOBMI-NEXT:    pushl %esi
1666; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1667; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1668; X86-NOBMI-NEXT:    movl $64, %ecx
1669; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1670; X86-NOBMI-NEXT:    movl %edx, %esi
1671; X86-NOBMI-NEXT:    shll %cl, %esi
1672; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
1673; X86-NOBMI-NEXT:    testb $32, %cl
1674; X86-NOBMI-NEXT:    movl %esi, %edi
1675; X86-NOBMI-NEXT:    jne .LBB34_2
1676; X86-NOBMI-NEXT:  # %bb.1:
1677; X86-NOBMI-NEXT:    movl %eax, %edi
1678; X86-NOBMI-NEXT:  .LBB34_2:
1679; X86-NOBMI-NEXT:    movl %edi, %eax
1680; X86-NOBMI-NEXT:    shrl %cl, %eax
1681; X86-NOBMI-NEXT:    xorl %ebx, %ebx
1682; X86-NOBMI-NEXT:    testb $32, %cl
1683; X86-NOBMI-NEXT:    movl $0, %edx
1684; X86-NOBMI-NEXT:    jne .LBB34_4
1685; X86-NOBMI-NEXT:  # %bb.3:
1686; X86-NOBMI-NEXT:    movl %esi, %ebx
1687; X86-NOBMI-NEXT:    movl %eax, %edx
1688; X86-NOBMI-NEXT:  .LBB34_4:
1689; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
1690; X86-NOBMI-NEXT:    testb $32, %cl
1691; X86-NOBMI-NEXT:    jne .LBB34_6
1692; X86-NOBMI-NEXT:  # %bb.5:
1693; X86-NOBMI-NEXT:    movl %ebx, %eax
1694; X86-NOBMI-NEXT:  .LBB34_6:
1695; X86-NOBMI-NEXT:    popl %esi
1696; X86-NOBMI-NEXT:    popl %edi
1697; X86-NOBMI-NEXT:    popl %ebx
1698; X86-NOBMI-NEXT:    retl
1699;
1700; X86-BMI1BMI2-LABEL: bzhi64_d0:
1701; X86-BMI1BMI2:       # %bb.0:
1702; X86-BMI1BMI2-NEXT:    pushl %edi
1703; X86-BMI1BMI2-NEXT:    pushl %esi
1704; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1705; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1706; X86-BMI1BMI2-NEXT:    movl $64, %ecx
1707; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1708; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
1709; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
1710; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1711; X86-BMI1BMI2-NEXT:    testb $32, %cl
1712; X86-BMI1BMI2-NEXT:    je .LBB34_2
1713; X86-BMI1BMI2-NEXT:  # %bb.1:
1714; X86-BMI1BMI2-NEXT:    movl %edi, %esi
1715; X86-BMI1BMI2-NEXT:    movl $0, %edi
1716; X86-BMI1BMI2-NEXT:  .LBB34_2:
1717; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
1718; X86-BMI1BMI2-NEXT:    jne .LBB34_4
1719; X86-BMI1BMI2-NEXT:  # %bb.3:
1720; X86-BMI1BMI2-NEXT:    movl %eax, %edx
1721; X86-BMI1BMI2-NEXT:  .LBB34_4:
1722; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
1723; X86-BMI1BMI2-NEXT:    testb $32, %cl
1724; X86-BMI1BMI2-NEXT:    jne .LBB34_6
1725; X86-BMI1BMI2-NEXT:  # %bb.5:
1726; X86-BMI1BMI2-NEXT:    movl %edi, %eax
1727; X86-BMI1BMI2-NEXT:  .LBB34_6:
1728; X86-BMI1BMI2-NEXT:    popl %esi
1729; X86-BMI1BMI2-NEXT:    popl %edi
1730; X86-BMI1BMI2-NEXT:    retl
1731;
1732; X64-NOBMI-LABEL: bzhi64_d0:
1733; X64-NOBMI:       # %bb.0:
1734; X64-NOBMI-NEXT:    movl $64, %ecx
1735; X64-NOBMI-NEXT:    subl %esi, %ecx
1736; X64-NOBMI-NEXT:    shlq %cl, %rdi
1737; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1738; X64-NOBMI-NEXT:    shrq %cl, %rdi
1739; X64-NOBMI-NEXT:    movq %rdi, %rax
1740; X64-NOBMI-NEXT:    retq
1741;
1742; X64-BMI1BMI2-LABEL: bzhi64_d0:
1743; X64-BMI1BMI2:       # %bb.0:
1744; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1745; X64-BMI1BMI2-NEXT:    retq
1746  %numhighbits = sub i64 64, %numlowbits
1747  %highbitscleared = shl i64 %val, %numhighbits
1748  %masked = lshr i64 %highbitscleared, %numhighbits
1749  ret i64 %masked
1750}
1751
; Same shift-left / logical-shift-right extraction of the low bits of %val, but
; the bit count arrives as an i8. The shift amount (64 - %numlowbits) is computed
; in i8 and then zero-extended to i64 before both shifts.
; The assertions below expect a single bzhiq on x86-64 with BMI2.
1752define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind {
1753; X86-NOBMI-LABEL: bzhi64_d1_indexzext:
1754; X86-NOBMI:       # %bb.0:
1755; X86-NOBMI-NEXT:    pushl %ebx
1756; X86-NOBMI-NEXT:    pushl %edi
1757; X86-NOBMI-NEXT:    pushl %esi
1758; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
1759; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1760; X86-NOBMI-NEXT:    movb $64, %cl
1761; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1762; X86-NOBMI-NEXT:    movl %edx, %esi
1763; X86-NOBMI-NEXT:    shll %cl, %esi
1764; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
1765; X86-NOBMI-NEXT:    testb $32, %cl
1766; X86-NOBMI-NEXT:    movl %esi, %edi
1767; X86-NOBMI-NEXT:    jne .LBB35_2
1768; X86-NOBMI-NEXT:  # %bb.1:
1769; X86-NOBMI-NEXT:    movl %eax, %edi
1770; X86-NOBMI-NEXT:  .LBB35_2:
1771; X86-NOBMI-NEXT:    movl %edi, %eax
1772; X86-NOBMI-NEXT:    shrl %cl, %eax
1773; X86-NOBMI-NEXT:    xorl %ebx, %ebx
1774; X86-NOBMI-NEXT:    testb $32, %cl
1775; X86-NOBMI-NEXT:    movl $0, %edx
1776; X86-NOBMI-NEXT:    jne .LBB35_4
1777; X86-NOBMI-NEXT:  # %bb.3:
1778; X86-NOBMI-NEXT:    movl %esi, %ebx
1779; X86-NOBMI-NEXT:    movl %eax, %edx
1780; X86-NOBMI-NEXT:  .LBB35_4:
1781; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
1782; X86-NOBMI-NEXT:    testb $32, %cl
1783; X86-NOBMI-NEXT:    jne .LBB35_6
1784; X86-NOBMI-NEXT:  # %bb.5:
1785; X86-NOBMI-NEXT:    movl %ebx, %eax
1786; X86-NOBMI-NEXT:  .LBB35_6:
1787; X86-NOBMI-NEXT:    popl %esi
1788; X86-NOBMI-NEXT:    popl %edi
1789; X86-NOBMI-NEXT:    popl %ebx
1790; X86-NOBMI-NEXT:    retl
1791;
1792; X86-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
1793; X86-BMI1BMI2:       # %bb.0:
1794; X86-BMI1BMI2-NEXT:    pushl %edi
1795; X86-BMI1BMI2-NEXT:    pushl %esi
1796; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1797; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1798; X86-BMI1BMI2-NEXT:    movb $64, %cl
1799; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1800; X86-BMI1BMI2-NEXT:    shldl %cl, %eax, %esi
1801; X86-BMI1BMI2-NEXT:    shlxl %ecx, %eax, %edi
1802; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1803; X86-BMI1BMI2-NEXT:    testb $32, %cl
1804; X86-BMI1BMI2-NEXT:    je .LBB35_2
1805; X86-BMI1BMI2-NEXT:  # %bb.1:
1806; X86-BMI1BMI2-NEXT:    movl %edi, %esi
1807; X86-BMI1BMI2-NEXT:    movl $0, %edi
1808; X86-BMI1BMI2-NEXT:  .LBB35_2:
1809; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
1810; X86-BMI1BMI2-NEXT:    jne .LBB35_4
1811; X86-BMI1BMI2-NEXT:  # %bb.3:
1812; X86-BMI1BMI2-NEXT:    movl %eax, %edx
1813; X86-BMI1BMI2-NEXT:  .LBB35_4:
1814; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
1815; X86-BMI1BMI2-NEXT:    testb $32, %cl
1816; X86-BMI1BMI2-NEXT:    jne .LBB35_6
1817; X86-BMI1BMI2-NEXT:  # %bb.5:
1818; X86-BMI1BMI2-NEXT:    movl %edi, %eax
1819; X86-BMI1BMI2-NEXT:  .LBB35_6:
1820; X86-BMI1BMI2-NEXT:    popl %esi
1821; X86-BMI1BMI2-NEXT:    popl %edi
1822; X86-BMI1BMI2-NEXT:    retl
1823;
1824; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
1825; X64-NOBMI:       # %bb.0:
1826; X64-NOBMI-NEXT:    movb $64, %cl
1827; X64-NOBMI-NEXT:    subb %sil, %cl
1828; X64-NOBMI-NEXT:    shlq %cl, %rdi
1829; X64-NOBMI-NEXT:    shrq %cl, %rdi
1830; X64-NOBMI-NEXT:    movq %rdi, %rax
1831; X64-NOBMI-NEXT:    retq
1832;
1833; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
1834; X64-BMI1BMI2:       # %bb.0:
1835; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
1836; X64-BMI1BMI2-NEXT:    bzhiq %rsi, %rdi, %rax
1837; X64-BMI1BMI2-NEXT:    retq
1838  %numhighbits = sub i8 64, %numlowbits
1839  %sh_prom = zext i8 %numhighbits to i64
1840  %highbitscleared = shl i64 %val, %sh_prom
1841  %masked = lshr i64 %highbitscleared, %sh_prom
1842  ret i64 %masked
1843}
1844
; Loads an i64 from %w and keeps its low %numlowbits bits by shifting left and
; then logically right by (64 - %numlowbits).
; The assertions below expect the load to be folded into a single bzhiq memory
; operand on x86-64 with BMI2.
1845define i64 @bzhi64_d2_load(i64* %w, i64 %numlowbits) nounwind {
1846; X86-NOBMI-LABEL: bzhi64_d2_load:
1847; X86-NOBMI:       # %bb.0:
1848; X86-NOBMI-NEXT:    pushl %ebx
1849; X86-NOBMI-NEXT:    pushl %edi
1850; X86-NOBMI-NEXT:    pushl %esi
1851; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1852; X86-NOBMI-NEXT:    movl (%eax), %edx
1853; X86-NOBMI-NEXT:    movl 4(%eax), %eax
1854; X86-NOBMI-NEXT:    movl $64, %ecx
1855; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1856; X86-NOBMI-NEXT:    movl %edx, %esi
1857; X86-NOBMI-NEXT:    shll %cl, %esi
1858; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
1859; X86-NOBMI-NEXT:    testb $32, %cl
1860; X86-NOBMI-NEXT:    movl %esi, %edi
1861; X86-NOBMI-NEXT:    jne .LBB36_2
1862; X86-NOBMI-NEXT:  # %bb.1:
1863; X86-NOBMI-NEXT:    movl %eax, %edi
1864; X86-NOBMI-NEXT:  .LBB36_2:
1865; X86-NOBMI-NEXT:    movl %edi, %eax
1866; X86-NOBMI-NEXT:    shrl %cl, %eax
1867; X86-NOBMI-NEXT:    xorl %ebx, %ebx
1868; X86-NOBMI-NEXT:    testb $32, %cl
1869; X86-NOBMI-NEXT:    movl $0, %edx
1870; X86-NOBMI-NEXT:    jne .LBB36_4
1871; X86-NOBMI-NEXT:  # %bb.3:
1872; X86-NOBMI-NEXT:    movl %esi, %ebx
1873; X86-NOBMI-NEXT:    movl %eax, %edx
1874; X86-NOBMI-NEXT:  .LBB36_4:
1875; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
1876; X86-NOBMI-NEXT:    testb $32, %cl
1877; X86-NOBMI-NEXT:    jne .LBB36_6
1878; X86-NOBMI-NEXT:  # %bb.5:
1879; X86-NOBMI-NEXT:    movl %ebx, %eax
1880; X86-NOBMI-NEXT:  .LBB36_6:
1881; X86-NOBMI-NEXT:    popl %esi
1882; X86-NOBMI-NEXT:    popl %edi
1883; X86-NOBMI-NEXT:    popl %ebx
1884; X86-NOBMI-NEXT:    retl
1885;
1886; X86-BMI1BMI2-LABEL: bzhi64_d2_load:
1887; X86-BMI1BMI2:       # %bb.0:
1888; X86-BMI1BMI2-NEXT:    pushl %edi
1889; X86-BMI1BMI2-NEXT:    pushl %esi
1890; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1891; X86-BMI1BMI2-NEXT:    movl (%eax), %edx
1892; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
1893; X86-BMI1BMI2-NEXT:    movl $64, %ecx
1894; X86-BMI1BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1895; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %esi
1896; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
1897; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1898; X86-BMI1BMI2-NEXT:    testb $32, %cl
1899; X86-BMI1BMI2-NEXT:    je .LBB36_2
1900; X86-BMI1BMI2-NEXT:  # %bb.1:
1901; X86-BMI1BMI2-NEXT:    movl %edi, %esi
1902; X86-BMI1BMI2-NEXT:    movl $0, %edi
1903; X86-BMI1BMI2-NEXT:  .LBB36_2:
1904; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
1905; X86-BMI1BMI2-NEXT:    jne .LBB36_4
1906; X86-BMI1BMI2-NEXT:  # %bb.3:
1907; X86-BMI1BMI2-NEXT:    movl %eax, %edx
1908; X86-BMI1BMI2-NEXT:  .LBB36_4:
1909; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
1910; X86-BMI1BMI2-NEXT:    testb $32, %cl
1911; X86-BMI1BMI2-NEXT:    jne .LBB36_6
1912; X86-BMI1BMI2-NEXT:  # %bb.5:
1913; X86-BMI1BMI2-NEXT:    movl %edi, %eax
1914; X86-BMI1BMI2-NEXT:  .LBB36_6:
1915; X86-BMI1BMI2-NEXT:    popl %esi
1916; X86-BMI1BMI2-NEXT:    popl %edi
1917; X86-BMI1BMI2-NEXT:    retl
1918;
1919; X64-NOBMI-LABEL: bzhi64_d2_load:
1920; X64-NOBMI:       # %bb.0:
1921; X64-NOBMI-NEXT:    movq (%rdi), %rax
1922; X64-NOBMI-NEXT:    movl $64, %ecx
1923; X64-NOBMI-NEXT:    subl %esi, %ecx
1924; X64-NOBMI-NEXT:    shlq %cl, %rax
1925; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
1926; X64-NOBMI-NEXT:    shrq %cl, %rax
1927; X64-NOBMI-NEXT:    retq
1928;
1929; X64-BMI1BMI2-LABEL: bzhi64_d2_load:
1930; X64-BMI1BMI2:       # %bb.0:
1931; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
1932; X64-BMI1BMI2-NEXT:    retq
1933  %val = load i64, i64* %w
1934  %numhighbits = sub i64 64, %numlowbits
1935  %highbitscleared = shl i64 %val, %numhighbits
1936  %masked = lshr i64 %highbitscleared, %numhighbits
1937  ret i64 %masked
1938}
1939
; Loads an i64 from %w and keeps its low bits, with the bit count given as an i8.
; The shift amount (64 - %numlowbits) is computed in i8, zero-extended to i64,
; and used for the shift left followed by the logical shift right.
; The assertions below expect a single bzhiq with a memory operand on x86-64
; with BMI2.
1940define i64 @bzhi64_d3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
1941; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext:
1942; X86-NOBMI:       # %bb.0:
1943; X86-NOBMI-NEXT:    pushl %ebx
1944; X86-NOBMI-NEXT:    pushl %edi
1945; X86-NOBMI-NEXT:    pushl %esi
1946; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
1947; X86-NOBMI-NEXT:    movl (%eax), %edx
1948; X86-NOBMI-NEXT:    movl 4(%eax), %eax
1949; X86-NOBMI-NEXT:    movb $64, %cl
1950; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
1951; X86-NOBMI-NEXT:    movl %edx, %esi
1952; X86-NOBMI-NEXT:    shll %cl, %esi
1953; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
1954; X86-NOBMI-NEXT:    testb $32, %cl
1955; X86-NOBMI-NEXT:    movl %esi, %edi
1956; X86-NOBMI-NEXT:    jne .LBB37_2
1957; X86-NOBMI-NEXT:  # %bb.1:
1958; X86-NOBMI-NEXT:    movl %eax, %edi
1959; X86-NOBMI-NEXT:  .LBB37_2:
1960; X86-NOBMI-NEXT:    movl %edi, %eax
1961; X86-NOBMI-NEXT:    shrl %cl, %eax
1962; X86-NOBMI-NEXT:    xorl %ebx, %ebx
1963; X86-NOBMI-NEXT:    testb $32, %cl
1964; X86-NOBMI-NEXT:    movl $0, %edx
1965; X86-NOBMI-NEXT:    jne .LBB37_4
1966; X86-NOBMI-NEXT:  # %bb.3:
1967; X86-NOBMI-NEXT:    movl %esi, %ebx
1968; X86-NOBMI-NEXT:    movl %eax, %edx
1969; X86-NOBMI-NEXT:  .LBB37_4:
1970; X86-NOBMI-NEXT:    shrdl %cl, %edi, %ebx
1971; X86-NOBMI-NEXT:    testb $32, %cl
1972; X86-NOBMI-NEXT:    jne .LBB37_6
1973; X86-NOBMI-NEXT:  # %bb.5:
1974; X86-NOBMI-NEXT:    movl %ebx, %eax
1975; X86-NOBMI-NEXT:  .LBB37_6:
1976; X86-NOBMI-NEXT:    popl %esi
1977; X86-NOBMI-NEXT:    popl %edi
1978; X86-NOBMI-NEXT:    popl %ebx
1979; X86-NOBMI-NEXT:    retl
1980;
1981; X86-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
1982; X86-BMI1BMI2:       # %bb.0:
1983; X86-BMI1BMI2-NEXT:    pushl %edi
1984; X86-BMI1BMI2-NEXT:    pushl %esi
1985; X86-BMI1BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1986; X86-BMI1BMI2-NEXT:    movl (%eax), %edx
1987; X86-BMI1BMI2-NEXT:    movl 4(%eax), %esi
1988; X86-BMI1BMI2-NEXT:    movb $64, %cl
1989; X86-BMI1BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1990; X86-BMI1BMI2-NEXT:    shldl %cl, %edx, %esi
1991; X86-BMI1BMI2-NEXT:    shlxl %ecx, %edx, %edi
1992; X86-BMI1BMI2-NEXT:    xorl %edx, %edx
1993; X86-BMI1BMI2-NEXT:    testb $32, %cl
1994; X86-BMI1BMI2-NEXT:    je .LBB37_2
1995; X86-BMI1BMI2-NEXT:  # %bb.1:
1996; X86-BMI1BMI2-NEXT:    movl %edi, %esi
1997; X86-BMI1BMI2-NEXT:    movl $0, %edi
1998; X86-BMI1BMI2-NEXT:  .LBB37_2:
1999; X86-BMI1BMI2-NEXT:    shrxl %ecx, %esi, %eax
2000; X86-BMI1BMI2-NEXT:    jne .LBB37_4
2001; X86-BMI1BMI2-NEXT:  # %bb.3:
2002; X86-BMI1BMI2-NEXT:    movl %eax, %edx
2003; X86-BMI1BMI2-NEXT:  .LBB37_4:
2004; X86-BMI1BMI2-NEXT:    shrdl %cl, %esi, %edi
2005; X86-BMI1BMI2-NEXT:    testb $32, %cl
2006; X86-BMI1BMI2-NEXT:    jne .LBB37_6
2007; X86-BMI1BMI2-NEXT:  # %bb.5:
2008; X86-BMI1BMI2-NEXT:    movl %edi, %eax
2009; X86-BMI1BMI2-NEXT:  .LBB37_6:
2010; X86-BMI1BMI2-NEXT:    popl %esi
2011; X86-BMI1BMI2-NEXT:    popl %edi
2012; X86-BMI1BMI2-NEXT:    retl
2013;
2014; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
2015; X64-NOBMI:       # %bb.0:
2016; X64-NOBMI-NEXT:    movq (%rdi), %rax
2017; X64-NOBMI-NEXT:    movb $64, %cl
2018; X64-NOBMI-NEXT:    subb %sil, %cl
2019; X64-NOBMI-NEXT:    shlq %cl, %rax
2020; X64-NOBMI-NEXT:    shrq %cl, %rax
2021; X64-NOBMI-NEXT:    retq
2022;
2023; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
2024; X64-BMI1BMI2:       # %bb.0:
2025; X64-BMI1BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
2026; X64-BMI1BMI2-NEXT:    bzhiq %rsi, (%rdi), %rax
2027; X64-BMI1BMI2-NEXT:    retq
2028  %val = load i64, i64* %w
2029  %numhighbits = sub i8 64, %numlowbits
2030  %sh_prom = zext i8 %numhighbits to i64
2031  %highbitscleared = shl i64 %val, %sh_prom
2032  %masked = lshr i64 %highbitscleared, %sh_prom
2033  ret i64 %masked
2034}
2035
2036; ---------------------------------------------------------------------------- ;
2037; Constant mask
2038; ---------------------------------------------------------------------------- ;
2039
2040; 32-bit
2041
; ANDs the i32 %val with the constant 2147483647 (0x7FFFFFFF), i.e. keeps the low
; 31 bits. The assertions below expect a plain andl with that immediate on both
; i686 and x86-64.
2042define i32 @bzhi32_constant_mask32(i32 %val) nounwind {
2043; X86-LABEL: bzhi32_constant_mask32:
2044; X86:       # %bb.0:
2045; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
2046; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
2047; X86-NEXT:    retl
2048;
2049; X64-LABEL: bzhi32_constant_mask32:
2050; X64:       # %bb.0:
2051; X64-NEXT:    andl $2147483647, %edi # imm = 0x7FFFFFFF
2052; X64-NEXT:    movl %edi, %eax
2053; X64-NEXT:    retq
2054  %masked = and i32 %val, 2147483647
2055  ret i32 %masked
2056}
2057
; Loads an i32 through %val and ANDs it with 2147483647 (0x7FFFFFFF, low 31
; bits). The assertions below expect the load to be folded into the andl as a
; memory operand.
2058define i32 @bzhi32_constant_mask32_load(i32* %val) nounwind {
2059; X86-LABEL: bzhi32_constant_mask32_load:
2060; X86:       # %bb.0:
2061; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2062; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
2063; X86-NEXT:    andl (%ecx), %eax
2064; X86-NEXT:    retl
2065;
2066; X64-LABEL: bzhi32_constant_mask32_load:
2067; X64:       # %bb.0:
2068; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
2069; X64-NEXT:    andl (%rdi), %eax
2070; X64-NEXT:    retq
2071  %val1 = load i32, i32* %val
2072  %masked = and i32 %val1, 2147483647
2073  ret i32 %masked
2074}
2075
; ANDs the i32 %val with the constant 32767 (0x7FFF), i.e. keeps the low 15 bits.
; The assertions below expect a plain andl with that immediate.
2076define i32 @bzhi32_constant_mask16(i32 %val) nounwind {
2077; X86-LABEL: bzhi32_constant_mask16:
2078; X86:       # %bb.0:
2079; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
2080; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
2081; X86-NEXT:    retl
2082;
2083; X64-LABEL: bzhi32_constant_mask16:
2084; X64:       # %bb.0:
2085; X64-NEXT:    andl $32767, %edi # imm = 0x7FFF
2086; X64-NEXT:    movl %edi, %eax
2087; X64-NEXT:    retq
2088  %masked = and i32 %val, 32767
2089  ret i32 %masked
2090}
2091
; Loads an i32 through %val and ANDs it with 32767 (0x7FFF, low 15 bits).
; The assertions below expect the load to be folded into the andl as a memory
; operand.
2092define i32 @bzhi32_constant_mask16_load(i32* %val) nounwind {
2093; X86-LABEL: bzhi32_constant_mask16_load:
2094; X86:       # %bb.0:
2095; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2096; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
2097; X86-NEXT:    andl (%ecx), %eax
2098; X86-NEXT:    retl
2099;
2100; X64-LABEL: bzhi32_constant_mask16_load:
2101; X64:       # %bb.0:
2102; X64-NEXT:    movl $32767, %eax # imm = 0x7FFF
2103; X64-NEXT:    andl (%rdi), %eax
2104; X64-NEXT:    retq
2105  %val1 = load i32, i32* %val
2106  %masked = and i32 %val1, 32767
2107  ret i32 %masked
2108}
2109
; ANDs the i32 %val with the constant 127, i.e. keeps the low 7 bits.
; The assertions below expect a plain andl with the immediate 127.
2110define i32 @bzhi32_constant_mask8(i32 %val) nounwind {
2111; X86-LABEL: bzhi32_constant_mask8:
2112; X86:       # %bb.0:
2113; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2114; X86-NEXT:    andl $127, %eax
2115; X86-NEXT:    retl
2116;
2117; X64-LABEL: bzhi32_constant_mask8:
2118; X64:       # %bb.0:
2119; X64-NEXT:    andl $127, %edi
2120; X64-NEXT:    movl %edi, %eax
2121; X64-NEXT:    retq
2122  %masked = and i32 %val, 127
2123  ret i32 %masked
2124}
2125
; Loads an i32 through %val and ANDs it with 127 (low 7 bits).
; The assertions below expect a separate load followed by andl with the
; immediate 127.
2126define i32 @bzhi32_constant_mask8_load(i32* %val) nounwind {
2127; X86-LABEL: bzhi32_constant_mask8_load:
2128; X86:       # %bb.0:
2129; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2130; X86-NEXT:    movl (%eax), %eax
2131; X86-NEXT:    andl $127, %eax
2132; X86-NEXT:    retl
2133;
2134; X64-LABEL: bzhi32_constant_mask8_load:
2135; X64:       # %bb.0:
2136; X64-NEXT:    movl (%rdi), %eax
2137; X64-NEXT:    andl $127, %eax
2138; X64-NEXT:    retq
2139  %val1 = load i32, i32* %val
2140  %masked = and i32 %val1, 127
2141  ret i32 %masked
2142}
2143
2144; 64-bit
2145
; ANDs the i64 %val with 4611686018427387903 (0x3FFFFFFFFFFFFFFF), i.e. keeps the
; low 62 bits. Expected code per the assertions below
;  - i686 masks only the high half with 0x3FFFFFFF,
;  - x86-64 without BMI uses movabsq + andq,
;  - x86-64 with TBM uses bextrq with immediate 0x3E00,
;  - x86-64 with BMI2 and no TBM uses movb $62 + bzhiq.
2146define i64 @bzhi64_constant_mask64(i64 %val) nounwind {
2147; X86-LABEL: bzhi64_constant_mask64:
2148; X86:       # %bb.0:
2149; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2150; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
2151; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
2152; X86-NEXT:    retl
2153;
2154; X64-NOBMI-LABEL: bzhi64_constant_mask64:
2155; X64-NOBMI:       # %bb.0:
2156; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
2157; X64-NOBMI-NEXT:    andq %rdi, %rax
2158; X64-NOBMI-NEXT:    retq
2159;
2160; X64-BMI1TBM-LABEL: bzhi64_constant_mask64:
2161; X64-BMI1TBM:       # %bb.0:
2162; X64-BMI1TBM-NEXT:    bextrq $15872, %rdi, %rax # imm = 0x3E00
2163; X64-BMI1TBM-NEXT:    retq
2164;
2165; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64:
2166; X64-BMI1NOTBMBMI2:       # %bb.0:
2167; X64-BMI1NOTBMBMI2-NEXT:    movb $62, %al
2168; X64-BMI1NOTBMBMI2-NEXT:    bzhiq %rax, %rdi, %rax
2169; X64-BMI1NOTBMBMI2-NEXT:    retq
2170  %masked = and i64 %val, 4611686018427387903
2171  ret i64 %masked
2172}
2173
; Loads an i64 through %val and ANDs it with 4611686018427387903
; (0x3FFFFFFFFFFFFFFF, low 62 bits). The assertions below expect the load to be
; folded as a memory operand into andq, bextrq or bzhiq on x86-64, and only the
; high half to be masked on i686.
2174define i64 @bzhi64_constant_mask64_load(i64* %val) nounwind {
2175; X86-LABEL: bzhi64_constant_mask64_load:
2176; X86:       # %bb.0:
2177; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2178; X86-NEXT:    movl (%ecx), %eax
2179; X86-NEXT:    movl $1073741823, %edx # imm = 0x3FFFFFFF
2180; X86-NEXT:    andl 4(%ecx), %edx
2181; X86-NEXT:    retl
2182;
2183; X64-NOBMI-LABEL: bzhi64_constant_mask64_load:
2184; X64-NOBMI:       # %bb.0:
2185; X64-NOBMI-NEXT:    movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF
2186; X64-NOBMI-NEXT:    andq (%rdi), %rax
2187; X64-NOBMI-NEXT:    retq
2188;
2189; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load:
2190; X64-BMI1TBM:       # %bb.0:
2191; X64-BMI1TBM-NEXT:    bextrq $15872, (%rdi), %rax # imm = 0x3E00
2192; X64-BMI1TBM-NEXT:    retq
2193;
2194; X64-BMI1NOTBMBMI2-LABEL: bzhi64_constant_mask64_load:
2195; X64-BMI1NOTBMBMI2:       # %bb.0:
2196; X64-BMI1NOTBMBMI2-NEXT:    movb $62, %al
2197; X64-BMI1NOTBMBMI2-NEXT:    bzhiq %rax, (%rdi), %rax
2198; X64-BMI1NOTBMBMI2-NEXT:    retq
2199  %val1 = load i64, i64* %val
2200  %masked = and i64 %val1, 4611686018427387903
2201  ret i64 %masked
2202}
2203
; ANDs the i64 %val with 2147483647 (0x7FFFFFFF), i.e. keeps the low 31 bits.
; The assertions below expect a 32-bit andl on x86-64, and on i686 an andl on
; the low half with the high half (edx) zeroed.
2204define i64 @bzhi64_constant_mask32(i64 %val) nounwind {
2205; X86-LABEL: bzhi64_constant_mask32:
2206; X86:       # %bb.0:
2207; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
2208; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
2209; X86-NEXT:    xorl %edx, %edx
2210; X86-NEXT:    retl
2211;
2212; X64-LABEL: bzhi64_constant_mask32:
2213; X64:       # %bb.0:
2214; X64-NEXT:    andl $2147483647, %edi # imm = 0x7FFFFFFF
2215; X64-NEXT:    movq %rdi, %rax
2216; X64-NEXT:    retq
2217  %masked = and i64 %val, 2147483647
2218  ret i64 %masked
2219}
2220
; Loads an i64 through %val and ANDs it with 2147483647 (0x7FFFFFFF, low 31
; bits). The assertions below expect a 64-bit load followed by a 32-bit andl on
; x86-64, and on i686 an andl folded with the low word plus a zeroed edx.
2221define i64 @bzhi64_constant_mask32_load(i64* %val) nounwind {
2222; X86-LABEL: bzhi64_constant_mask32_load:
2223; X86:       # %bb.0:
2224; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2225; X86-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
2226; X86-NEXT:    andl (%ecx), %eax
2227; X86-NEXT:    xorl %edx, %edx
2228; X86-NEXT:    retl
2229;
2230; X64-LABEL: bzhi64_constant_mask32_load:
2231; X64:       # %bb.0:
2232; X64-NEXT:    movq (%rdi), %rax
2233; X64-NEXT:    andl $2147483647, %eax # imm = 0x7FFFFFFF
2234; X64-NEXT:    retq
2235  %val1 = load i64, i64* %val
2236  %masked = and i64 %val1, 2147483647
2237  ret i64 %masked
2238}
2239
; ANDs the i64 %val with 32767 (0x7FFF), i.e. keeps the low 15 bits.
; The assertions below expect a 32-bit andl on x86-64, and on i686 an andl on
; the low half with the high half (edx) zeroed.
2240define i64 @bzhi64_constant_mask16(i64 %val) nounwind {
2241; X86-LABEL: bzhi64_constant_mask16:
2242; X86:       # %bb.0:
2243; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
2244; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
2245; X86-NEXT:    xorl %edx, %edx
2246; X86-NEXT:    retl
2247;
2248; X64-LABEL: bzhi64_constant_mask16:
2249; X64:       # %bb.0:
2250; X64-NEXT:    andl $32767, %edi # imm = 0x7FFF
2251; X64-NEXT:    movq %rdi, %rax
2252; X64-NEXT:    retq
2253  %masked = and i64 %val, 32767
2254  ret i64 %masked
2255}
2256
; Loads an i64 through %val and ANDs it with 32767 (0x7FFF, low 15 bits).
; The assertions below expect a 64-bit load followed by a 32-bit andl on x86-64,
; and on i686 an andl folded with the low word plus a zeroed edx.
2257define i64 @bzhi64_constant_mask16_load(i64* %val) nounwind {
2258; X86-LABEL: bzhi64_constant_mask16_load:
2259; X86:       # %bb.0:
2260; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
2261; X86-NEXT:    movl $32767, %eax # imm = 0x7FFF
2262; X86-NEXT:    andl (%ecx), %eax
2263; X86-NEXT:    xorl %edx, %edx
2264; X86-NEXT:    retl
2265;
2266; X64-LABEL: bzhi64_constant_mask16_load:
2267; X64:       # %bb.0:
2268; X64-NEXT:    movq (%rdi), %rax
2269; X64-NEXT:    andl $32767, %eax # imm = 0x7FFF
2270; X64-NEXT:    retq
2271  %val1 = load i64, i64* %val
2272  %masked = and i64 %val1, 32767
2273  ret i64 %masked
2274}
2275
; ANDs the i64 %val with 127, i.e. keeps the low 7 bits.
; The assertions below expect a 32-bit andl with the immediate 127 on x86-64,
; and on i686 the same andl on the low half with the high half (edx) zeroed.
2276define i64 @bzhi64_constant_mask8(i64 %val) nounwind {
2277; X86-LABEL: bzhi64_constant_mask8:
2278; X86:       # %bb.0:
2279; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2280; X86-NEXT:    andl $127, %eax
2281; X86-NEXT:    xorl %edx, %edx
2282; X86-NEXT:    retl
2283;
2284; X64-LABEL: bzhi64_constant_mask8:
2285; X64:       # %bb.0:
2286; X64-NEXT:    andl $127, %edi
2287; X64-NEXT:    movq %rdi, %rax
2288; X64-NEXT:    retq
2289  %masked = and i64 %val, 127
2290  ret i64 %masked
2291}
2292
; Loads an i64 through %val and ANDs it with 127 (low 7 bits).
; The assertions below expect a load followed by a 32-bit andl with the
; immediate 127, and on i686 additionally a zeroed high half (edx).
2293define i64 @bzhi64_constant_mask8_load(i64* %val) nounwind {
2294; X86-LABEL: bzhi64_constant_mask8_load:
2295; X86:       # %bb.0:
2296; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
2297; X86-NEXT:    movl (%eax), %eax
2298; X86-NEXT:    andl $127, %eax
2299; X86-NEXT:    xorl %edx, %edx
2300; X86-NEXT:    retl
2301;
2302; X64-LABEL: bzhi64_constant_mask8_load:
2303; X64:       # %bb.0:
2304; X64-NEXT:    movq (%rdi), %rax
2305; X64-NEXT:    andl $127, %eax
2306; X64-NEXT:    retq
2307  %val1 = load i64, i64* %val
2308  %masked = and i64 %val1, 127
2309  ret i64 %masked
2310}
2311