• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK0,X86-FALLBACK0
3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK1,X86-FALLBACK1
4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,NOBMI2,X86-NOBMI2,FALLBACK2,X86-FALLBACK2
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK3,X86-FALLBACK3
6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X86,BMI2,X86-BMI2,FALLBACK4,X86-FALLBACK4
7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK0,X64-FALLBACK0
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK1,X64-FALLBACK1
9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,NOBMI2,X64-NOBMI2,FALLBACK2,X64-FALLBACK2
10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK3,X64-FALLBACK3
11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=CHECK,X64,BMI2,X64-BMI2,FALLBACK4,X64-FALLBACK4
12
13; Patterns:
14;    c) x &  (-1 << y)
15;   ic) x &  (-1 << (32 - y))
16;    d) x >> y << y
17;   id) x >> (32 - y) << (32 - y)
18; are equivalent (c with d, ic with id), but we prefer the shift-pair variants (d, id) if we have BMI2.
19
20; ---------------------------------------------------------------------------- ;
21; Pattern c.
22; ---------------------------------------------------------------------------- ;
23
24; 8-bit
25
; Pattern c, i8, both operands passed by value: builds %mask = -1 << %numlowbits
; and returns %mask & %val, with the mask as the first 'and' operand.
26define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind {
27; X86-LABEL: clear_lowbits8_c0:
28; X86:       # %bb.0:
29; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
30; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
31; X86-NEXT:    shrb %cl, %al
32; X86-NEXT:    shlb %cl, %al
33; X86-NEXT:    retl
34;
35; X64-LABEL: clear_lowbits8_c0:
36; X64:       # %bb.0:
37; X64-NEXT:    movl %esi, %ecx
38; X64-NEXT:    shrb %cl, %dil
39; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
40; X64-NEXT:    shlb %cl, %dil
41; X64-NEXT:    movl %edi, %eax
42; X64-NEXT:    retq
43  %mask = shl i8 -1, %numlowbits
44  %masked = and i8 %mask, %val
45  ret i8 %masked
46}
47
; Pattern c, i8, with %val loaded from the pointer %w instead of being passed
; by value; the result is (-1 << %numlowbits) & %val.
48define i8 @clear_lowbits8_c2_load(i8* %w, i8 %numlowbits) nounwind {
49; X86-LABEL: clear_lowbits8_c2_load:
50; X86:       # %bb.0:
51; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
52; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
53; X86-NEXT:    movb (%eax), %al
54; X86-NEXT:    shrb %cl, %al
55; X86-NEXT:    shlb %cl, %al
56; X86-NEXT:    retl
57;
58; X64-LABEL: clear_lowbits8_c2_load:
59; X64:       # %bb.0:
60; X64-NEXT:    movl %esi, %ecx
61; X64-NEXT:    movb (%rdi), %al
62; X64-NEXT:    shrb %cl, %al
63; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
64; X64-NEXT:    shlb %cl, %al
65; X64-NEXT:    retq
66  %val = load i8, i8* %w
67  %mask = shl i8 -1, %numlowbits
68  %masked = and i8 %mask, %val
69  ret i8 %masked
70}
71
; Pattern c, i8, same mask computation as clear_lowbits8_c0 but with the 'and'
; operands swapped (%val first, %mask second).
72define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind {
73; X86-LABEL: clear_lowbits8_c4_commutative:
74; X86:       # %bb.0:
75; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
76; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
77; X86-NEXT:    shrb %cl, %al
78; X86-NEXT:    shlb %cl, %al
79; X86-NEXT:    retl
80;
81; X64-LABEL: clear_lowbits8_c4_commutative:
82; X64:       # %bb.0:
83; X64-NEXT:    movl %esi, %ecx
84; X64-NEXT:    shrb %cl, %dil
85; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
86; X64-NEXT:    shlb %cl, %dil
87; X64-NEXT:    movl %edi, %eax
88; X64-NEXT:    retq
89  %mask = shl i8 -1, %numlowbits
90  %masked = and i8 %val, %mask ; swapped order
91  ret i8 %masked
92}
93
94; 16-bit
95
; Pattern c, i16, both operands passed by value: builds %mask = -1 << %numlowbits
; and returns %mask & %val, with the mask as the first 'and' operand.
96define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind {
97; X86-NOBMI2-LABEL: clear_lowbits16_c0:
98; X86-NOBMI2:       # %bb.0:
99; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
100; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
101; X86-NOBMI2-NEXT:    shrl %cl, %eax
102; X86-NOBMI2-NEXT:    shll %cl, %eax
103; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
104; X86-NOBMI2-NEXT:    retl
105;
106; X86-BMI2-LABEL: clear_lowbits16_c0:
107; X86-BMI2:       # %bb.0:
108; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
109; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
110; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
111; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
112; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
113; X86-BMI2-NEXT:    retl
114;
115; X64-NOBMI2-LABEL: clear_lowbits16_c0:
116; X64-NOBMI2:       # %bb.0:
117; X64-NOBMI2-NEXT:    movl %esi, %ecx
118; X64-NOBMI2-NEXT:    movzwl %di, %eax
119; X64-NOBMI2-NEXT:    shrl %cl, %eax
120; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
121; X64-NOBMI2-NEXT:    shll %cl, %eax
122; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
123; X64-NOBMI2-NEXT:    retq
124;
125; X64-BMI2-LABEL: clear_lowbits16_c0:
126; X64-BMI2:       # %bb.0:
127; X64-BMI2-NEXT:    movzwl %di, %eax
128; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
129; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
130; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
131; X64-BMI2-NEXT:    retq
132  %mask = shl i16 -1, %numlowbits
133  %masked = and i16 %mask, %val
134  ret i16 %masked
135}
136
; Pattern c, i16, with the shift amount passed as i8 and zero-extended to i16
; (%sh_prom) before the shl that builds the mask.
137define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind {
138; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
139; X86-NOBMI2:       # %bb.0:
140; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
141; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
142; X86-NOBMI2-NEXT:    shrl %cl, %eax
143; X86-NOBMI2-NEXT:    shll %cl, %eax
144; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
145; X86-NOBMI2-NEXT:    retl
146;
147; X86-BMI2-LABEL: clear_lowbits16_c1_indexzext:
148; X86-BMI2:       # %bb.0:
149; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
150; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
151; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
152; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
153; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
154; X86-BMI2-NEXT:    retl
155;
156; X64-NOBMI2-LABEL: clear_lowbits16_c1_indexzext:
157; X64-NOBMI2:       # %bb.0:
158; X64-NOBMI2-NEXT:    movl %esi, %ecx
159; X64-NOBMI2-NEXT:    movzwl %di, %eax
160; X64-NOBMI2-NEXT:    shrl %cl, %eax
161; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
162; X64-NOBMI2-NEXT:    shll %cl, %eax
163; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
164; X64-NOBMI2-NEXT:    retq
165;
166; X64-BMI2-LABEL: clear_lowbits16_c1_indexzext:
167; X64-BMI2:       # %bb.0:
168; X64-BMI2-NEXT:    movzwl %di, %eax
169; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
170; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
171; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
172; X64-BMI2-NEXT:    retq
173  %sh_prom = zext i8 %numlowbits to i16
174  %mask = shl i16 -1, %sh_prom
175  %masked = and i16 %mask, %val
176  ret i16 %masked
177}
178
; Pattern c, i16, with %val loaded from the pointer %w instead of being passed
; by value; the result is (-1 << %numlowbits) & %val.
179define i16 @clear_lowbits16_c2_load(i16* %w, i16 %numlowbits) nounwind {
180; X86-NOBMI2-LABEL: clear_lowbits16_c2_load:
181; X86-NOBMI2:       # %bb.0:
182; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
183; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
184; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
185; X86-NOBMI2-NEXT:    shrl %cl, %eax
186; X86-NOBMI2-NEXT:    shll %cl, %eax
187; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
188; X86-NOBMI2-NEXT:    retl
189;
190; X86-BMI2-LABEL: clear_lowbits16_c2_load:
191; X86-BMI2:       # %bb.0:
192; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
193; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
194; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
195; X86-BMI2-NEXT:    shrxl %eax, %ecx, %ecx
196; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
197; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
198; X86-BMI2-NEXT:    retl
199;
200; X64-NOBMI2-LABEL: clear_lowbits16_c2_load:
201; X64-NOBMI2:       # %bb.0:
202; X64-NOBMI2-NEXT:    movl %esi, %ecx
203; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
204; X64-NOBMI2-NEXT:    shrl %cl, %eax
205; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
206; X64-NOBMI2-NEXT:    shll %cl, %eax
207; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
208; X64-NOBMI2-NEXT:    retq
209;
210; X64-BMI2-LABEL: clear_lowbits16_c2_load:
211; X64-BMI2:       # %bb.0:
212; X64-BMI2-NEXT:    movzwl (%rdi), %eax
213; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
214; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
215; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
216; X64-BMI2-NEXT:    retq
217  %val = load i16, i16* %w
218  %mask = shl i16 -1, %numlowbits
219  %masked = and i16 %mask, %val
220  ret i16 %masked
221}
222
; Pattern c, i16, combining both variations: %val is loaded from the pointer %w,
; and the i8 shift amount is zero-extended to i16 (%sh_prom) before the shl.
223define i16 @clear_lowbits16_c3_load_indexzext(i16* %w, i8 %numlowbits) nounwind {
224; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
225; X86-NOBMI2:       # %bb.0:
226; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
227; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
228; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
229; X86-NOBMI2-NEXT:    shrl %cl, %eax
230; X86-NOBMI2-NEXT:    shll %cl, %eax
231; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
232; X86-NOBMI2-NEXT:    retl
233;
234; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
235; X86-BMI2:       # %bb.0:
236; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
237; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
238; X86-BMI2-NEXT:    movzwl (%ecx), %ecx
239; X86-BMI2-NEXT:    shrxl %eax, %ecx, %ecx
240; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
241; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
242; X86-BMI2-NEXT:    retl
243;
244; X64-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext:
245; X64-NOBMI2:       # %bb.0:
246; X64-NOBMI2-NEXT:    movl %esi, %ecx
247; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
248; X64-NOBMI2-NEXT:    shrl %cl, %eax
249; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
250; X64-NOBMI2-NEXT:    shll %cl, %eax
251; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
252; X64-NOBMI2-NEXT:    retq
253;
254; X64-BMI2-LABEL: clear_lowbits16_c3_load_indexzext:
255; X64-BMI2:       # %bb.0:
256; X64-BMI2-NEXT:    movzwl (%rdi), %eax
257; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
258; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
259; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
260; X64-BMI2-NEXT:    retq
261  %val = load i16, i16* %w
262  %sh_prom = zext i8 %numlowbits to i16
263  %mask = shl i16 -1, %sh_prom
264  %masked = and i16 %mask, %val
265  ret i16 %masked
266}
267
; Pattern c, i16, same mask computation as clear_lowbits16_c0 but with the 'and'
; operands swapped (%val first, %mask second).
268define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind {
269; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
270; X86-NOBMI2:       # %bb.0:
271; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
272; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
273; X86-NOBMI2-NEXT:    shrl %cl, %eax
274; X86-NOBMI2-NEXT:    shll %cl, %eax
275; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
276; X86-NOBMI2-NEXT:    retl
277;
278; X86-BMI2-LABEL: clear_lowbits16_c4_commutative:
279; X86-BMI2:       # %bb.0:
280; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
281; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
282; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
283; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
284; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
285; X86-BMI2-NEXT:    retl
286;
287; X64-NOBMI2-LABEL: clear_lowbits16_c4_commutative:
288; X64-NOBMI2:       # %bb.0:
289; X64-NOBMI2-NEXT:    movl %esi, %ecx
290; X64-NOBMI2-NEXT:    movzwl %di, %eax
291; X64-NOBMI2-NEXT:    shrl %cl, %eax
292; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
293; X64-NOBMI2-NEXT:    shll %cl, %eax
294; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
295; X64-NOBMI2-NEXT:    retq
296;
297; X64-BMI2-LABEL: clear_lowbits16_c4_commutative:
298; X64-BMI2:       # %bb.0:
299; X64-BMI2-NEXT:    movzwl %di, %eax
300; X64-BMI2-NEXT:    shrxl %esi, %eax, %eax
301; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
302; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
303; X64-BMI2-NEXT:    retq
304  %mask = shl i16 -1, %numlowbits
305  %masked = and i16 %val, %mask ; swapped order
306  ret i16 %masked
307}
308
309; 32-bit
310
; Pattern c, i32, both operands passed by value: builds %mask = -1 << %numlowbits
; and returns %mask & %val, with the mask as the first 'and' operand.
311define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind {
312; X86-NOBMI2-LABEL: clear_lowbits32_c0:
313; X86-NOBMI2:       # %bb.0:
314; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
315; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
316; X86-NOBMI2-NEXT:    shrl %cl, %eax
317; X86-NOBMI2-NEXT:    shll %cl, %eax
318; X86-NOBMI2-NEXT:    retl
319;
320; X86-BMI2-LABEL: clear_lowbits32_c0:
321; X86-BMI2:       # %bb.0:
322; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
323; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
324; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
325; X86-BMI2-NEXT:    retl
326;
327; X64-NOBMI2-LABEL: clear_lowbits32_c0:
328; X64-NOBMI2:       # %bb.0:
329; X64-NOBMI2-NEXT:    movl %esi, %ecx
330; X64-NOBMI2-NEXT:    shrl %cl, %edi
331; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
332; X64-NOBMI2-NEXT:    shll %cl, %edi
333; X64-NOBMI2-NEXT:    movl %edi, %eax
334; X64-NOBMI2-NEXT:    retq
335;
336; X64-BMI2-LABEL: clear_lowbits32_c0:
337; X64-BMI2:       # %bb.0:
338; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
339; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
340; X64-BMI2-NEXT:    retq
341  %mask = shl i32 -1, %numlowbits
342  %masked = and i32 %mask, %val
343  ret i32 %masked
344}
345
; Pattern c, i32, with the shift amount passed as i8 and zero-extended to i32
; (%sh_prom) before the shl that builds the mask.
346define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
347; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
348; X86-NOBMI2:       # %bb.0:
349; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
350; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
351; X86-NOBMI2-NEXT:    shrl %cl, %eax
352; X86-NOBMI2-NEXT:    shll %cl, %eax
353; X86-NOBMI2-NEXT:    retl
354;
355; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext:
356; X86-BMI2:       # %bb.0:
357; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
358; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
359; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
360; X86-BMI2-NEXT:    retl
361;
362; X64-NOBMI2-LABEL: clear_lowbits32_c1_indexzext:
363; X64-NOBMI2:       # %bb.0:
364; X64-NOBMI2-NEXT:    movl %esi, %ecx
365; X64-NOBMI2-NEXT:    shrl %cl, %edi
366; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
367; X64-NOBMI2-NEXT:    shll %cl, %edi
368; X64-NOBMI2-NEXT:    movl %edi, %eax
369; X64-NOBMI2-NEXT:    retq
370;
371; X64-BMI2-LABEL: clear_lowbits32_c1_indexzext:
372; X64-BMI2:       # %bb.0:
373; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
374; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
375; X64-BMI2-NEXT:    retq
376  %sh_prom = zext i8 %numlowbits to i32
377  %mask = shl i32 -1, %sh_prom
378  %masked = and i32 %mask, %val
379  ret i32 %masked
380}
381
; Pattern c, i32, with %val loaded from the pointer %w instead of being passed
; by value; the result is (-1 << %numlowbits) & %val.
382define i32 @clear_lowbits32_c2_load(i32* %w, i32 %numlowbits) nounwind {
383; X86-NOBMI2-LABEL: clear_lowbits32_c2_load:
384; X86-NOBMI2:       # %bb.0:
385; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
386; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
387; X86-NOBMI2-NEXT:    movl (%eax), %eax
388; X86-NOBMI2-NEXT:    shrl %cl, %eax
389; X86-NOBMI2-NEXT:    shll %cl, %eax
390; X86-NOBMI2-NEXT:    retl
391;
392; X86-BMI2-LABEL: clear_lowbits32_c2_load:
393; X86-BMI2:       # %bb.0:
394; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
395; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
396; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
397; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
398; X86-BMI2-NEXT:    retl
399;
400; X64-NOBMI2-LABEL: clear_lowbits32_c2_load:
401; X64-NOBMI2:       # %bb.0:
402; X64-NOBMI2-NEXT:    movl %esi, %ecx
403; X64-NOBMI2-NEXT:    movl (%rdi), %eax
404; X64-NOBMI2-NEXT:    shrl %cl, %eax
405; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
406; X64-NOBMI2-NEXT:    shll %cl, %eax
407; X64-NOBMI2-NEXT:    retq
408;
409; X64-BMI2-LABEL: clear_lowbits32_c2_load:
410; X64-BMI2:       # %bb.0:
411; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
412; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
413; X64-BMI2-NEXT:    retq
414  %val = load i32, i32* %w
415  %mask = shl i32 -1, %numlowbits
416  %masked = and i32 %mask, %val
417  ret i32 %masked
418}
419
; Pattern c, i32, combining both variations: %val is loaded from the pointer %w,
; and the i8 shift amount is zero-extended to i32 (%sh_prom) before the shl.
420define i32 @clear_lowbits32_c3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
421; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
422; X86-NOBMI2:       # %bb.0:
423; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
424; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
425; X86-NOBMI2-NEXT:    movl (%eax), %eax
426; X86-NOBMI2-NEXT:    shrl %cl, %eax
427; X86-NOBMI2-NEXT:    shll %cl, %eax
428; X86-NOBMI2-NEXT:    retl
429;
430; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
431; X86-BMI2:       # %bb.0:
432; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
433; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
434; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
435; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
436; X86-BMI2-NEXT:    retl
437;
438; X64-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext:
439; X64-NOBMI2:       # %bb.0:
440; X64-NOBMI2-NEXT:    movl %esi, %ecx
441; X64-NOBMI2-NEXT:    movl (%rdi), %eax
442; X64-NOBMI2-NEXT:    shrl %cl, %eax
443; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
444; X64-NOBMI2-NEXT:    shll %cl, %eax
445; X64-NOBMI2-NEXT:    retq
446;
447; X64-BMI2-LABEL: clear_lowbits32_c3_load_indexzext:
448; X64-BMI2:       # %bb.0:
449; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
450; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
451; X64-BMI2-NEXT:    retq
452  %val = load i32, i32* %w
453  %sh_prom = zext i8 %numlowbits to i32
454  %mask = shl i32 -1, %sh_prom
455  %masked = and i32 %mask, %val
456  ret i32 %masked
457}
458
; Pattern c, i32, same mask computation as clear_lowbits32_c0 but with the 'and'
; operands swapped (%val first, %mask second).
459define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
460; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
461; X86-NOBMI2:       # %bb.0:
462; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
463; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
464; X86-NOBMI2-NEXT:    shrl %cl, %eax
465; X86-NOBMI2-NEXT:    shll %cl, %eax
466; X86-NOBMI2-NEXT:    retl
467;
468; X86-BMI2-LABEL: clear_lowbits32_c4_commutative:
469; X86-BMI2:       # %bb.0:
470; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
471; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
472; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
473; X86-BMI2-NEXT:    retl
474;
475; X64-NOBMI2-LABEL: clear_lowbits32_c4_commutative:
476; X64-NOBMI2:       # %bb.0:
477; X64-NOBMI2-NEXT:    movl %esi, %ecx
478; X64-NOBMI2-NEXT:    shrl %cl, %edi
479; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
480; X64-NOBMI2-NEXT:    shll %cl, %edi
481; X64-NOBMI2-NEXT:    movl %edi, %eax
482; X64-NOBMI2-NEXT:    retq
483;
484; X64-BMI2-LABEL: clear_lowbits32_c4_commutative:
485; X64-BMI2:       # %bb.0:
486; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
487; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
488; X64-BMI2-NEXT:    retq
489  %mask = shl i32 -1, %numlowbits
490  %masked = and i32 %val, %mask ; swapped order
491  ret i32 %masked
492}
493
494; 64-bit
495
; Pattern c, i64, both operands passed by value: builds %mask = -1 << %numlowbits
; and returns %mask & %val, with the mask as the first 'and' operand.
496define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind {
497; X86-NOBMI2-LABEL: clear_lowbits64_c0:
498; X86-NOBMI2:       # %bb.0:
499; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
500; X86-NOBMI2-NEXT:    movl $-1, %edx
501; X86-NOBMI2-NEXT:    movl $-1, %eax
502; X86-NOBMI2-NEXT:    shll %cl, %eax
503; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
504; X86-NOBMI2-NEXT:    testb $32, %cl
505; X86-NOBMI2-NEXT:    je .LBB13_2
506; X86-NOBMI2-NEXT:  # %bb.1:
507; X86-NOBMI2-NEXT:    movl %eax, %edx
508; X86-NOBMI2-NEXT:    xorl %eax, %eax
509; X86-NOBMI2-NEXT:  .LBB13_2:
510; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
511; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
512; X86-NOBMI2-NEXT:    retl
513;
514; X86-BMI2-LABEL: clear_lowbits64_c0:
515; X86-BMI2:       # %bb.0:
516; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
517; X86-BMI2-NEXT:    movl $-1, %edx
518; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
519; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
520; X86-BMI2-NEXT:    testb $32, %cl
521; X86-BMI2-NEXT:    je .LBB13_2
522; X86-BMI2-NEXT:  # %bb.1:
523; X86-BMI2-NEXT:    movl %eax, %edx
524; X86-BMI2-NEXT:    xorl %eax, %eax
525; X86-BMI2-NEXT:  .LBB13_2:
526; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
527; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
528; X86-BMI2-NEXT:    retl
529;
530; X64-NOBMI2-LABEL: clear_lowbits64_c0:
531; X64-NOBMI2:       # %bb.0:
532; X64-NOBMI2-NEXT:    movq %rsi, %rcx
533; X64-NOBMI2-NEXT:    shrq %cl, %rdi
534; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
535; X64-NOBMI2-NEXT:    shlq %cl, %rdi
536; X64-NOBMI2-NEXT:    movq %rdi, %rax
537; X64-NOBMI2-NEXT:    retq
538;
539; X64-BMI2-LABEL: clear_lowbits64_c0:
540; X64-BMI2:       # %bb.0:
541; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
542; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
543; X64-BMI2-NEXT:    retq
544  %mask = shl i64 -1, %numlowbits
545  %masked = and i64 %mask, %val
546  ret i64 %masked
547}
548
; Pattern c, i64, with the shift amount passed as i8 and zero-extended to i64
; (%sh_prom) before the shl that builds the mask.
549define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind {
550; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
551; X86-NOBMI2:       # %bb.0:
552; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
553; X86-NOBMI2-NEXT:    movl $-1, %edx
554; X86-NOBMI2-NEXT:    movl $-1, %eax
555; X86-NOBMI2-NEXT:    shll %cl, %eax
556; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
557; X86-NOBMI2-NEXT:    testb $32, %cl
558; X86-NOBMI2-NEXT:    je .LBB14_2
559; X86-NOBMI2-NEXT:  # %bb.1:
560; X86-NOBMI2-NEXT:    movl %eax, %edx
561; X86-NOBMI2-NEXT:    xorl %eax, %eax
562; X86-NOBMI2-NEXT:  .LBB14_2:
563; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
564; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
565; X86-NOBMI2-NEXT:    retl
566;
567; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext:
568; X86-BMI2:       # %bb.0:
569; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
570; X86-BMI2-NEXT:    movl $-1, %edx
571; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
572; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
573; X86-BMI2-NEXT:    testb $32, %cl
574; X86-BMI2-NEXT:    je .LBB14_2
575; X86-BMI2-NEXT:  # %bb.1:
576; X86-BMI2-NEXT:    movl %eax, %edx
577; X86-BMI2-NEXT:    xorl %eax, %eax
578; X86-BMI2-NEXT:  .LBB14_2:
579; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
580; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
581; X86-BMI2-NEXT:    retl
582;
583; X64-NOBMI2-LABEL: clear_lowbits64_c1_indexzext:
584; X64-NOBMI2:       # %bb.0:
585; X64-NOBMI2-NEXT:    movl %esi, %ecx
586; X64-NOBMI2-NEXT:    shrq %cl, %rdi
587; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
588; X64-NOBMI2-NEXT:    shlq %cl, %rdi
589; X64-NOBMI2-NEXT:    movq %rdi, %rax
590; X64-NOBMI2-NEXT:    retq
591;
592; X64-BMI2-LABEL: clear_lowbits64_c1_indexzext:
593; X64-BMI2:       # %bb.0:
594; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
595; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
596; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
597; X64-BMI2-NEXT:    retq
598  %sh_prom = zext i8 %numlowbits to i64
599  %mask = shl i64 -1, %sh_prom
600  %masked = and i64 %mask, %val
601  ret i64 %masked
602}
603
; Pattern c, i64, with %val loaded from the pointer %w instead of being passed
; by value; the result is (-1 << %numlowbits) & %val.
604define i64 @clear_lowbits64_c2_load(i64* %w, i64 %numlowbits) nounwind {
605; X86-NOBMI2-LABEL: clear_lowbits64_c2_load:
606; X86-NOBMI2:       # %bb.0:
607; X86-NOBMI2-NEXT:    pushl %esi
608; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
609; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
610; X86-NOBMI2-NEXT:    movl $-1, %edx
611; X86-NOBMI2-NEXT:    movl $-1, %eax
612; X86-NOBMI2-NEXT:    shll %cl, %eax
613; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
614; X86-NOBMI2-NEXT:    testb $32, %cl
615; X86-NOBMI2-NEXT:    je .LBB15_2
616; X86-NOBMI2-NEXT:  # %bb.1:
617; X86-NOBMI2-NEXT:    movl %eax, %edx
618; X86-NOBMI2-NEXT:    xorl %eax, %eax
619; X86-NOBMI2-NEXT:  .LBB15_2:
620; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
621; X86-NOBMI2-NEXT:    andl (%esi), %eax
622; X86-NOBMI2-NEXT:    popl %esi
623; X86-NOBMI2-NEXT:    retl
624;
625; X86-BMI2-LABEL: clear_lowbits64_c2_load:
626; X86-BMI2:       # %bb.0:
627; X86-BMI2-NEXT:    pushl %esi
628; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
629; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
630; X86-BMI2-NEXT:    movl $-1, %edx
631; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
632; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
633; X86-BMI2-NEXT:    testb $32, %cl
634; X86-BMI2-NEXT:    je .LBB15_2
635; X86-BMI2-NEXT:  # %bb.1:
636; X86-BMI2-NEXT:    movl %eax, %edx
637; X86-BMI2-NEXT:    xorl %eax, %eax
638; X86-BMI2-NEXT:  .LBB15_2:
639; X86-BMI2-NEXT:    andl 4(%esi), %edx
640; X86-BMI2-NEXT:    andl (%esi), %eax
641; X86-BMI2-NEXT:    popl %esi
642; X86-BMI2-NEXT:    retl
643;
644; X64-NOBMI2-LABEL: clear_lowbits64_c2_load:
645; X64-NOBMI2:       # %bb.0:
646; X64-NOBMI2-NEXT:    movq %rsi, %rcx
647; X64-NOBMI2-NEXT:    movq (%rdi), %rax
648; X64-NOBMI2-NEXT:    shrq %cl, %rax
649; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
650; X64-NOBMI2-NEXT:    shlq %cl, %rax
651; X64-NOBMI2-NEXT:    retq
652;
653; X64-BMI2-LABEL: clear_lowbits64_c2_load:
654; X64-BMI2:       # %bb.0:
655; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
656; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
657; X64-BMI2-NEXT:    retq
658  %val = load i64, i64* %w
659  %mask = shl i64 -1, %numlowbits
660  %masked = and i64 %mask, %val
661  ret i64 %masked
662}
663
; Pattern c, i64, combining both variations: %val is loaded from the pointer %w,
; and the i8 shift amount is zero-extended to i64 (%sh_prom) before the shl.
664define i64 @clear_lowbits64_c3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
665; X86-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
666; X86-NOBMI2:       # %bb.0:
667; X86-NOBMI2-NEXT:    pushl %esi
668; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
669; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
670; X86-NOBMI2-NEXT:    movl $-1, %edx
671; X86-NOBMI2-NEXT:    movl $-1, %eax
672; X86-NOBMI2-NEXT:    shll %cl, %eax
673; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
674; X86-NOBMI2-NEXT:    testb $32, %cl
675; X86-NOBMI2-NEXT:    je .LBB16_2
676; X86-NOBMI2-NEXT:  # %bb.1:
677; X86-NOBMI2-NEXT:    movl %eax, %edx
678; X86-NOBMI2-NEXT:    xorl %eax, %eax
679; X86-NOBMI2-NEXT:  .LBB16_2:
680; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
681; X86-NOBMI2-NEXT:    andl (%esi), %eax
682; X86-NOBMI2-NEXT:    popl %esi
683; X86-NOBMI2-NEXT:    retl
684;
685; X86-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
686; X86-BMI2:       # %bb.0:
687; X86-BMI2-NEXT:    pushl %esi
688; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
689; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
690; X86-BMI2-NEXT:    movl $-1, %edx
691; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
692; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
693; X86-BMI2-NEXT:    testb $32, %cl
694; X86-BMI2-NEXT:    je .LBB16_2
695; X86-BMI2-NEXT:  # %bb.1:
696; X86-BMI2-NEXT:    movl %eax, %edx
697; X86-BMI2-NEXT:    xorl %eax, %eax
698; X86-BMI2-NEXT:  .LBB16_2:
699; X86-BMI2-NEXT:    andl 4(%esi), %edx
700; X86-BMI2-NEXT:    andl (%esi), %eax
701; X86-BMI2-NEXT:    popl %esi
702; X86-BMI2-NEXT:    retl
703;
704; X64-NOBMI2-LABEL: clear_lowbits64_c3_load_indexzext:
705; X64-NOBMI2:       # %bb.0:
706; X64-NOBMI2-NEXT:    movl %esi, %ecx
707; X64-NOBMI2-NEXT:    movq (%rdi), %rax
708; X64-NOBMI2-NEXT:    shrq %cl, %rax
709; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
710; X64-NOBMI2-NEXT:    shlq %cl, %rax
711; X64-NOBMI2-NEXT:    retq
712;
713; X64-BMI2-LABEL: clear_lowbits64_c3_load_indexzext:
714; X64-BMI2:       # %bb.0:
715; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
716; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
717; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
718; X64-BMI2-NEXT:    retq
719  %val = load i64, i64* %w
720  %sh_prom = zext i8 %numlowbits to i64
721  %mask = shl i64 -1, %sh_prom
722  %masked = and i64 %mask, %val
723  ret i64 %masked
724}
725
; Pattern c, i64, same mask computation as clear_lowbits64_c0 but with the 'and'
; operands swapped (%val first, %mask second).
726define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
727; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
728; X86-NOBMI2:       # %bb.0:
729; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
730; X86-NOBMI2-NEXT:    movl $-1, %edx
731; X86-NOBMI2-NEXT:    movl $-1, %eax
732; X86-NOBMI2-NEXT:    shll %cl, %eax
733; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
734; X86-NOBMI2-NEXT:    testb $32, %cl
735; X86-NOBMI2-NEXT:    je .LBB17_2
736; X86-NOBMI2-NEXT:  # %bb.1:
737; X86-NOBMI2-NEXT:    movl %eax, %edx
738; X86-NOBMI2-NEXT:    xorl %eax, %eax
739; X86-NOBMI2-NEXT:  .LBB17_2:
740; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
741; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
742; X86-NOBMI2-NEXT:    retl
743;
744; X86-BMI2-LABEL: clear_lowbits64_c4_commutative:
745; X86-BMI2:       # %bb.0:
746; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
747; X86-BMI2-NEXT:    movl $-1, %edx
748; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
749; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
750; X86-BMI2-NEXT:    testb $32, %cl
751; X86-BMI2-NEXT:    je .LBB17_2
752; X86-BMI2-NEXT:  # %bb.1:
753; X86-BMI2-NEXT:    movl %eax, %edx
754; X86-BMI2-NEXT:    xorl %eax, %eax
755; X86-BMI2-NEXT:  .LBB17_2:
756; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
757; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
758; X86-BMI2-NEXT:    retl
759;
760; X64-NOBMI2-LABEL: clear_lowbits64_c4_commutative:
761; X64-NOBMI2:       # %bb.0:
762; X64-NOBMI2-NEXT:    movq %rsi, %rcx
763; X64-NOBMI2-NEXT:    shrq %cl, %rdi
764; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
765; X64-NOBMI2-NEXT:    shlq %cl, %rdi
766; X64-NOBMI2-NEXT:    movq %rdi, %rax
767; X64-NOBMI2-NEXT:    retq
768;
769; X64-BMI2-LABEL: clear_lowbits64_c4_commutative:
770; X64-BMI2:       # %bb.0:
771; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
772; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
773; X64-BMI2-NEXT:    retq
774  %mask = shl i64 -1, %numlowbits
775  %masked = and i64 %val, %mask ; swapped order
776  ret i64 %masked
777}
778
779; ---------------------------------------------------------------------------- ;
780; Pattern ic.
781; ---------------------------------------------------------------------------- ;
782
783; 8-bit
784
; Pattern ic, i8, both operands passed by value: the shift amount is
; %numhighbits = 8 - %numlowbits, and the result is (-1 << %numhighbits) & %val.
; NOTE(review): for %numlowbits == 0 the shift amount is 8, the full bit width;
; per the LLVM LangRef a shl by an amount >= the bit width yields poison.
785define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind {
786; X86-LABEL: clear_lowbits8_ic0:
787; X86:       # %bb.0:
788; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
789; X86-NEXT:    movb $8, %cl
790; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
791; X86-NEXT:    shrb %cl, %al
792; X86-NEXT:    shlb %cl, %al
793; X86-NEXT:    retl
794;
795; X64-LABEL: clear_lowbits8_ic0:
796; X64:       # %bb.0:
797; X64-NEXT:    movb $8, %cl
798; X64-NEXT:    subb %sil, %cl
799; X64-NEXT:    shrb %cl, %dil
800; X64-NEXT:    shlb %cl, %dil
801; X64-NEXT:    movl %edi, %eax
802; X64-NEXT:    retq
803  %numhighbits = sub i8 8, %numlowbits
804  %mask = shl i8 -1, %numhighbits
805  %masked = and i8 %mask, %val
806  ret i8 %masked
807}
808
; Pattern ic, i8, with %val loaded from the pointer %w: the shift amount is
; %numhighbits = 8 - %numlowbits, and the result is (-1 << %numhighbits) & %val.
; NOTE(review): for %numlowbits == 0 the shift amount is 8, the full bit width;
; per the LLVM LangRef a shl by an amount >= the bit width yields poison.
809define i8 @clear_lowbits8_ic2_load(i8* %w, i8 %numlowbits) nounwind {
810; X86-LABEL: clear_lowbits8_ic2_load:
811; X86:       # %bb.0:
812; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
813; X86-NEXT:    movb (%eax), %al
814; X86-NEXT:    movb $8, %cl
815; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
816; X86-NEXT:    shrb %cl, %al
817; X86-NEXT:    shlb %cl, %al
818; X86-NEXT:    retl
819;
820; X64-LABEL: clear_lowbits8_ic2_load:
821; X64:       # %bb.0:
822; X64-NEXT:    movb (%rdi), %al
823; X64-NEXT:    movb $8, %cl
824; X64-NEXT:    subb %sil, %cl
825; X64-NEXT:    shrb %cl, %al
826; X64-NEXT:    shlb %cl, %al
827; X64-NEXT:    retq
828  %val = load i8, i8* %w
829  %numhighbits = sub i8 8, %numlowbits
830  %mask = shl i8 -1, %numhighbits
831  %masked = and i8 %mask, %val
832  ret i8 %masked
833}
834
; Pattern ic, i8, same mask computation as clear_lowbits8_ic0 (shift amount is
; 8 - %numlowbits) but with the 'and' operands swapped (%val first, %mask second).
; NOTE(review): for %numlowbits == 0 the shift amount is 8, the full bit width;
; per the LLVM LangRef a shl by an amount >= the bit width yields poison.
835define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind {
836; X86-LABEL: clear_lowbits8_ic4_commutative:
837; X86:       # %bb.0:
838; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
839; X86-NEXT:    movb $8, %cl
840; X86-NEXT:    subb {{[0-9]+}}(%esp), %cl
841; X86-NEXT:    shrb %cl, %al
842; X86-NEXT:    shlb %cl, %al
843; X86-NEXT:    retl
844;
845; X64-LABEL: clear_lowbits8_ic4_commutative:
846; X64:       # %bb.0:
847; X64-NEXT:    movb $8, %cl
848; X64-NEXT:    subb %sil, %cl
849; X64-NEXT:    shrb %cl, %dil
850; X64-NEXT:    shlb %cl, %dil
851; X64-NEXT:    movl %edi, %eax
852; X64-NEXT:    retq
853  %numhighbits = sub i8 8, %numlowbits
854  %mask = shl i8 -1, %numhighbits
855  %masked = and i8 %val, %mask ; swapped order
856  ret i8 %masked
857}
858
859; 16-bit
860
; Pattern ic on i16: returns %val & (-1 << (16 - %numlowbits)).
; The assertions expect the value to be zero-extended to 32 bits and then
; shifted right and left by the same count: shrl/shll without BMI2,
; shrxl/shlxl with BMI2.
861define i16 @clear_lowbits16_ic0(i16 %val, i16 %numlowbits) nounwind {
862; X86-NOBMI2-LABEL: clear_lowbits16_ic0:
863; X86-NOBMI2:       # %bb.0:
864; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
865; X86-NOBMI2-NEXT:    movw $16, %cx
866; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
867; X86-NOBMI2-NEXT:    shrl %cl, %eax
868; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
869; X86-NOBMI2-NEXT:    shll %cl, %eax
870; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
871; X86-NOBMI2-NEXT:    retl
872;
873; X86-BMI2-LABEL: clear_lowbits16_ic0:
874; X86-BMI2:       # %bb.0:
875; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
876; X86-BMI2-NEXT:    movw $16, %cx
877; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
878; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
879; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
880; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
881; X86-BMI2-NEXT:    retl
882;
883; X64-NOBMI2-LABEL: clear_lowbits16_ic0:
884; X64-NOBMI2:       # %bb.0:
885; X64-NOBMI2-NEXT:    movzwl %di, %eax
886; X64-NOBMI2-NEXT:    movl $16, %ecx
887; X64-NOBMI2-NEXT:    subl %esi, %ecx
888; X64-NOBMI2-NEXT:    shrl %cl, %eax
889; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
890; X64-NOBMI2-NEXT:    shll %cl, %eax
891; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
892; X64-NOBMI2-NEXT:    retq
893;
894; X64-BMI2-LABEL: clear_lowbits16_ic0:
895; X64-BMI2:       # %bb.0:
896; X64-BMI2-NEXT:    movzwl %di, %eax
897; X64-BMI2-NEXT:    movl $16, %ecx
898; X64-BMI2-NEXT:    subl %esi, %ecx
899; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
900; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
901; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
902; X64-BMI2-NEXT:    retq
903  %numhighbits = sub i16 16, %numlowbits ; shift count = 16 - %numlowbits
904  %mask = shl i16 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
905  %masked = and i16 %mask, %val ; mask is the first operand of the and
906  ret i16 %masked
907}
908
; Pattern ic on i16 where the bit count is an i8: the shift count
; (16 - %numlowbits) is computed in i8 and zero-extended to i16 before the shl.
; The assertions expect the count to be computed with byte ops (movb/subb) and
; the value to go through a shrl/shll pair, or shrxl/shlxl with BMI2.
909define i16 @clear_lowbits16_ic1_indexzext(i16 %val, i8 %numlowbits) nounwind {
910; X86-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
911; X86-NOBMI2:       # %bb.0:
912; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
913; X86-NOBMI2-NEXT:    movb $16, %cl
914; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
915; X86-NOBMI2-NEXT:    shrl %cl, %eax
916; X86-NOBMI2-NEXT:    shll %cl, %eax
917; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
918; X86-NOBMI2-NEXT:    retl
919;
920; X86-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
921; X86-BMI2:       # %bb.0:
922; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
923; X86-BMI2-NEXT:    movb $16, %cl
924; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
925; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
926; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
927; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
928; X86-BMI2-NEXT:    retl
929;
930; X64-NOBMI2-LABEL: clear_lowbits16_ic1_indexzext:
931; X64-NOBMI2:       # %bb.0:
932; X64-NOBMI2-NEXT:    movzwl %di, %eax
933; X64-NOBMI2-NEXT:    movb $16, %cl
934; X64-NOBMI2-NEXT:    subb %sil, %cl
935; X64-NOBMI2-NEXT:    shrl %cl, %eax
936; X64-NOBMI2-NEXT:    shll %cl, %eax
937; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
938; X64-NOBMI2-NEXT:    retq
939;
940; X64-BMI2-LABEL: clear_lowbits16_ic1_indexzext:
941; X64-BMI2:       # %bb.0:
942; X64-BMI2-NEXT:    movzwl %di, %eax
943; X64-BMI2-NEXT:    movb $16, %cl
944; X64-BMI2-NEXT:    subb %sil, %cl
945; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
946; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
947; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
948; X64-BMI2-NEXT:    retq
949  %numhighbits = sub i8 16, %numlowbits ; shift count computed in i8
950  %sh_prom = zext i8 %numhighbits to i16 ; widen the count to the value type
951  %mask = shl i16 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
952  %masked = and i16 %mask, %val ; mask is the first operand of the and
953  ret i16 %masked
954}
955
; Pattern ic on i16 with the value loaded from memory: returns
; (load %w) & (-1 << (16 - %numlowbits)).
; The assertions expect a zero-extending load (movzwl) followed by a
; shrl/shll pair, or shrxl/shlxl with BMI2.
956define i16 @clear_lowbits16_ic2_load(i16* %w, i16 %numlowbits) nounwind {
957; X86-NOBMI2-LABEL: clear_lowbits16_ic2_load:
958; X86-NOBMI2:       # %bb.0:
959; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
960; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
961; X86-NOBMI2-NEXT:    movw $16, %cx
962; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
963; X86-NOBMI2-NEXT:    shrl %cl, %eax
964; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
965; X86-NOBMI2-NEXT:    shll %cl, %eax
966; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
967; X86-NOBMI2-NEXT:    retl
968;
969; X86-BMI2-LABEL: clear_lowbits16_ic2_load:
970; X86-BMI2:       # %bb.0:
971; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
972; X86-BMI2-NEXT:    movzwl (%eax), %eax
973; X86-BMI2-NEXT:    movw $16, %cx
974; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
975; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
976; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
977; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
978; X86-BMI2-NEXT:    retl
979;
980; X64-NOBMI2-LABEL: clear_lowbits16_ic2_load:
981; X64-NOBMI2:       # %bb.0:
982; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
983; X64-NOBMI2-NEXT:    movl $16, %ecx
984; X64-NOBMI2-NEXT:    subl %esi, %ecx
985; X64-NOBMI2-NEXT:    shrl %cl, %eax
986; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
987; X64-NOBMI2-NEXT:    shll %cl, %eax
988; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
989; X64-NOBMI2-NEXT:    retq
990;
991; X64-BMI2-LABEL: clear_lowbits16_ic2_load:
992; X64-BMI2:       # %bb.0:
993; X64-BMI2-NEXT:    movzwl (%rdi), %eax
994; X64-BMI2-NEXT:    movl $16, %ecx
995; X64-BMI2-NEXT:    subl %esi, %ecx
996; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
997; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
998; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
999; X64-BMI2-NEXT:    retq
1000  %val = load i16, i16* %w ; value to be masked comes from memory
1001  %numhighbits = sub i16 16, %numlowbits ; shift count = 16 - %numlowbits
1002  %mask = shl i16 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1003  %masked = and i16 %mask, %val ; mask is the first operand of the and
1004  ret i16 %masked
1005}
1006
; Pattern ic on i16 combining both variations: the value is loaded from %w and
; the shift count (16 - %numlowbits) is computed in i8, then zero-extended.
; The assertions expect movzwl for the load, byte ops for the count, and a
; shrl/shll pair (shrxl/shlxl with BMI2) on the value.
1007define i16 @clear_lowbits16_ic3_load_indexzext(i16* %w, i8 %numlowbits) nounwind {
1008; X86-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
1009; X86-NOBMI2:       # %bb.0:
1010; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1011; X86-NOBMI2-NEXT:    movzwl (%eax), %eax
1012; X86-NOBMI2-NEXT:    movb $16, %cl
1013; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1014; X86-NOBMI2-NEXT:    shrl %cl, %eax
1015; X86-NOBMI2-NEXT:    shll %cl, %eax
1016; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1017; X86-NOBMI2-NEXT:    retl
1018;
1019; X86-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
1020; X86-BMI2:       # %bb.0:
1021; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1022; X86-BMI2-NEXT:    movzwl (%eax), %eax
1023; X86-BMI2-NEXT:    movb $16, %cl
1024; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1025; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
1026; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1027; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1028; X86-BMI2-NEXT:    retl
1029;
1030; X64-NOBMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
1031; X64-NOBMI2:       # %bb.0:
1032; X64-NOBMI2-NEXT:    movzwl (%rdi), %eax
1033; X64-NOBMI2-NEXT:    movb $16, %cl
1034; X64-NOBMI2-NEXT:    subb %sil, %cl
1035; X64-NOBMI2-NEXT:    shrl %cl, %eax
1036; X64-NOBMI2-NEXT:    shll %cl, %eax
1037; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1038; X64-NOBMI2-NEXT:    retq
1039;
1040; X64-BMI2-LABEL: clear_lowbits16_ic3_load_indexzext:
1041; X64-BMI2:       # %bb.0:
1042; X64-BMI2-NEXT:    movzwl (%rdi), %eax
1043; X64-BMI2-NEXT:    movb $16, %cl
1044; X64-BMI2-NEXT:    subb %sil, %cl
1045; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
1046; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1047; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1048; X64-BMI2-NEXT:    retq
1049  %val = load i16, i16* %w ; value to be masked comes from memory
1050  %numhighbits = sub i8 16, %numlowbits ; shift count computed in i8
1051  %sh_prom = zext i8 %numhighbits to i16 ; widen the count to the value type
1052  %mask = shl i16 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
1053  %masked = and i16 %mask, %val ; mask is the first operand of the and
1054  ret i16 %masked
1055}
1056
; Pattern ic on i16 with the operands of the and swapped (value first, mask
; second). The expected code is the same as for clear_lowbits16_ic0: a
; shrl/shll pair, or shrxl/shlxl with BMI2, on the zero-extended value.
1057define i16 @clear_lowbits16_ic4_commutative(i16 %val, i16 %numlowbits) nounwind {
1058; X86-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
1059; X86-NOBMI2:       # %bb.0:
1060; X86-NOBMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1061; X86-NOBMI2-NEXT:    movw $16, %cx
1062; X86-NOBMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
1063; X86-NOBMI2-NEXT:    shrl %cl, %eax
1064; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $cx
1065; X86-NOBMI2-NEXT:    shll %cl, %eax
1066; X86-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1067; X86-NOBMI2-NEXT:    retl
1068;
1069; X86-BMI2-LABEL: clear_lowbits16_ic4_commutative:
1070; X86-BMI2:       # %bb.0:
1071; X86-BMI2-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
1072; X86-BMI2-NEXT:    movw $16, %cx
1073; X86-BMI2-NEXT:    subw {{[0-9]+}}(%esp), %cx
1074; X86-BMI2-NEXT:    shrxl %ecx, %eax, %eax
1075; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1076; X86-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1077; X86-BMI2-NEXT:    retl
1078;
1079; X64-NOBMI2-LABEL: clear_lowbits16_ic4_commutative:
1080; X64-NOBMI2:       # %bb.0:
1081; X64-NOBMI2-NEXT:    movzwl %di, %eax
1082; X64-NOBMI2-NEXT:    movl $16, %ecx
1083; X64-NOBMI2-NEXT:    subl %esi, %ecx
1084; X64-NOBMI2-NEXT:    shrl %cl, %eax
1085; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1086; X64-NOBMI2-NEXT:    shll %cl, %eax
1087; X64-NOBMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1088; X64-NOBMI2-NEXT:    retq
1089;
1090; X64-BMI2-LABEL: clear_lowbits16_ic4_commutative:
1091; X64-BMI2:       # %bb.0:
1092; X64-BMI2-NEXT:    movzwl %di, %eax
1093; X64-BMI2-NEXT:    movl $16, %ecx
1094; X64-BMI2-NEXT:    subl %esi, %ecx
1095; X64-BMI2-NEXT:    shrxl %ecx, %eax, %eax
1096; X64-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1097; X64-BMI2-NEXT:    # kill: def $ax killed $ax killed $eax
1098; X64-BMI2-NEXT:    retq
1099  %numhighbits = sub i16 16, %numlowbits ; shift count = 16 - %numlowbits
1100  %mask = shl i16 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1101  %masked = and i16 %val, %mask ; swapped order
1102  ret i16 %masked
1103}
1104
1105; 32-bit
1106
; Pattern ic on i32: returns %val & (-1 << (32 - %numlowbits)).
; The assertions expect a right shift followed by a left shift by the same
; count instead of an explicit mask: shrl/shll without BMI2, shrxl/shlxl with
; BMI2 (on i686 the shrxl reads the value directly from the stack).
1107define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind {
1108; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
1109; X86-NOBMI2:       # %bb.0:
1110; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1111; X86-NOBMI2-NEXT:    movl $32, %ecx
1112; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1113; X86-NOBMI2-NEXT:    shrl %cl, %eax
1114; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1115; X86-NOBMI2-NEXT:    shll %cl, %eax
1116; X86-NOBMI2-NEXT:    retl
1117;
1118; X86-BMI2-LABEL: clear_lowbits32_ic0:
1119; X86-BMI2:       # %bb.0:
1120; X86-BMI2-NEXT:    movl $32, %eax
1121; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
1122; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
1123; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1124; X86-BMI2-NEXT:    retl
1125;
1126; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
1127; X64-NOBMI2:       # %bb.0:
1128; X64-NOBMI2-NEXT:    movl $32, %ecx
1129; X64-NOBMI2-NEXT:    subl %esi, %ecx
1130; X64-NOBMI2-NEXT:    shrl %cl, %edi
1131; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1132; X64-NOBMI2-NEXT:    shll %cl, %edi
1133; X64-NOBMI2-NEXT:    movl %edi, %eax
1134; X64-NOBMI2-NEXT:    retq
1135;
1136; X64-BMI2-LABEL: clear_lowbits32_ic0:
1137; X64-BMI2:       # %bb.0:
1138; X64-BMI2-NEXT:    movl $32, %eax
1139; X64-BMI2-NEXT:    subl %esi, %eax
1140; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
1141; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1142; X64-BMI2-NEXT:    retq
1143  %numhighbits = sub i32 32, %numlowbits ; shift count = 32 - %numlowbits
1144  %mask = shl i32 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1145  %masked = and i32 %mask, %val ; mask is the first operand of the and
1146  ret i32 %masked
1147}
1148
; Pattern ic on i32 where the bit count is an i8: the shift count
; (32 - %numlowbits) is computed in i8 and zero-extended to i32 before the shl.
; The assertions expect the count to be computed with byte ops (movb/subb) and
; the value to go through shrl/shll, or shrxl/shlxl with BMI2.
1149define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind {
1150; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
1151; X86-NOBMI2:       # %bb.0:
1152; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1153; X86-NOBMI2-NEXT:    movb $32, %cl
1154; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1155; X86-NOBMI2-NEXT:    shrl %cl, %eax
1156; X86-NOBMI2-NEXT:    shll %cl, %eax
1157; X86-NOBMI2-NEXT:    retl
1158;
1159; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
1160; X86-BMI2:       # %bb.0:
1161; X86-BMI2-NEXT:    movb $32, %al
1162; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
1163; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
1164; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1165; X86-BMI2-NEXT:    retl
1166;
1167; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
1168; X64-NOBMI2:       # %bb.0:
1169; X64-NOBMI2-NEXT:    movb $32, %cl
1170; X64-NOBMI2-NEXT:    subb %sil, %cl
1171; X64-NOBMI2-NEXT:    shrl %cl, %edi
1172; X64-NOBMI2-NEXT:    shll %cl, %edi
1173; X64-NOBMI2-NEXT:    movl %edi, %eax
1174; X64-NOBMI2-NEXT:    retq
1175;
1176; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
1177; X64-BMI2:       # %bb.0:
1178; X64-BMI2-NEXT:    movb $32, %al
1179; X64-BMI2-NEXT:    subb %sil, %al
1180; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
1181; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1182; X64-BMI2-NEXT:    retq
1183  %numhighbits = sub i8 32, %numlowbits ; shift count computed in i8
1184  %sh_prom = zext i8 %numhighbits to i32 ; widen the count to the value type
1185  %mask = shl i32 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
1186  %masked = and i32 %mask, %val ; mask is the first operand of the and
1187  ret i32 %masked
1188}
1189
; Pattern ic on i32 with the value loaded from memory: returns
; (load %w) & (-1 << (32 - %numlowbits)).
; Without BMI2 the assertions expect a separate movl load followed by
; shrl/shll; with BMI2 they expect the load to be folded into shrxl as a
; memory operand, followed by shlxl.
1190define i32 @clear_lowbits32_ic2_load(i32* %w, i32 %numlowbits) nounwind {
1191; X86-NOBMI2-LABEL: clear_lowbits32_ic2_load:
1192; X86-NOBMI2:       # %bb.0:
1193; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1194; X86-NOBMI2-NEXT:    movl (%eax), %eax
1195; X86-NOBMI2-NEXT:    movl $32, %ecx
1196; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1197; X86-NOBMI2-NEXT:    shrl %cl, %eax
1198; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1199; X86-NOBMI2-NEXT:    shll %cl, %eax
1200; X86-NOBMI2-NEXT:    retl
1201;
1202; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
1203; X86-BMI2:       # %bb.0:
1204; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1205; X86-BMI2-NEXT:    movl $32, %ecx
1206; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1207; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
1208; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1209; X86-BMI2-NEXT:    retl
1210;
1211; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
1212; X64-NOBMI2:       # %bb.0:
1213; X64-NOBMI2-NEXT:    movl (%rdi), %eax
1214; X64-NOBMI2-NEXT:    movl $32, %ecx
1215; X64-NOBMI2-NEXT:    subl %esi, %ecx
1216; X64-NOBMI2-NEXT:    shrl %cl, %eax
1217; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1218; X64-NOBMI2-NEXT:    shll %cl, %eax
1219; X64-NOBMI2-NEXT:    retq
1220;
1221; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
1222; X64-BMI2:       # %bb.0:
1223; X64-BMI2-NEXT:    movl $32, %eax
1224; X64-BMI2-NEXT:    subl %esi, %eax
1225; X64-BMI2-NEXT:    shrxl %eax, (%rdi), %ecx
1226; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1227; X64-BMI2-NEXT:    retq
1228  %val = load i32, i32* %w ; value to be masked comes from memory
1229  %numhighbits = sub i32 32, %numlowbits ; shift count = 32 - %numlowbits
1230  %mask = shl i32 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1231  %masked = and i32 %mask, %val ; mask is the first operand of the and
1232  ret i32 %masked
1233}
1234
; Pattern ic on i32 combining both variations: the value is loaded from %w and
; the shift count (32 - %numlowbits) is computed in i8, then zero-extended.
; Without BMI2 the assertions expect movl + shrl/shll; with BMI2 they expect
; the load to be folded into shrxl as a memory operand, followed by shlxl.
1235define i32 @clear_lowbits32_ic3_load_indexzext(i32* %w, i8 %numlowbits) nounwind {
1236; X86-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
1237; X86-NOBMI2:       # %bb.0:
1238; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1239; X86-NOBMI2-NEXT:    movl (%eax), %eax
1240; X86-NOBMI2-NEXT:    movb $32, %cl
1241; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1242; X86-NOBMI2-NEXT:    shrl %cl, %eax
1243; X86-NOBMI2-NEXT:    shll %cl, %eax
1244; X86-NOBMI2-NEXT:    retl
1245;
1246; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
1247; X86-BMI2:       # %bb.0:
1248; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1249; X86-BMI2-NEXT:    movb $32, %cl
1250; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1251; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
1252; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
1253; X86-BMI2-NEXT:    retl
1254;
1255; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
1256; X64-NOBMI2:       # %bb.0:
1257; X64-NOBMI2-NEXT:    movl (%rdi), %eax
1258; X64-NOBMI2-NEXT:    movb $32, %cl
1259; X64-NOBMI2-NEXT:    subb %sil, %cl
1260; X64-NOBMI2-NEXT:    shrl %cl, %eax
1261; X64-NOBMI2-NEXT:    shll %cl, %eax
1262; X64-NOBMI2-NEXT:    retq
1263;
1264; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
1265; X64-BMI2:       # %bb.0:
1266; X64-BMI2-NEXT:    movb $32, %al
1267; X64-BMI2-NEXT:    subb %sil, %al
1268; X64-BMI2-NEXT:    shrxl %eax, (%rdi), %ecx
1269; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1270; X64-BMI2-NEXT:    retq
1271  %val = load i32, i32* %w ; value to be masked comes from memory
1272  %numhighbits = sub i8 32, %numlowbits ; shift count computed in i8
1273  %sh_prom = zext i8 %numhighbits to i32 ; widen the count to the value type
1274  %mask = shl i32 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
1275  %masked = and i32 %mask, %val ; mask is the first operand of the and
1276  ret i32 %masked
1277}
1278
; Pattern ic on i32 with the operands of the and swapped (value first, mask
; second). The expected code is the same as for clear_lowbits32_ic0: a
; shrl/shll pair, or shrxl/shlxl with BMI2.
1279define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind {
1280; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
1281; X86-NOBMI2:       # %bb.0:
1282; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
1283; X86-NOBMI2-NEXT:    movl $32, %ecx
1284; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1285; X86-NOBMI2-NEXT:    shrl %cl, %eax
1286; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1287; X86-NOBMI2-NEXT:    shll %cl, %eax
1288; X86-NOBMI2-NEXT:    retl
1289;
1290; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
1291; X86-BMI2:       # %bb.0:
1292; X86-BMI2-NEXT:    movl $32, %eax
1293; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
1294; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
1295; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1296; X86-BMI2-NEXT:    retl
1297;
1298; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
1299; X64-NOBMI2:       # %bb.0:
1300; X64-NOBMI2-NEXT:    movl $32, %ecx
1301; X64-NOBMI2-NEXT:    subl %esi, %ecx
1302; X64-NOBMI2-NEXT:    shrl %cl, %edi
1303; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1304; X64-NOBMI2-NEXT:    shll %cl, %edi
1305; X64-NOBMI2-NEXT:    movl %edi, %eax
1306; X64-NOBMI2-NEXT:    retq
1307;
1308; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
1309; X64-BMI2:       # %bb.0:
1310; X64-BMI2-NEXT:    movl $32, %eax
1311; X64-BMI2-NEXT:    subl %esi, %eax
1312; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
1313; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
1314; X64-BMI2-NEXT:    retq
1315  %numhighbits = sub i32 32, %numlowbits ; shift count = 32 - %numlowbits
1316  %mask = shl i32 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1317  %masked = and i32 %val, %mask ; swapped order
1318  ret i32 %masked
1319}
1320
1321; 64-bit
1322
; Pattern ic on i64: returns %val & (-1 << (64 - %numlowbits)).
; On x86_64 the assertions expect a shrq/shlq pair (shrxq/shlxq with BMI2).
; On i686 the shift pair is not used: the 64-bit mask is built in %edx:%eax
; (shll or shlxl plus shldl, with a fix-up branch when bit 5 of the count is
; set) and then and-ed with the two halves of the value.
1323define i64 @clear_lowbits64_ic0(i64 %val, i64 %numlowbits) nounwind {
1324; X86-NOBMI2-LABEL: clear_lowbits64_ic0:
1325; X86-NOBMI2:       # %bb.0:
1326; X86-NOBMI2-NEXT:    movl $64, %ecx
1327; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1328; X86-NOBMI2-NEXT:    movl $-1, %edx
1329; X86-NOBMI2-NEXT:    movl $-1, %eax
1330; X86-NOBMI2-NEXT:    shll %cl, %eax
1331; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
1332; X86-NOBMI2-NEXT:    testb $32, %cl
1333; X86-NOBMI2-NEXT:    je .LBB31_2
1334; X86-NOBMI2-NEXT:  # %bb.1:
1335; X86-NOBMI2-NEXT:    movl %eax, %edx
1336; X86-NOBMI2-NEXT:    xorl %eax, %eax
1337; X86-NOBMI2-NEXT:  .LBB31_2:
1338; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1339; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1340; X86-NOBMI2-NEXT:    retl
1341;
1342; X86-BMI2-LABEL: clear_lowbits64_ic0:
1343; X86-BMI2:       # %bb.0:
1344; X86-BMI2-NEXT:    movl $64, %ecx
1345; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1346; X86-BMI2-NEXT:    movl $-1, %edx
1347; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
1348; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
1349; X86-BMI2-NEXT:    testb $32, %cl
1350; X86-BMI2-NEXT:    je .LBB31_2
1351; X86-BMI2-NEXT:  # %bb.1:
1352; X86-BMI2-NEXT:    movl %eax, %edx
1353; X86-BMI2-NEXT:    xorl %eax, %eax
1354; X86-BMI2-NEXT:  .LBB31_2:
1355; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1356; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1357; X86-BMI2-NEXT:    retl
1358;
1359; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
1360; X64-NOBMI2:       # %bb.0:
1361; X64-NOBMI2-NEXT:    movl $64, %ecx
1362; X64-NOBMI2-NEXT:    subl %esi, %ecx
1363; X64-NOBMI2-NEXT:    shrq %cl, %rdi
1364; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1365; X64-NOBMI2-NEXT:    shlq %cl, %rdi
1366; X64-NOBMI2-NEXT:    movq %rdi, %rax
1367; X64-NOBMI2-NEXT:    retq
1368;
1369; X64-BMI2-LABEL: clear_lowbits64_ic0:
1370; X64-BMI2:       # %bb.0:
1371; X64-BMI2-NEXT:    movl $64, %eax
1372; X64-BMI2-NEXT:    subl %esi, %eax
1373; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
1374; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
1375; X64-BMI2-NEXT:    retq
1376  %numhighbits = sub i64 64, %numlowbits ; shift count = 64 - %numlowbits
1377  %mask = shl i64 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1378  %masked = and i64 %mask, %val ; mask is the first operand of the and
1379  ret i64 %masked
1380}
1381
; Pattern ic on i64 where the bit count is an i8: the shift count
; (64 - %numlowbits) is computed in i8 and zero-extended to i64 before the shl.
; On x86_64 the assertions expect byte ops for the count and a shrq/shlq pair
; (shrxq/shlxq with BMI2). On i686 the 64-bit mask is built in %edx:%eax
; (shll or shlxl plus shldl, with a fix-up branch when bit 5 of the count is
; set) and then and-ed with the two halves of the value.
1382define i64 @clear_lowbits64_ic1_indexzext(i64 %val, i8 %numlowbits) nounwind {
1383; X86-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
1384; X86-NOBMI2:       # %bb.0:
1385; X86-NOBMI2-NEXT:    movb $64, %cl
1386; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1387; X86-NOBMI2-NEXT:    movl $-1, %edx
1388; X86-NOBMI2-NEXT:    movl $-1, %eax
1389; X86-NOBMI2-NEXT:    shll %cl, %eax
1390; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
1391; X86-NOBMI2-NEXT:    testb $32, %cl
1392; X86-NOBMI2-NEXT:    je .LBB32_2
1393; X86-NOBMI2-NEXT:  # %bb.1:
1394; X86-NOBMI2-NEXT:    movl %eax, %edx
1395; X86-NOBMI2-NEXT:    xorl %eax, %eax
1396; X86-NOBMI2-NEXT:  .LBB32_2:
1397; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1398; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1399; X86-NOBMI2-NEXT:    retl
1400;
1401; X86-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
1402; X86-BMI2:       # %bb.0:
1403; X86-BMI2-NEXT:    movb $64, %cl
1404; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1405; X86-BMI2-NEXT:    movl $-1, %edx
1406; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
1407; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
1408; X86-BMI2-NEXT:    testb $32, %cl
1409; X86-BMI2-NEXT:    je .LBB32_2
1410; X86-BMI2-NEXT:  # %bb.1:
1411; X86-BMI2-NEXT:    movl %eax, %edx
1412; X86-BMI2-NEXT:    xorl %eax, %eax
1413; X86-BMI2-NEXT:  .LBB32_2:
1414; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1415; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1416; X86-BMI2-NEXT:    retl
1417;
1418; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
1419; X64-NOBMI2:       # %bb.0:
1420; X64-NOBMI2-NEXT:    movb $64, %cl
1421; X64-NOBMI2-NEXT:    subb %sil, %cl
1422; X64-NOBMI2-NEXT:    shrq %cl, %rdi
1423; X64-NOBMI2-NEXT:    shlq %cl, %rdi
1424; X64-NOBMI2-NEXT:    movq %rdi, %rax
1425; X64-NOBMI2-NEXT:    retq
1426;
1427; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
1428; X64-BMI2:       # %bb.0:
1429; X64-BMI2-NEXT:    movb $64, %al
1430; X64-BMI2-NEXT:    subb %sil, %al
1431; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
1432; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
1433; X64-BMI2-NEXT:    retq
1434  %numhighbits = sub i8 64, %numlowbits ; shift count computed in i8
1435  %sh_prom = zext i8 %numhighbits to i64 ; widen the count to the value type
1436  %mask = shl i64 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
1437  %masked = and i64 %mask, %val ; mask is the first operand of the and
1438  ret i64 %masked
1439}
1440
; Pattern ic on i64 with the value loaded from memory: returns
; (load %w) & (-1 << (64 - %numlowbits)).
; On x86_64 the assertions expect movq + shrq/shlq without BMI2, and the load
; folded into shrxq followed by shlxq with BMI2. On i686 the 64-bit mask is
; built in %edx:%eax and and-ed directly with the two 32-bit halves in memory
; (4(%esi) and (%esi)); %esi is saved and restored around its use.
1441define i64 @clear_lowbits64_ic2_load(i64* %w, i64 %numlowbits) nounwind {
1442; X86-NOBMI2-LABEL: clear_lowbits64_ic2_load:
1443; X86-NOBMI2:       # %bb.0:
1444; X86-NOBMI2-NEXT:    pushl %esi
1445; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1446; X86-NOBMI2-NEXT:    movl $64, %ecx
1447; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1448; X86-NOBMI2-NEXT:    movl $-1, %edx
1449; X86-NOBMI2-NEXT:    movl $-1, %eax
1450; X86-NOBMI2-NEXT:    shll %cl, %eax
1451; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
1452; X86-NOBMI2-NEXT:    testb $32, %cl
1453; X86-NOBMI2-NEXT:    je .LBB33_2
1454; X86-NOBMI2-NEXT:  # %bb.1:
1455; X86-NOBMI2-NEXT:    movl %eax, %edx
1456; X86-NOBMI2-NEXT:    xorl %eax, %eax
1457; X86-NOBMI2-NEXT:  .LBB33_2:
1458; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
1459; X86-NOBMI2-NEXT:    andl (%esi), %eax
1460; X86-NOBMI2-NEXT:    popl %esi
1461; X86-NOBMI2-NEXT:    retl
1462;
1463; X86-BMI2-LABEL: clear_lowbits64_ic2_load:
1464; X86-BMI2:       # %bb.0:
1465; X86-BMI2-NEXT:    pushl %esi
1466; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1467; X86-BMI2-NEXT:    movl $64, %ecx
1468; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1469; X86-BMI2-NEXT:    movl $-1, %edx
1470; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
1471; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
1472; X86-BMI2-NEXT:    testb $32, %cl
1473; X86-BMI2-NEXT:    je .LBB33_2
1474; X86-BMI2-NEXT:  # %bb.1:
1475; X86-BMI2-NEXT:    movl %eax, %edx
1476; X86-BMI2-NEXT:    xorl %eax, %eax
1477; X86-BMI2-NEXT:  .LBB33_2:
1478; X86-BMI2-NEXT:    andl 4(%esi), %edx
1479; X86-BMI2-NEXT:    andl (%esi), %eax
1480; X86-BMI2-NEXT:    popl %esi
1481; X86-BMI2-NEXT:    retl
1482;
1483; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
1484; X64-NOBMI2:       # %bb.0:
1485; X64-NOBMI2-NEXT:    movq (%rdi), %rax
1486; X64-NOBMI2-NEXT:    movl $64, %ecx
1487; X64-NOBMI2-NEXT:    subl %esi, %ecx
1488; X64-NOBMI2-NEXT:    shrq %cl, %rax
1489; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1490; X64-NOBMI2-NEXT:    shlq %cl, %rax
1491; X64-NOBMI2-NEXT:    retq
1492;
1493; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
1494; X64-BMI2:       # %bb.0:
1495; X64-BMI2-NEXT:    movl $64, %eax
1496; X64-BMI2-NEXT:    subl %esi, %eax
1497; X64-BMI2-NEXT:    shrxq %rax, (%rdi), %rcx
1498; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
1499; X64-BMI2-NEXT:    retq
1500  %val = load i64, i64* %w ; value to be masked comes from memory
1501  %numhighbits = sub i64 64, %numlowbits ; shift count = 64 - %numlowbits
1502  %mask = shl i64 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1503  %masked = and i64 %mask, %val ; mask is the first operand of the and
1504  ret i64 %masked
1505}
1506
; Pattern ic on i64 combining both variations: the value is loaded from %w and
; the shift count (64 - %numlowbits) is computed in i8, then zero-extended.
; On x86_64 the assertions expect movq + shrq/shlq without BMI2, and the load
; folded into shrxq followed by shlxq with BMI2. On i686 the 64-bit mask is
; built in %edx:%eax and and-ed directly with the two 32-bit halves in memory
; (4(%esi) and (%esi)).
1507define i64 @clear_lowbits64_ic3_load_indexzext(i64* %w, i8 %numlowbits) nounwind {
1508; X86-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
1509; X86-NOBMI2:       # %bb.0:
1510; X86-NOBMI2-NEXT:    pushl %esi
1511; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1512; X86-NOBMI2-NEXT:    movb $64, %cl
1513; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1514; X86-NOBMI2-NEXT:    movl $-1, %edx
1515; X86-NOBMI2-NEXT:    movl $-1, %eax
1516; X86-NOBMI2-NEXT:    shll %cl, %eax
1517; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
1518; X86-NOBMI2-NEXT:    testb $32, %cl
1519; X86-NOBMI2-NEXT:    je .LBB34_2
1520; X86-NOBMI2-NEXT:  # %bb.1:
1521; X86-NOBMI2-NEXT:    movl %eax, %edx
1522; X86-NOBMI2-NEXT:    xorl %eax, %eax
1523; X86-NOBMI2-NEXT:  .LBB34_2:
1524; X86-NOBMI2-NEXT:    andl 4(%esi), %edx
1525; X86-NOBMI2-NEXT:    andl (%esi), %eax
1526; X86-NOBMI2-NEXT:    popl %esi
1527; X86-NOBMI2-NEXT:    retl
1528;
1529; X86-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
1530; X86-BMI2:       # %bb.0:
1531; X86-BMI2-NEXT:    pushl %esi
1532; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
1533; X86-BMI2-NEXT:    movb $64, %cl
1534; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
1535; X86-BMI2-NEXT:    movl $-1, %edx
1536; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
1537; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
1538; X86-BMI2-NEXT:    testb $32, %cl
1539; X86-BMI2-NEXT:    je .LBB34_2
1540; X86-BMI2-NEXT:  # %bb.1:
1541; X86-BMI2-NEXT:    movl %eax, %edx
1542; X86-BMI2-NEXT:    xorl %eax, %eax
1543; X86-BMI2-NEXT:  .LBB34_2:
1544; X86-BMI2-NEXT:    andl 4(%esi), %edx
1545; X86-BMI2-NEXT:    andl (%esi), %eax
1546; X86-BMI2-NEXT:    popl %esi
1547; X86-BMI2-NEXT:    retl
1548;
1549; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
1550; X64-NOBMI2:       # %bb.0:
1551; X64-NOBMI2-NEXT:    movq (%rdi), %rax
1552; X64-NOBMI2-NEXT:    movb $64, %cl
1553; X64-NOBMI2-NEXT:    subb %sil, %cl
1554; X64-NOBMI2-NEXT:    shrq %cl, %rax
1555; X64-NOBMI2-NEXT:    shlq %cl, %rax
1556; X64-NOBMI2-NEXT:    retq
1557;
1558; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
1559; X64-BMI2:       # %bb.0:
1560; X64-BMI2-NEXT:    movb $64, %al
1561; X64-BMI2-NEXT:    subb %sil, %al
1562; X64-BMI2-NEXT:    shrxq %rax, (%rdi), %rcx
1563; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
1564; X64-BMI2-NEXT:    retq
1565  %val = load i64, i64* %w ; value to be masked comes from memory
1566  %numhighbits = sub i8 64, %numlowbits ; shift count computed in i8
1567  %sh_prom = zext i8 %numhighbits to i64 ; widen the count to the value type
1568  %mask = shl i64 -1, %sh_prom ; all-ones with the low %sh_prom bits cleared
1569  %masked = and i64 %mask, %val ; mask is the first operand of the and
1570  ret i64 %masked
1571}
1572
; Pattern ic on i64 with the operands of the and swapped (value first, mask
; second). The expected code is the same as for clear_lowbits64_ic0: a
; shrq/shlq pair (shrxq/shlxq with BMI2) on x86_64, and on i686 a 64-bit mask
; built in %edx:%eax that is and-ed with the two halves of the value.
1573define i64 @clear_lowbits64_ic4_commutative(i64 %val, i64 %numlowbits) nounwind {
1574; X86-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
1575; X86-NOBMI2:       # %bb.0:
1576; X86-NOBMI2-NEXT:    movl $64, %ecx
1577; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1578; X86-NOBMI2-NEXT:    movl $-1, %edx
1579; X86-NOBMI2-NEXT:    movl $-1, %eax
1580; X86-NOBMI2-NEXT:    shll %cl, %eax
1581; X86-NOBMI2-NEXT:    shldl %cl, %edx, %edx
1582; X86-NOBMI2-NEXT:    testb $32, %cl
1583; X86-NOBMI2-NEXT:    je .LBB35_2
1584; X86-NOBMI2-NEXT:  # %bb.1:
1585; X86-NOBMI2-NEXT:    movl %eax, %edx
1586; X86-NOBMI2-NEXT:    xorl %eax, %eax
1587; X86-NOBMI2-NEXT:  .LBB35_2:
1588; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1589; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1590; X86-NOBMI2-NEXT:    retl
1591;
1592; X86-BMI2-LABEL: clear_lowbits64_ic4_commutative:
1593; X86-BMI2:       # %bb.0:
1594; X86-BMI2-NEXT:    movl $64, %ecx
1595; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
1596; X86-BMI2-NEXT:    movl $-1, %edx
1597; X86-BMI2-NEXT:    shlxl %ecx, %edx, %eax
1598; X86-BMI2-NEXT:    shldl %cl, %edx, %edx
1599; X86-BMI2-NEXT:    testb $32, %cl
1600; X86-BMI2-NEXT:    je .LBB35_2
1601; X86-BMI2-NEXT:  # %bb.1:
1602; X86-BMI2-NEXT:    movl %eax, %edx
1603; X86-BMI2-NEXT:    xorl %eax, %eax
1604; X86-BMI2-NEXT:  .LBB35_2:
1605; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
1606; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
1607; X86-BMI2-NEXT:    retl
1608;
1609; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
1610; X64-NOBMI2:       # %bb.0:
1611; X64-NOBMI2-NEXT:    movl $64, %ecx
1612; X64-NOBMI2-NEXT:    subl %esi, %ecx
1613; X64-NOBMI2-NEXT:    shrq %cl, %rdi
1614; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
1615; X64-NOBMI2-NEXT:    shlq %cl, %rdi
1616; X64-NOBMI2-NEXT:    movq %rdi, %rax
1617; X64-NOBMI2-NEXT:    retq
1618;
1619; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
1620; X64-BMI2:       # %bb.0:
1621; X64-BMI2-NEXT:    movl $64, %eax
1622; X64-BMI2-NEXT:    subl %esi, %eax
1623; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
1624; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
1625; X64-BMI2-NEXT:    retq
1626  %numhighbits = sub i64 64, %numlowbits ; shift count = 64 - %numlowbits
1627  %mask = shl i64 -1, %numhighbits ; all-ones with the low %numhighbits bits cleared
1628  %masked = and i64 %val, %mask ; swapped order
1629  ret i64 %masked
1630}
1631
1632; ---------------------------------------------------------------------------- ;
1633; Multi-use tests
1634; ---------------------------------------------------------------------------- ;
1635
; External functions with no body in this file. @use32 is called by the
; multi-use tests to give the mask a second use; @use64 is presumably the
; 64-bit counterpart (its call site is outside this excerpt).
1636declare void @use32(i32)
1637declare void @use64(i64)
1638
; Multi-use test on i32: %mask = -1 << %numlowbits is passed to @use32 and is
; also and-ed with %val, so the mask has a second use besides the and.
; The assertions expect the mask to be materialized (movl $-1 then shll, or
; shlxl with BMI2), kept in a register that is saved and restored around the
; call, and combined with %val using andl rather than a shift pair.
1639define i32 @oneuse32(i32 %val, i32 %numlowbits) nounwind {
1640; X86-NOBMI2-LABEL: oneuse32:
1641; X86-NOBMI2:       # %bb.0:
1642; X86-NOBMI2-NEXT:    pushl %esi
1643; X86-NOBMI2-NEXT:    subl $8, %esp
1644; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1645; X86-NOBMI2-NEXT:    movl $-1, %esi
1646; X86-NOBMI2-NEXT:    shll %cl, %esi
1647; X86-NOBMI2-NEXT:    movl %esi, (%esp)
1648; X86-NOBMI2-NEXT:    calll use32
1649; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1650; X86-NOBMI2-NEXT:    movl %esi, %eax
1651; X86-NOBMI2-NEXT:    addl $8, %esp
1652; X86-NOBMI2-NEXT:    popl %esi
1653; X86-NOBMI2-NEXT:    retl
1654;
1655; X86-BMI2-LABEL: oneuse32:
1656; X86-BMI2:       # %bb.0:
1657; X86-BMI2-NEXT:    pushl %esi
1658; X86-BMI2-NEXT:    subl $8, %esp
1659; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %al
1660; X86-BMI2-NEXT:    movl $-1, %ecx
1661; X86-BMI2-NEXT:    shlxl %eax, %ecx, %esi
1662; X86-BMI2-NEXT:    movl %esi, (%esp)
1663; X86-BMI2-NEXT:    calll use32
1664; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1665; X86-BMI2-NEXT:    movl %esi, %eax
1666; X86-BMI2-NEXT:    addl $8, %esp
1667; X86-BMI2-NEXT:    popl %esi
1668; X86-BMI2-NEXT:    retl
1669;
1670; X64-NOBMI2-LABEL: oneuse32:
1671; X64-NOBMI2:       # %bb.0:
1672; X64-NOBMI2-NEXT:    pushq %rbp
1673; X64-NOBMI2-NEXT:    pushq %rbx
1674; X64-NOBMI2-NEXT:    pushq %rax
1675; X64-NOBMI2-NEXT:    movl %edi, %ebx
1676; X64-NOBMI2-NEXT:    movl $-1, %ebp
1677; X64-NOBMI2-NEXT:    movl %esi, %ecx
1678; X64-NOBMI2-NEXT:    shll %cl, %ebp
1679; X64-NOBMI2-NEXT:    movl %ebp, %edi
1680; X64-NOBMI2-NEXT:    callq use32
1681; X64-NOBMI2-NEXT:    andl %ebx, %ebp
1682; X64-NOBMI2-NEXT:    movl %ebp, %eax
1683; X64-NOBMI2-NEXT:    addq $8, %rsp
1684; X64-NOBMI2-NEXT:    popq %rbx
1685; X64-NOBMI2-NEXT:    popq %rbp
1686; X64-NOBMI2-NEXT:    retq
1687;
1688; X64-BMI2-LABEL: oneuse32:
1689; X64-BMI2:       # %bb.0:
1690; X64-BMI2-NEXT:    pushq %rbp
1691; X64-BMI2-NEXT:    pushq %rbx
1692; X64-BMI2-NEXT:    pushq %rax
1693; X64-BMI2-NEXT:    movl %edi, %ebx
1694; X64-BMI2-NEXT:    movl $-1, %eax
1695; X64-BMI2-NEXT:    shlxl %esi, %eax, %ebp
1696; X64-BMI2-NEXT:    movl %ebp, %edi
1697; X64-BMI2-NEXT:    callq use32
1698; X64-BMI2-NEXT:    andl %ebx, %ebp
1699; X64-BMI2-NEXT:    movl %ebp, %eax
1700; X64-BMI2-NEXT:    addq $8, %rsp
1701; X64-BMI2-NEXT:    popq %rbx
1702; X64-BMI2-NEXT:    popq %rbp
1703; X64-BMI2-NEXT:    retq
1704  %mask = shl i32 -1, %numlowbits ; all-ones with the low %numlowbits bits cleared
1705  call void @use32(i32 %mask) ; extra use of the mask
1706  %masked = and i32 %mask, %val ; mask is the first operand of the and
1707  ret i32 %masked
1708}
1709
; oneuse64: 64-bit variant of pattern c (x & (-1 << y)) where the mask is
; also passed to @use64 before being and'ed with %val, so the mask has two
; users.
; On the x86_64 runs the expected code builds the mask with one 64-bit left
; shift of -1 (shlq without BMI2, shlxq with BMI2), keeps it in %rbx across
; the call, and finishes with andq.
; On the i686 runs the 64-bit shift is expanded into a 32-bit shift
; (shll / shlxl) plus shldl, with a 'testb $32, %cl' branch that fixes up the
; two halves when bit 5 of the shift amount is set; the mask is passed to
; use64 as two pushed 32-bit halves and each half is and'ed separately after
; the call.
; NOTE(review): presumably this guards against rewriting the mask-and-'and'
; form into the shift-pair form (pattern d) when the mask has an extra user -
; confirm against the combine being tested.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1710define i64 @oneuse64(i64 %val, i64 %numlowbits) nounwind {
1711; X86-NOBMI2-LABEL: oneuse64:
1712; X86-NOBMI2:       # %bb.0:
1713; X86-NOBMI2-NEXT:    pushl %edi
1714; X86-NOBMI2-NEXT:    pushl %esi
1715; X86-NOBMI2-NEXT:    pushl %eax
1716; X86-NOBMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1717; X86-NOBMI2-NEXT:    movl $-1, %esi
1718; X86-NOBMI2-NEXT:    movl $-1, %edi
1719; X86-NOBMI2-NEXT:    shll %cl, %edi
1720; X86-NOBMI2-NEXT:    shldl %cl, %esi, %esi
1721; X86-NOBMI2-NEXT:    testb $32, %cl
1722; X86-NOBMI2-NEXT:    je .LBB37_2
1723; X86-NOBMI2-NEXT:  # %bb.1:
1724; X86-NOBMI2-NEXT:    movl %edi, %esi
1725; X86-NOBMI2-NEXT:    xorl %edi, %edi
1726; X86-NOBMI2-NEXT:  .LBB37_2:
1727; X86-NOBMI2-NEXT:    subl $8, %esp
1728; X86-NOBMI2-NEXT:    pushl %esi
1729; X86-NOBMI2-NEXT:    pushl %edi
1730; X86-NOBMI2-NEXT:    calll use64
1731; X86-NOBMI2-NEXT:    addl $16, %esp
1732; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1733; X86-NOBMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
1734; X86-NOBMI2-NEXT:    movl %edi, %eax
1735; X86-NOBMI2-NEXT:    movl %esi, %edx
1736; X86-NOBMI2-NEXT:    addl $4, %esp
1737; X86-NOBMI2-NEXT:    popl %esi
1738; X86-NOBMI2-NEXT:    popl %edi
1739; X86-NOBMI2-NEXT:    retl
1740;
1741; X86-BMI2-LABEL: oneuse64:
1742; X86-BMI2:       # %bb.0:
1743; X86-BMI2-NEXT:    pushl %edi
1744; X86-BMI2-NEXT:    pushl %esi
1745; X86-BMI2-NEXT:    pushl %eax
1746; X86-BMI2-NEXT:    movb {{[0-9]+}}(%esp), %cl
1747; X86-BMI2-NEXT:    movl $-1, %esi
1748; X86-BMI2-NEXT:    shlxl %ecx, %esi, %edi
1749; X86-BMI2-NEXT:    shldl %cl, %esi, %esi
1750; X86-BMI2-NEXT:    testb $32, %cl
1751; X86-BMI2-NEXT:    je .LBB37_2
1752; X86-BMI2-NEXT:  # %bb.1:
1753; X86-BMI2-NEXT:    movl %edi, %esi
1754; X86-BMI2-NEXT:    xorl %edi, %edi
1755; X86-BMI2-NEXT:  .LBB37_2:
1756; X86-BMI2-NEXT:    subl $8, %esp
1757; X86-BMI2-NEXT:    pushl %esi
1758; X86-BMI2-NEXT:    pushl %edi
1759; X86-BMI2-NEXT:    calll use64
1760; X86-BMI2-NEXT:    addl $16, %esp
1761; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %esi
1762; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edi
1763; X86-BMI2-NEXT:    movl %edi, %eax
1764; X86-BMI2-NEXT:    movl %esi, %edx
1765; X86-BMI2-NEXT:    addl $4, %esp
1766; X86-BMI2-NEXT:    popl %esi
1767; X86-BMI2-NEXT:    popl %edi
1768; X86-BMI2-NEXT:    retl
1769;
1770; X64-NOBMI2-LABEL: oneuse64:
1771; X64-NOBMI2:       # %bb.0:
1772; X64-NOBMI2-NEXT:    pushq %r14
1773; X64-NOBMI2-NEXT:    pushq %rbx
1774; X64-NOBMI2-NEXT:    pushq %rax
1775; X64-NOBMI2-NEXT:    movq %rdi, %r14
1776; X64-NOBMI2-NEXT:    movq $-1, %rbx
1777; X64-NOBMI2-NEXT:    movl %esi, %ecx
1778; X64-NOBMI2-NEXT:    shlq %cl, %rbx
1779; X64-NOBMI2-NEXT:    movq %rbx, %rdi
1780; X64-NOBMI2-NEXT:    callq use64
1781; X64-NOBMI2-NEXT:    andq %r14, %rbx
1782; X64-NOBMI2-NEXT:    movq %rbx, %rax
1783; X64-NOBMI2-NEXT:    addq $8, %rsp
1784; X64-NOBMI2-NEXT:    popq %rbx
1785; X64-NOBMI2-NEXT:    popq %r14
1786; X64-NOBMI2-NEXT:    retq
1787;
1788; X64-BMI2-LABEL: oneuse64:
1789; X64-BMI2:       # %bb.0:
1790; X64-BMI2-NEXT:    pushq %r14
1791; X64-BMI2-NEXT:    pushq %rbx
1792; X64-BMI2-NEXT:    pushq %rax
1793; X64-BMI2-NEXT:    movq %rdi, %r14
1794; X64-BMI2-NEXT:    movq $-1, %rax
1795; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rbx
1796; X64-BMI2-NEXT:    movq %rbx, %rdi
1797; X64-BMI2-NEXT:    callq use64
1798; X64-BMI2-NEXT:    andq %r14, %rbx
1799; X64-BMI2-NEXT:    movq %rbx, %rax
1800; X64-BMI2-NEXT:    addq $8, %rsp
1801; X64-BMI2-NEXT:    popq %rbx
1802; X64-BMI2-NEXT:    popq %r14
1803; X64-BMI2-NEXT:    retq
1804  %mask = shl i64 -1, %numlowbits ; all-ones shifted left by %numlowbits
1805  call void @use64(i64 %mask) ; second user of %mask
1806  %masked = and i64 %mask, %val ; apply the mask to %val
1807  ret i64 %masked
1808}
1809